From pypy.commits at gmail.com Fri Jun 1 03:03:20 2018 From: pypy.commits at gmail.com (arigo) Date: Fri, 01 Jun 2018 00:03:20 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: From PR #615: be more careful when calling closedir() Message-ID: <5b10efb8.1c69fb81.416a7.e5a0@mx.google.com> Author: Armin Rigo Branch: py3.6 Changeset: r94717:77ccc18f0391 Date: 2018-06-01 09:02 +0200 http://bitbucket.org/pypy/pypy/changeset/77ccc18f0391/ Log: From PR #615: be more careful when calling closedir() diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py --- a/pypy/module/posix/interp_scandir.py +++ b/pypy/module/posix/interp_scandir.py @@ -79,8 +79,10 @@ self._close() def _close(self): - rposix_scandir.closedir(self.dirp) - self.dirp = rposix_scandir.NULL_DIRP + dirp = self.dirp + if dirp: + self.dirp = rposix_scandir.NULL_DIRP + rposix_scandir.closedir(dirp) def iter_w(self): return self From pypy.commits at gmail.com Fri Jun 1 04:23:23 2018 From: pypy.commits at gmail.com (arigo) Date: Fri, 01 Jun 2018 01:23:23 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Issue #2834 Message-ID: <5b11027b.1c69fb81.7425d.1250@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r94718:248e6b2994ed Date: 2018-06-01 10:22 +0200 http://bitbucket.org/pypy/pypy/changeset/248e6b2994ed/ Log: Issue #2834 Try to call _bootstrap._install() at translation time only, not at runtime. 
diff --git a/pypy/module/_frozen_importlib/__init__.py b/pypy/module/_frozen_importlib/__init__.py --- a/pypy/module/_frozen_importlib/__init__.py +++ b/pypy/module/_frozen_importlib/__init__.py @@ -1,4 +1,5 @@ import os +from rpython.rlib.objectmodel import we_are_translated from pypy.interpreter.mixedmodule import MixedModule from pypy.module.sys import initpath from pypy.module._frozen_importlib import interp_import @@ -77,8 +78,17 @@ def startup(self, space): """Copy our __import__ to builtins.""" + if not we_are_translated(): + self.startup_at_translation_time_only(space) + self.space.builtin.setdictvalue(space, '__import__', self.w_import) + + def startup_at_translation_time_only(self, space): + # Issue #2834 + # Call _bootstrap._install() at translation time only, not at + # runtime. By looking around what it does, this should not + # freeze any machine-specific paths. I *think* it only sets up + # stuff that depends on the platform. w_install = self.getdictvalue(space, '_install') space.call_function(w_install, space.getbuiltinmodule('sys'), space.getbuiltinmodule('_imp')) - self.space.builtin.setdictvalue(space, '__import__', self.w_import) From pypy.commits at gmail.com Fri Jun 1 09:30:31 2018 From: pypy.commits at gmail.com (mjacob) Date: Fri, 01 Jun 2018 06:30:31 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: _collections.deque: Raise MemoryError if the size of the deque multiplied by an integer is larger than what would fit in an RPython int. Message-ID: <5b114a77.1c69fb81.59b0f.7faa@mx.google.com> Author: Manuel Jacob Branch: py3.5 Changeset: r94719:381f24401707 Date: 2018-06-01 15:29 +0200 http://bitbucket.org/pypy/pypy/changeset/381f24401707/ Log: _collections.deque: Raise MemoryError if the size of the deque multiplied by an integer is larger than what would fit in an RPython int. 
diff --git a/pypy/module/_collections/interp_deque.py b/pypy/module/_collections/interp_deque.py --- a/pypy/module/_collections/interp_deque.py +++ b/pypy/module/_collections/interp_deque.py @@ -1,5 +1,6 @@ import sys from rpython.rlib.objectmodel import specialize +from rpython.rlib.rarithmetic import ovfcheck from pypy.interpreter import gateway from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.typedef import TypeDef, make_weakref_descr @@ -192,9 +193,13 @@ def mul(self, w_int): space = self.space + num = space.int_w(w_int) + try: + ovfcheck(self.len * num) + except OverflowError: + raise MemoryError copied = W_Deque(space) copied.maxlen = self.maxlen - num = space.int_w(w_int) for _ in range(num): copied.extend(self) @@ -212,6 +217,10 @@ if num <= 0: self.clear() return self + try: + ovfcheck(self.len * num) + except OverflowError: + raise MemoryError # use a copy to extend self copy = W_Deque(space) copy.maxlen = self.maxlen diff --git a/pypy/module/_collections/test/test_deque.py b/pypy/module/_collections/test/test_deque.py --- a/pypy/module/_collections/test/test_deque.py +++ b/pypy/module/_collections/test/test_deque.py @@ -372,6 +372,20 @@ assert d == deque('a' * n) assert d.maxlen is None + def test_deque_repeat_big(self): + import sys + from _collections import deque + d = deque([0]) + d *= 2**8 + if sys.maxsize <= 2147483647: + raises(MemoryError, d.__mul__, 2**24) + raises(MemoryError, d.__rmul__, 2**24) + raises(MemoryError, d.__imul__, 2**24) + else: + raises(MemoryError, d.__mul__, 2**56) + raises(MemoryError, d.__rmul__, 2**56) + raises(MemoryError, d.__imul__, 2**56) + def test_deque_insert(self): from _collections import deque for i in range(0,11): From pypy.commits at gmail.com Fri Jun 1 13:40:44 2018 From: pypy.commits at gmail.com (arigo) Date: Fri, 01 Jun 2018 10:40:44 -0700 (PDT) Subject: [pypy-commit] cffi default: Py3.7: fix const-correctness Message-ID: <5b11851c.1c69fb81.ba806.0193@mx.google.com> Author: Armin Rigo 
Branch: Changeset: r3121:af3a1e8a51e0 Date: 2018-06-01 19:32 +0200 http://bitbucket.org/cffi/cffi/changeset/af3a1e8a51e0/ Log: Py3.7: fix const-correctness diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -4154,7 +4154,7 @@ dl_methods, /* tp_methods */ }; -static void *b_do_dlopen(PyObject *args, char **p_printable_filename, +static void *b_do_dlopen(PyObject *args, const char **p_printable_filename, PyObject **p_temp) { /* Logic to call the correct version of dlopen(). Returns NULL in case of error. @@ -4224,7 +4224,7 @@ static PyObject *b_load_library(PyObject *self, PyObject *args) { - char *printable_filename; + const char *printable_filename; PyObject *temp; void *handle; DynLibObject *dlobj = NULL; diff --git a/c/cdlopen.c b/c/cdlopen.c --- a/c/cdlopen.c +++ b/c/cdlopen.c @@ -40,7 +40,7 @@ static PyObject *ffi_dlopen(PyObject *self, PyObject *args) { - char *modname; + const char *modname; PyObject *temp, *result = NULL; void *handle; diff --git a/c/lib_obj.c b/c/lib_obj.c --- a/c/lib_obj.c +++ b/c/lib_obj.c @@ -611,7 +611,7 @@ offsetof(LibObject, l_dict), /* tp_dictoffset */ }; -static LibObject *lib_internal_new(FFIObject *ffi, char *module_name, +static LibObject *lib_internal_new(FFIObject *ffi, const char *module_name, void *dlopen_libhandle) { LibObject *lib; From pypy.commits at gmail.com Fri Jun 1 14:07:05 2018 From: pypy.commits at gmail.com (mattip) Date: Fri, 01 Jun 2018 11:07:05 -0700 (PDT) Subject: [pypy-commit] pypy default: typo Message-ID: <5b118b49.1c69fb81.2cec1.b595@mx.google.com> Author: Matti Picus Branch: Changeset: r94720:58cf698c9a54 Date: 2018-06-01 11:06 -0700 http://bitbucket.org/pypy/pypy/changeset/58cf698c9a54/ Log: typo diff --git a/pypy/doc/install.rst b/pypy/doc/install.rst --- a/pypy/doc/install.rst +++ b/pypy/doc/install.rst @@ -20,7 +20,7 @@ OS and architecture. You may be able to use either use the `most recent release`_ or one of our `development nightly build`_. 
These builds depend on dynamically linked libraries that may not be available on your -OS. See the section about `Linux binaries` for more info and alternatives that +OS. See the section about `Linux binaries`_ for more info and alternatives that may work on your system. Please note that the nightly builds are not From pypy.commits at gmail.com Sat Jun 2 15:45:56 2018 From: pypy.commits at gmail.com (standy66) Date: Sat, 02 Jun 2018 12:45:56 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Fix _sysconfigdata on MacOS Message-ID: <5b12f3f4.1c69fb81.e9246.ba68@mx.google.com> Author: Andrew Stepanov Branch: py3.5 Changeset: r94721:87fe9e08fe50 Date: 2018-06-02 16:28 +0300 http://bitbucket.org/pypy/pypy/changeset/87fe9e08fe50/ Log: Fix _sysconfigdata on MacOS diff --git a/lib_pypy/_sysconfigdata.py b/lib_pypy/_sysconfigdata.py --- a/lib_pypy/_sysconfigdata.py +++ b/lib_pypy/_sysconfigdata.py @@ -24,6 +24,15 @@ 'VERSION': sys.version[:3] } +if find_executable("gcc"): + build_time_vars.update({ + "CC": "gcc -pthread", + "GNULD": "yes", + "LDSHARED": "gcc -pthread -shared", + }) + if find_executable("g++"): + build_time_vars["CXX"] = "g++ -pthread" + if sys.platform[:6] == "darwin": import platform if platform.machine() == 'i386': @@ -36,12 +45,6 @@ arch = platform.machine() build_time_vars['LDSHARED'] += ' -undefined dynamic_lookup' build_time_vars['CC'] += ' -arch %s' % (arch,) + if "CXX" in build_time_vars: + build_time_vars['CXX'] += ' -arch %s' % (arch,) -if find_executable("gcc"): - build_time_vars.update({ - "CC": "gcc -pthread", - "GNULD": "yes", - "LDSHARED": "gcc -pthread -shared", - }) - if find_executable("g++"): - build_time_vars["CXX"] = "g++ -pthread" From pypy.commits at gmail.com Sun Jun 3 17:07:26 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 03 Jun 2018 14:07:26 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: rename for backward compatibility Message-ID: <5b14588e.1c69fb81.1a903.8418@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: 
r94722:27800b9efa30 Date: 2018-06-03 13:53 -0700 http://bitbucket.org/pypy/pypy/changeset/27800b9efa30/ Log: rename for backward compatibility diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -456,9 +456,9 @@ return space.newunicode(uc) @unmarshaller(TYPE_INTERNED) -def unmarshal_bytes(space, u, tc): - w_u = unmarshal_unicode(space, u, tc) - return u.space.new_interned_w_str(w_u) +def unmarshal_interned(space, u, tc): + w_ret = unmarshal_unicode(space, u, tc) + return u.space.new_interned_w_str(w_ret) def _unmarshal_ascii(u, short_length, interned): if short_length: From pypy.commits at gmail.com Mon Jun 4 08:11:39 2018 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 04 Jun 2018 05:11:39 -0700 (PDT) Subject: [pypy-commit] pypy default: essential typo fix Message-ID: <5b152c7b.1c69fb81.a89f2.8995@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r94723:bc8f427e0686 Date: 2018-06-04 14:10 +0200 http://bitbucket.org/pypy/pypy/changeset/bc8f427e0686/ Log: essential typo fix diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py --- a/pypy/interpreter/astcompiler/test/test_astbuilder.py +++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py @@ -1096,7 +1096,7 @@ s = self.get_first_expr("'hi' ' implicitly' ' extra'") assert isinstance(s, ast.Str) assert space.eq_w(s.s, space.wrap("hi implicitly extra")) - sentence = u"Die Männer ärgen sich!" + sentence = u"Die Männer ärgern sich!" 
source = u"# coding: utf-7\nstuff = u'%s'" % (sentence,) info = pyparse.CompileInfo("", "exec") tree = self.parser.parse_source(source.encode("utf-7"), info) diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py --- a/pypy/interpreter/pyparser/test/test_pyparse.py +++ b/pypy/interpreter/pyparser/test/test_pyparse.py @@ -38,7 +38,7 @@ """, info=info) assert tree.type == syms.file_input assert info.encoding == "iso-8859-1" - sentence = u"u'Die Männer ärgen sich!'" + sentence = u"u'Die Männer ärgern sich!'" input = (u"# coding: utf-7\nstuff = %s" % (sentence,)).encode("utf-7") tree = self.parse(input, info=info) assert info.encoding == "utf-7" diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py --- a/rpython/rlib/test/test_runicode.py +++ b/rpython/rlib/test/test_runicode.py @@ -308,9 +308,9 @@ _assert_decu7(' +AOQ- ', u' \xe4 ') _assert_decu7('+AOQ-+AOQ-', u'\xe4\xe4') - s_utf7 = 'Die M+AOQ-nner +AOQ-rgen sich!' - s_utf8 = u'Die Männer ärgen sich!' - s_utf8_esc = u'Die M\xe4nner \xe4rgen sich!' + s_utf7 = 'Die M+AOQ-nner +AOQ-rgern sich!' + s_utf8 = u'Die Männer ärgern sich!' + s_utf8_esc = u'Die M\xe4nner \xe4rgern sich!' 
_assert_decu7(s_utf7, s_utf8_esc) _assert_decu7(s_utf7, s_utf8) From pypy.commits at gmail.com Tue Jun 5 03:48:04 2018 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 05 Jun 2018 00:48:04 -0700 (PDT) Subject: [pypy-commit] pypy pyparser-improvements-3: merge default Message-ID: <5b164034.1c69fb81.b1f47.4d08@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: pyparser-improvements-3 Changeset: r94724:6bbf7066c948 Date: 2018-06-04 14:55 +0200 http://bitbucket.org/pypy/pypy/changeset/6bbf7066c948/ Log: merge default diff too long, truncating to 2000 out of 17409 lines diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -33,7 +33,12 @@ 050d84dd78997f021acf0e133934275d63547cc0 release-pypy2.7-v5.4.1 050d84dd78997f021acf0e133934275d63547cc0 release-pypy2.7-v5.4.1 0e2d9a73f5a1818d0245d75daccdbe21b2d5c3ef release-pypy2.7-v5.4.1 +4909c06daf41ce88f87dc01c57959cadad4df4a8 RevDB-pypy2.7-v5.4.1 +4909c06daf41ce88f87dc01c57959cadad4df4a8 RevDB-pypy2.7-v5.4.1 +d7724c0a5700b895a47de44074cdf5fd659a988f RevDB-pypy2.7-v5.4.1 aff251e543859ce4508159dd9f1a82a2f553de00 release-pypy2.7-v5.6.0 +e90317857d27917bf840caf675832292ee070510 RevDB-pypy2.7-v5.6.1 +a24d6c7000c8099c73d3660857f7e3cee5ac045c RevDB-pypy2.7-v5.6.2 fa3249d55d15b9829e1be69cdf45b5a44cec902d release-pypy2.7-v5.7.0 b16a4363e930f6401bceb499b9520955504c6cb0 release-pypy3.5-v5.7.0 1aa2d8e03cdfab54b7121e93fda7e98ea88a30bf release-pypy2.7-v5.7.1 @@ -51,3 +56,5 @@ 0000000000000000000000000000000000000000 release-pypy3.5-v5.10.0 09f9160b643e3f02ccb8c843b2fbb4e5cbf54082 release-pypy3.5-v5.10.0 3f6eaa010fce78cc7973bdc1dfdb95970f08fed2 release-pypy3.5-v5.10.1 +ab0b9caf307db6592905a80b8faffd69b39005b8 release-pypy2.7-v6.0.0 +fdd60ed87e941677e8ea11acf9f1819466521bf2 release-pypy3.5-v6.0.0 diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -247,6 +247,7 @@ Lukas Vacek Omer Katz Jacek Generowicz + Tomasz Dziopa Sylvain Thenault Jakub Stasiak Andrew Dalke @@ -307,6 +308,7 @@ Yury V. 
Zaytsev florinpapa Anders Sigfridsson + Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert diff --git a/dotviewer/font/NOTICE b/dotviewer/font/COPYING.txt rename from dotviewer/font/NOTICE rename to dotviewer/font/COPYING.txt diff --git a/lib-python/2.7/opcode.py b/lib-python/2.7/opcode.py --- a/lib-python/2.7/opcode.py +++ b/lib-python/2.7/opcode.py @@ -194,5 +194,6 @@ def_op('CALL_METHOD', 202) # #args not including 'self' def_op('BUILD_LIST_FROM_ARG', 203) jrel_op('JUMP_IF_NOT_DEBUG', 204) # jump over assert statements +def_op('LOAD_REVDB_VAR', 205) # reverse debugger (syntax example: $5) del def_op, name_op, jrel_op, jabs_op diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -82,8 +82,11 @@ def _CData_output(self, resarray, base=None, index=-1): from _rawffi.alt import types # If a char_p or unichar_p is received, skip the string interpretation - if base._ffiargtype != types.Pointer(types.char_p) and \ - base._ffiargtype != types.Pointer(types.unichar_p): + try: + deref = type(base)._deref_ffiargtype() + except AttributeError: + deref = None + if deref != types.char_p and deref != types.unichar_p: # this seems to be a string if we're array of char, surprise! 
from ctypes import c_char, c_wchar if self._type_ is c_char: @@ -120,6 +123,12 @@ value = self(*value) return _CDataMeta.from_param(self, value) + def _build_ffiargtype(self): + return _ffi.types.Pointer(self._type_.get_ffi_argtype()) + + def _deref_ffiargtype(self): + return self._type_.get_ffi_argtype() + def array_get_slice_params(self, index): if hasattr(self, '_length_'): start, stop, step = index.indices(self._length_) @@ -248,6 +257,5 @@ _type_ = base ) cls = ArrayMeta(name, (Array,), tpdict) - cls._ffiargtype = _ffi.types.Pointer(base.get_ffi_argtype()) ARRAY_CACHE[key] = cls return cls diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -49,10 +49,13 @@ else: return self.from_param(as_parameter) + def _build_ffiargtype(self): + return _shape_to_ffi_type(self._ffiargshape_) + def get_ffi_argtype(self): if self._ffiargtype: return self._ffiargtype - self._ffiargtype = _shape_to_ffi_type(self._ffiargshape_) + self._ffiargtype = self._build_ffiargtype() return self._ffiargtype def _CData_output(self, resbuffer, base=None, index=-1): diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -70,7 +70,12 @@ self._ffiarray = ffiarray self.__init__ = __init__ self._type_ = TP - self._ffiargtype = _ffi.types.Pointer(TP.get_ffi_argtype()) + + def _build_ffiargtype(self): + return _ffi.types.Pointer(self._type_.get_ffi_argtype()) + + def _deref_ffiargtype(self): + return self._type_.get_ffi_argtype() from_address = cdata_from_address diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -160,6 +160,10 @@ raise AttributeError("_fields_ is final") if self in [f[1] for f in value]: raise AttributeError("Structure or union cannot contain itself") + if self._ffiargtype is not None: + raise 
NotImplementedError("Too late to set _fields_: we already " + "said to libffi that the structure type %s is opaque" + % (self,)) names_and_fields( self, value, self.__bases__[0], diff --git a/lib_pypy/grp.py b/lib_pypy/grp.py --- a/lib_pypy/grp.py +++ b/lib_pypy/grp.py @@ -4,6 +4,8 @@ from _pwdgrp_cffi import ffi, lib import _structseq +import thread +_lock = thread.allocate_lock() try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f @@ -33,32 +35,35 @@ @builtinify def getgrgid(gid): - res = lib.getgrgid(gid) - if not res: - # XXX maybe check error eventually - raise KeyError(gid) - return _group_from_gstruct(res) + with _lock: + res = lib.getgrgid(gid) + if not res: + # XXX maybe check error eventually + raise KeyError(gid) + return _group_from_gstruct(res) @builtinify def getgrnam(name): if not isinstance(name, basestring): raise TypeError("expected string") name = str(name) - res = lib.getgrnam(name) - if not res: - raise KeyError("'getgrnam(): name not found: %s'" % name) - return _group_from_gstruct(res) + with _lock: + res = lib.getgrnam(name) + if not res: + raise KeyError("'getgrnam(): name not found: %s'" % name) + return _group_from_gstruct(res) @builtinify def getgrall(): - lib.setgrent() lst = [] - while 1: - p = lib.getgrent() - if not p: - break - lst.append(_group_from_gstruct(p)) - lib.endgrent() + with _lock: + lib.setgrent() + while 1: + p = lib.getgrent() + if not p: + break + lst.append(_group_from_gstruct(p)) + lib.endgrent() return lst __all__ = ('struct_group', 'getgrgid', 'getgrnam', 'getgrall') diff --git a/lib_pypy/pwd.py b/lib_pypy/pwd.py --- a/lib_pypy/pwd.py +++ b/lib_pypy/pwd.py @@ -12,6 +12,8 @@ from _pwdgrp_cffi import ffi, lib import _structseq +import thread +_lock = thread.allocate_lock() try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f @@ -55,10 +57,11 @@ Return the password database entry for the given numeric user ID. 
See pwd.__doc__ for more on password database entries. """ - pw = lib.getpwuid(uid) - if not pw: - raise KeyError("getpwuid(): uid not found: %s" % uid) - return _mkpwent(pw) + with _lock: + pw = lib.getpwuid(uid) + if not pw: + raise KeyError("getpwuid(): uid not found: %s" % uid) + return _mkpwent(pw) @builtinify def getpwnam(name): @@ -71,10 +74,11 @@ if not isinstance(name, basestring): raise TypeError("expected string") name = str(name) - pw = lib.getpwnam(name) - if not pw: - raise KeyError("getpwname(): name not found: %s" % name) - return _mkpwent(pw) + with _lock: + pw = lib.getpwnam(name) + if not pw: + raise KeyError("getpwname(): name not found: %s" % name) + return _mkpwent(pw) @builtinify def getpwall(): @@ -84,13 +88,14 @@ See pwd.__doc__ for more on password database entries. """ users = [] - lib.setpwent() - while True: - pw = lib.getpwent() - if not pw: - break - users.append(_mkpwent(pw)) - lib.endpwent() + with _lock: + lib.setpwent() + while True: + pw = lib.getpwent() + if not pw: + break + users.append(_mkpwent(pw)) + lib.endpwent() return users __all__ = ('struct_passwd', 'getpwuid', 'getpwnam', 'getpwall') diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -57,6 +57,11 @@ "termios", "_minimal_curses", ]) +reverse_debugger_disable_modules = set([ + "_continuation", "_vmprof", "_multiprocessing", + "micronumpy", + ]) + # XXX this should move somewhere else, maybe to platform ("is this posixish" # check or something) if sys.platform == "win32": @@ -292,6 +297,9 @@ modules = working_modules.copy() if config.translation.sandbox: modules = default_modules + if config.translation.reverse_debugger: + for mod in reverse_debugger_disable_modules: + setattr(config.objspace.usemodules, mod, False) # ignore names from 'essential_modules', notably 'exceptions', which # may not be present in config.objspace.usemodules at all modules = [name for name in modules if name not 
in essential_modules] diff --git a/pypy/doc/architecture.rst b/pypy/doc/architecture.rst --- a/pypy/doc/architecture.rst +++ b/pypy/doc/architecture.rst @@ -73,3 +73,63 @@ This division between bytecode evaluator and object space gives a lot of flexibility. One can plug in different :doc:`object spaces ` to get different or enriched behaviours of the Python objects. + +Layers +------ + +RPython +~~~~~~~ +:ref:`RPython ` is the language in which we write interpreters. +Not the entire PyPy project is written in RPython, only the parts that are +compiled in the translation process. The interesting point is that RPython +has no parser, it's compiled from the live python objects, which makes it +possible to do all kinds of metaprogramming during import time. In short, +Python is a meta programming language for RPython. + +The RPython standard library is to be found in the ``rlib`` subdirectory. + +Consult `Getting Started with RPython`_ for further reading + +Translation +~~~~~~~~~~~ +The translation toolchain - this is the part that takes care of translating +RPython to flow graphs and then to C. There is more in the +:doc:`architecture ` document written about it. + +It lives in the ``rpython`` directory: ``flowspace``, ``annotator`` +and ``rtyper``. + +PyPy Interpreter +~~~~~~~~~~~~~~~~ +This is in the ``pypy`` directory. ``pypy/interpreter`` is a standard +interpreter for Python written in RPython. The fact that it is +RPython is not apparent at first. Built-in modules are written in +``pypy/module/*``. Some modules that CPython implements in C are +simply written in pure Python; they are in the top-level ``lib_pypy`` +directory. The standard library of Python (with a few changes to +accomodate PyPy) is in ``lib-python``. + +JIT Compiler +~~~~~~~~~~~~ +:ref:`Just-in-Time Compiler (JIT) `: we have a tracing JIT that traces the +interpreter written in RPython, rather than the user program that it +interprets. As a result it applies to any interpreter, i.e. 
any +language. But getting it to work correctly is not trivial: it +requires a small number of precise "hints" and possibly some small +refactorings of the interpreter. The JIT itself also has several +almost-independent parts: the tracer itself in ``rpython/jit/metainterp``, the +optimizer in ``rpython/jit/metainterp/optimizer`` that optimizes a list of +residual operations, and the backend in ``rpython/jit/backend/`` +that turns it into machine code. Writing a new backend is a +traditional way to get into the project. + +Garbage Collectors +~~~~~~~~~~~~~~~~~~ +Garbage Collectors (GC): as you may notice if you are used to CPython's +C code, there are no ``Py_INCREF/Py_DECREF`` equivalents in RPython code. +:ref:`rpython:garbage-collection` is inserted +during translation. Moreover, this is not reference counting; it is a real +GC written as more RPython code. The best one we have so far is in +``rpython/memory/gc/incminimark.py``. + +.. _`Getting started with RPython`: http://rpython.readthedocs.org/en/latest/getting-started.html diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -267,14 +267,14 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar commands at install time; the exact list is in - :source:`pypy/tool/release/package.py `. Users + :source:`pypy/tool/release/package.py`. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. * PyPy 2.6 and later: anyone would get ``ImportError: no module named _gdbm_cffi``. Installers need to run ``pypy _gdbm_build.py`` in the ``lib_pypy`` directory during the installation process (plus others; - see the exact list in :source:`pypy/tool/release/package.py `). + see the exact list in :source:`pypy/tool/release/package.py`). 
Users seeing a broken installation of PyPy can fix it after-the-fact, by running ``pypy /path/to/lib_pypy/_gdbm_build.py``. This command produces a file diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst --- a/pypy/doc/coding-guide.rst +++ b/pypy/doc/coding-guide.rst @@ -539,7 +539,7 @@ hg help branch -.. _official wiki: http://mercurial.selenic.com/wiki/Branch +.. _official wiki: https://www.mercurial-scm.org/wiki/ .. _using-development-tracker: @@ -547,15 +547,7 @@ Using the development bug/feature tracker ----------------------------------------- -We have a `development tracker`_, based on Richard Jones' -`roundup`_ application. You can file bugs, -feature requests or see what's going on -for the next milestone, both from an E-Mail and from a -web interface. - -.. _development tracker: https://bugs.pypy.org/ -.. _roundup: http://roundup.sourceforge.net/ - +We use bitbucket for :source:`issues` tracking and :source:`pull-requests`. .. _testing: diff --git a/pypy/doc/commandline_ref.rst b/pypy/doc/commandline_ref.rst --- a/pypy/doc/commandline_ref.rst +++ b/pypy/doc/commandline_ref.rst @@ -8,3 +8,4 @@ :maxdepth: 1 man/pypy.1.rst + man/pypy3.1.rst diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short X.Y version. -version = '5.8' +version = '6.0' # The full version, including alpha/beta/rc tags. -release = '5.8.0' +release = '6.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/pypy/doc/config/objspace.usemodules._cppyy.txt b/pypy/doc/config/objspace.usemodules._cppyy.txt new file mode 100644 --- /dev/null +++ b/pypy/doc/config/objspace.usemodules._cppyy.txt @@ -0,0 +1,1 @@ +The internal backend for cppyy diff --git a/pypy/doc/config/objspace.usemodules._rawffi.txt b/pypy/doc/config/objspace.usemodules._rawffi.txt --- a/pypy/doc/config/objspace.usemodules._rawffi.txt +++ b/pypy/doc/config/objspace.usemodules._rawffi.txt @@ -1,3 +1,3 @@ -An experimental module providing very low-level interface to +A module providing very low-level interface to C-level libraries, for use when implementing ctypes, not -intended for a direct use at all. \ No newline at end of file +intended for a direct use at all. diff --git a/pypy/doc/config/objspace.usemodules.cpyext.txt b/pypy/doc/config/objspace.usemodules.cpyext.txt --- a/pypy/doc/config/objspace.usemodules.cpyext.txt +++ b/pypy/doc/config/objspace.usemodules.cpyext.txt @@ -1,1 +1,1 @@ -Use (experimental) cpyext module, that tries to load and run CPython extension modules +Use cpyext module to load and run CPython extension modules diff --git a/pypy/doc/contributing.rst b/pypy/doc/contributing.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/contributing.rst @@ -0,0 +1,472 @@ +Contributing Guidelines +=========================== + +.. contents:: + +PyPy is a very large project that has a reputation of being hard to dive into. +Some of this fame is warranted, some of it is purely accidental. There are three +important lessons that everyone willing to contribute should learn: + +* PyPy has layers. There are many pieces of architecture that are very well + separated from each other. More about this below, but often the manifestation + of this is that things are at a different layer than you would expect them + to be. For example if you are looking for the JIT implementation, you will + not find it in the implementation of the Python programming language. 
+ +* Because of the above, we are very serious about Test Driven Development. + It's not only what we believe in, but also that PyPy's architecture is + working very well with TDD in mind and not so well without it. Often + development means progressing in an unrelated corner, one unittest + at a time; and then flipping a giant switch, bringing it all together. + (It generally works out of the box. If it doesn't, then we didn't + write enough unit tests.) It's worth repeating - PyPy's + approach is great if you do TDD, and not so great otherwise. + +* PyPy uses an entirely different set of tools - most of them included + in the PyPy repository. There is no Makefile, nor autoconf. More below. + +The first thing to remember is that PyPy project is very different than most +projects out there. It's also different from a classic compiler project, +so academic courses about compilers often don't apply or lead in the wrong +direction. However, if you want to understand how designing & building a runtime +works in the real world then this is a great project! + +Getting involved +^^^^^^^^^^^^^^^^ + +PyPy employs a relatively standard open-source development process. You are +encouraged as a first step to join our `pypy-dev mailing list`_ and IRC channel, +details of which can be found in our :ref:`contact ` section. The folks +there are very friendly, and can point you in the right direction. + +We give out commit rights usually fairly liberally, so if you want to do something +with PyPy, you can become a committer. We also run frequent coding sprints which +are separately announced and often happen around Python conferences such as +EuroPython or PyCon. Upcoming events are usually announced on `the blog`_. + +Further Reading: :ref:`Contact ` + +.. _the blog: http://morepypy.blogspot.com +.. 
_pypy-dev mailing list: http://mail.python.org/mailman/listinfo/pypy-dev + + +Your first contribution +^^^^^^^^^^^^^^^^^^^^^^^ + +The first and most important rule how **not** to contribute to PyPy is +"just hacking a feature". This won't work, and you'll find your PR will typically +require a lot of re-work. There are a few reasons why not: + +* build times are large +* PyPy has very thick layer separation +* context of the cPython runtime is often required + +Instead, reach out on the dev mailing list or the IRC channel, and we're more +than happy to help! :) + +Some ideas for first contributions are: + +* Documentation - this will give you an understanding of the pypy architecture +* Test failures - find a failing test in the `nightly builds`_, and fix it +* Missing language features - these are listed in our `issue tracker`_ + +.. _nightly builds: http://buildbot.pypy.org/nightly/ +.. _issue tracker: https://bitbucket.org/pypy/pypy/issues + +Source Control +-------------- + +PyPy development is based a typical fork/pull request based workflow, centered +around Mercurial (hg), hosted on Bitbucket. If you have not used this workflow +before, a good introduction can be found here: + + https://www.atlassian.com/git/tutorials/comparing-workflows/forking-workflow + +The cycle for a new PyPy contributor goes typically like this: + +Fork & Clone +------------ + +* Make an account on bitbucket_. + +* Go to https://bitbucket.org/pypy/pypy/ and click "fork" (left + icons). You get a fork of the repository, e.g. in + `https://bitbucket.org/yourname/pypy/`. + +* Clone your new repo (i.e. the fork) to your local machine with the command + ``hg clone ssh://hg at bitbucket.org/yourname/pypy``. It is a very slow + operation but only ever needs to be done once. See also + http://pypy.org/download.html#building-from-source . 
+ If you already cloned + ``https://bitbucket.org/pypy/pypy`` before, even if some time ago, + then you can reuse the same clone by editing the file ``.hg/hgrc`` in + your clone to contain the line ``default = + ssh://hg at bitbucket.org/yourname/pypy``, and then do ``hg pull && hg + up``. If you already have such a clone but don't want to change it, + you can clone that copy with ``hg clone /path/to/other/copy``, and + then edit ``.hg/hgrc`` as above and do ``hg pull && hg up``. + +* Now you have a complete copy of the PyPy repo. Make a branch + with a command like ``hg branch name_of_your_branch``. + +Edit +---- + +* Edit things. Use ``hg diff`` to see what you changed. Use ``hg add`` + to make Mercurial aware of new files you added, e.g. new test files. + Use ``hg status`` to see if there are such files. Write and run tests! + (See the rest of this page.) + +* Commit regularly with ``hg commit``. A one-line commit message is + fine. We love to have tons of commits; make one as soon as you have + some progress, even if it is only some new test that doesn't pass yet, + or fixing things even if not all tests pass. Step by step, you are + building the history of your changes, which is the point of a version + control system. (There are commands like ``hg log`` and ``hg up`` + that you should read about later, to learn how to navigate this + history.) + +* The commits stay on your machine until you do ``hg push`` to "push" + them back to the repo named in the file ``.hg/hgrc``. Repos are + basically just collections of commits (a commit is also called a + changeset): there is one repo per url, plus one for each local copy on + each local machine. The commands ``hg push`` and ``hg pull`` copy + commits around, with the goal that all repos in question end up with + the exact same set of commits. By opposition, ``hg up`` only updates + the "working copy" by reading the local repository, i.e. 
it makes the + files that you see correspond to the latest (or any other) commit + locally present. + +* You should push often; there is no real reason not to. Remember that + even if they are pushed, with the setup above, the commits are (1) + only in ``bitbucket.org/yourname/pypy``, and (2) in the branch you + named. Yes, they are publicly visible, but don't worry about someone + walking around the thousands of repos on bitbucket saying "hah, look + at the bad coding style of that guy". Try to get into the mindset + that your work is not secret and it's fine that way. We might not + accept it as is for PyPy, asking you instead to improve some things, + but we are not going to judge you. + +Pull Request +------------ + +* The final step is to open a pull request, so that we know that you'd + like to merge that branch back to the original ``pypy/pypy`` repo. + This can also be done several times if you have interesting + intermediate states, but if you get there, then we're likely to + proceed to the next stage, which is... + +* Get a regular account for pushing directly to + ``bitbucket.org/pypy/pypy`` (just ask and you'll get it, basically). + Once you have it you can rewrite your file ``.hg/hgrc`` to contain + ``default = ssh://hg at bitbucket.org/pypy/pypy``. Your changes will + then be pushed directly to the official repo, but (if you follow these + rules) they are still on a branch, and we can still review the + branches you want to merge. + +* If you get closer to the regular day-to-day development, you'll notice + that we generally push small changes as one or a few commits directly + to the branch ``default``. Also, we often collaborate even if we are + on other branches, which do not really "belong" to anyone. At this + point you'll need ``hg merge`` and learn how to resolve conflicts that + sometimes occur when two people try to push different commits in + parallel on the same branch. But it is likely an issue for later ``:-)`` + +.. 
_bitbucket: https://bitbucket.org/ + + +Architecture +^^^^^^^^^^^^ + +PyPy has layers. Just like ogres or onions. Those layers help us keep the +respective parts separated enough to be worked on independently and make the +complexity manageable. This is, again, just a sanity requirement for such +a complex project. For example writing a new optimization for the JIT usually +does **not** involve touching a Python interpreter at all or the JIT assembler +backend or the garbage collector. Instead it requires writing small tests in +``rpython/jit/metainterp/optimizeopt/test/test_*`` and fixing files there. +After that, you can just compile PyPy and things should just work. + +Further Reading: :doc:`architecture ` + +Where to start? +--------------- + +PyPy is made from parts that are relatively independent of each other. +You should start looking at the part that attracts you most (all paths are +relative to the PyPy top level directory). You may look at our +:doc:`directory reference ` or start off at one of the following +points: + +* :source:`pypy/interpreter` contains the bytecode interpreter: bytecode dispatcher + in :source:`pypy/interpreter/pyopcode.py`, frame and code objects in + :source:`pypy/interpreter/eval.py` and :source:`pypy/interpreter/pyframe.py`, + function objects and argument passing in :source:`pypy/interpreter/function.py` + and :source:`pypy/interpreter/argument.py`, the object space interface + definition in :source:`pypy/interpreter/baseobjspace.py`, modules in + :source:`pypy/interpreter/module.py` and :source:`pypy/interpreter/mixedmodule.py`. + Core types supporting the bytecode interpreter are defined in + :source:`pypy/interpreter/typedef.py`. + +* :source:`pypy/interpreter/pyparser` contains a recursive descent parser, + and grammar files that allow it to parse the syntax of various Python + versions. Once the grammar has been processed, the parser can be + translated by the above machinery into efficient code. 
+ +* :source:`pypy/interpreter/astcompiler` contains the compiler. This + contains a modified version of the compiler package from CPython + that fixes some bugs and is translatable. + +* :source:`pypy/objspace/std` contains the + :ref:`Standard object space `. The main file + is :source:`pypy/objspace/std/objspace.py`. For each type, the file + ``xxxobject.py`` contains the implementation for objects of type ``xxx``, + as a first approximation. (Some types have multiple implementations.) + +Building +^^^^^^^^ + +For building PyPy, we recommend installing a pre-built PyPy first (see +:doc:`install`). It is possible to build PyPy with CPython, but it will take a +lot longer to run -- depending on your architecture, between two and three +times as long. + +Further Reading: :doc:`Build ` + +Coding Guide +------------ + +As well as the usual pep8 and formatting standards, there are a number of +naming conventions and coding styles that are important to understand before +browsing the source. + +Further Reading: :doc:`Coding Guide ` + +Testing +^^^^^^^ + +Test driven development +----------------------- + +Instead, we practice a lot of test driven development. This is partly because +of very high quality requirements for compilers and partly because there is +simply no other way to get around such complex project, that will keep you sane. +There are probably people out there who are smart enough not to need it, we're +not one of those. You may consider familiarizing yourself with `pytest`_, +since this is a tool we use for tests. +This leads to the next issue: + +.. _pytest: http://pytest.org/ + +py.test and the py lib +---------------------- + +The `py.test testing tool`_ drives all our testing needs. + +We use the `py library`_ for filesystem path manipulations, terminal +writing, logging and some other support functionality. + +You don't necessarily need to install these two libraries because +we also ship them inlined in the PyPy source tree. + +.. 
_py library: http://pylib.readthedocs.org/ + +Running PyPy's unit tests +------------------------- + +PyPy development always was and is still thoroughly test-driven. +We use the flexible `py.test testing tool`_ which you can `install independently +`_ and use for other projects. + +The PyPy source tree comes with an inlined version of ``py.test`` +which you can invoke by typing:: + + python pytest.py -h + +This is usually equivalent to using an installed version:: + + py.test -h + +If you encounter problems with the installed version +make sure you have the correct version installed which +you can find out with the ``--version`` switch. + +You will need the `build requirements`_ to run tests successfully, since many of +them compile little pieces of PyPy and then run the tests inside that minimal +interpreter. The `cpyext` tests also require `pycparser`, and many tests build +cases with `hypothesis`. + +Now on to running some tests. PyPy has many different test directories +and you can use shell completion to point at directories or files:: + + py.test pypy/interpreter/test/test_pyframe.py + + # or for running tests of a whole subdirectory + py.test pypy/interpreter/ + +See `py.test usage and invocations`_ for some more generic info +on how you can run tests. + +Beware trying to run "all" pypy tests by pointing to the root +directory or even the top level subdirectory ``pypy``. It takes +hours and uses huge amounts of RAM and is not recommended. + +To run CPython regression tests you can point to the ``lib-python`` +directory:: + + py.test lib-python/2.7/test/test_datetime.py + +This will usually take a long time because this will run +the PyPy Python interpreter on top of CPython. On the plus +side, it's usually still faster than doing a full translation +and running the regression test with the translated PyPy Python +interpreter. + +.. _py.test testing tool: http://pytest.org +.. _py.test usage and invocations: http://pytest.org/latest/usage.html#usage +.. 
_`build requirements`: build.html#install-build-time-dependencies + +Testing After Translation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +While the usual invocation of `pytest` translates a piece of RPython code and +runs it, we have a test extension to run tests without translation, directly +on the host python. This is very convenient for modules such as `cpyext`, to +compare and contrast test results between CPython and PyPy. Untranslated tests +are invoked by using the `-A` or `--runappdirect` option to `pytest`:: + + python2 pytest.py -A pypy/module/cpyext/test + +where `python2` can be either `python2` or `pypy2`. On the `py3` branch, the +collection phase must be run with `python2` so untranslated tests are run +with:: + + cpython2 pytest.py -A pypy/module/cpyext/test --python=path/to/pypy3 + + +Tooling & Utilities +^^^^^^^^^^^^^^^^^^^ + +If you are interested in the inner workings of the PyPy Python interpreter, +there are some features of the untranslated Python interpreter that allow you +to introspect its internals. + + +Interpreter-level console +------------------------- + +To start interpreting Python with PyPy, install a C compiler that is +supported by distutils and use Python 2.7 or greater to run PyPy:: + + cd pypy + python bin/pyinteractive.py + +After a few seconds (remember: this is running on top of CPython), you should +be at the PyPy prompt, which is the same as the Python prompt, but with an +extra ">". + +If you press + on the console you enter the interpreter-level console, a +usual CPython console. You can then access internal objects of PyPy +(e.g. the :ref:`object space `) and any variables you have created on the PyPy +prompt with the prefix ``w_``:: + + >>>> a = 123 + >>>> + *** Entering interpreter-level console *** + >>> w_a + W_IntObject(123) + +The mechanism works in both directions. 
If you define a variable with the ``w_`` prefix on the interpreter-level, you will see it on the app-level:: + + >>> w_l = space.newlist([space.wrap(1), space.wrap("abc")]) + >>> + *** Leaving interpreter-level console *** + + KeyboardInterrupt + >>>> l + [1, 'abc'] + +Note that the prompt of the interpreter-level console is only '>>>' since +it runs on CPython level. If you want to return to PyPy, press (under +Linux) or , (under Windows). + +Also note that not all modules are available by default in this mode (for +example: ``_continuation`` needed by ``greenlet``) , you may need to use one of +``--withmod-...`` command line options. + +You may be interested in reading more about the distinction between +:ref:`interpreter-level and app-level `. + +pyinteractive.py options +------------------------ + +To list the PyPy interpreter command line options, type:: + + cd pypy + python bin/pyinteractive.py --help + +pyinteractive.py supports most of the options that CPython supports too (in addition to a +large amount of options that can be used to customize pyinteractive.py). +As an example of using PyPy from the command line, you could type:: + + python pyinteractive.py --withmod-time -c "from test import pystone; pystone.main(10)" + +Alternatively, as with regular Python, you can simply give a +script name on the command line:: + + python pyinteractive.py --withmod-time ../../lib-python/2.7/test/pystone.py 10 + +The ``--withmod-xxx`` option enables the built-in module ``xxx``. By +default almost none of them are, because initializing them takes time. +If you want anyway to enable all built-in modules, you can use +``--allworkingmodules``. + +See our :doc:`configuration sections ` for details about what all the commandline +options do. + + +.. _trace example: + +Tracing bytecode and operations on objects +------------------------------------------ + +You can use a simple tracing mode to monitor the interpretation of +bytecodes. 
To enable it, set ``__pytrace__ = 1`` on the interactive +PyPy console:: + + >>>> __pytrace__ = 1 + Tracing enabled + >>>> x = 5 + : LOAD_CONST 0 (5) + : STORE_NAME 0 (x) + : LOAD_CONST 1 (None) + : RETURN_VALUE 0 + >>>> x + : LOAD_NAME 0 (x) + : PRINT_EXPR 0 + 5 + : LOAD_CONST 0 (None) + : RETURN_VALUE 0 + >>>> + + +Demos +^^^^^ + +The `example-interpreter`_ repository contains an example interpreter +written using the RPython translation toolchain. + +.. _example-interpreter: https://bitbucket.org/pypy/example-interpreter + + +graphviz & pygame for flow graph viewing (highly recommended) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +graphviz and pygame are both necessary if you want to look at generated flow +graphs: + + graphviz: http://www.graphviz.org/Download.php + + pygame: http://www.pygame.org/download.shtml + diff --git a/pypy/doc/contributor.rst b/pypy/doc/contributor.rst --- a/pypy/doc/contributor.rst +++ b/pypy/doc/contributor.rst @@ -214,6 +214,7 @@ Lukas Vacek Omer Katz Jacek Generowicz + Tomasz Dziopa Sylvain Thenault Jakub Stasiak Andrew Dalke @@ -274,6 +275,7 @@ Yury V. Zaytsev florinpapa Anders Sigfridsson + Matt Jackson Nikolay Zinov rafalgalczynski at gmail.com Joshua Gilbert diff --git a/pypy/doc/discussion/ctypes-implementation.rst b/pypy/doc/discussion/ctypes-implementation.rst --- a/pypy/doc/discussion/ctypes-implementation.rst +++ b/pypy/doc/discussion/ctypes-implementation.rst @@ -141,28 +141,3 @@ .. _pyglet: http://pyglet.org/ - -ctypes configure ------------------ - -We also released ``ctypes-configure``, which is an experimental package -trying to approach the portability issues of ctypes-based code. - -idea -~~~~ - -One of ctypes problems is that ctypes programs are usually not very -platform-independent. We created ctypes_configure, which invokes c -compiler (via distutils) for various platform-dependent details like -exact sizes of types (for example size_t), ``#defines``, exact outline of -structures etc. 
It replaces in this regard code generator (h2py). - -installation -~~~~~~~~~~~~ - -``easy_install ctypes_configure`` - -usage -~~~~~ - -:source:`ctypes_configure/doc/sample.py` explains in details how to use it. diff --git a/pypy/doc/embedding.rst b/pypy/doc/embedding.rst --- a/pypy/doc/embedding.rst +++ b/pypy/doc/embedding.rst @@ -1,5 +1,5 @@ -Embedding PyPy -============== +Embedding PyPy (DEPRECATED) +=========================== PyPy has a very minimal and a very strange embedding interface, based on the usage of `cffi`_ and the philosophy that Python is a better language than diff --git a/pypy/doc/eventhistory.rst b/pypy/doc/eventhistory.rst --- a/pypy/doc/eventhistory.rst +++ b/pypy/doc/eventhistory.rst @@ -40,11 +40,9 @@ Main focus of the sprint will be on the goals of the upcoming June 0.9 release. -Read more in `the sprint announcement`__, see who is planning to attend -on the `people page`_. +Read more about `the sprint`__ -__ https://bitbucket.org/pypy/extradoc/raw/tip/sprintinfo/ddorf2006/announce.html -.. _people page: https://bitbucket.org/pypy/extradoc/raw/tip/sprintinfo/ddorf2006/people.txt +__ https://bitbucket.org/pypy/extradoc/src/extradoc/sprintinfo/ddorf2006/ PyPy sprint at Akihabara (Tokyo, Japan) diff --git a/pypy/doc/extradoc.rst b/pypy/doc/extradoc.rst --- a/pypy/doc/extradoc.rst +++ b/pypy/doc/extradoc.rst @@ -75,12 +75,12 @@ .. _A Way Forward in Parallelising Dynamic Languages: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/icooolps2014/position-paper.pdf .. _Runtime Feedback in a Meta-Tracing JIT for Efficient Dynamic Languages: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/icooolps2011/jit-hints.pdf .. _Allocation Removal by Partial Evaluation in a Tracing JIT: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/pepm2011/bolz-allocation-removal.pdf -.. _Towards a Jitting VM for Prolog Execution: http://www.stups.uni-duesseldorf.de/mediawiki/images/a/a7/Pub-BoLeSch2010.pdf +.. 
_Towards a Jitting VM for Prolog Execution: http://stups.hhu.de/mediawiki/images/a/a7/Pub-BoLeSch2010.pdf .. _High performance implementation of Python for CLI/.NET with JIT compiler generation for dynamic languages: http://buildbot.pypy.org/misc/antocuni-thesis.pdf .. _How to *not* write Virtual Machines for Dynamic Languages: https://bitbucket.org/pypy/extradoc/raw/tip/talk/dyla2007/dyla.pdf .. _`Tracing the Meta-Level: PyPy's Tracing JIT Compiler`: https://bitbucket.org/pypy/extradoc/raw/tip/talk/icooolps2009/bolz-tracing-jit.pdf .. _`Faster than C#: Efficient Implementation of Dynamic Languages on .NET`: https://bitbucket.org/pypy/extradoc/raw/tip/talk/icooolps2009-dotnet/cli-jit.pdf -.. _Automatic JIT Compiler Generation with Runtime Partial Evaluation: http://stups.hhu.de/mediawiki/images/b/b9/Master_bolz.pdf +.. _Automatic JIT Compiler Generation with Runtime Partial Evaluation: https://www.researchgate.net/profile/Davide_Ancona/publication/252023163_Automatic_generation_of_JIT_compilers_for_dynamic_languages_in_NET/links/53f2098e0cf2bc0c40e70023/Automatic-generation-of-JIT-compilers-for-dynamic-languages-in-NET.pdf .. _`RPython: A Step towards Reconciling Dynamically and Statically Typed OO Languages`: http://www.disi.unige.it/person/AnconaD/papers/DynamicLanguages_abstracts.html#AACM-DLS07 .. _EU Reports: index-report.html .. _Hardware Transactional Memory Support for Lightweight Dynamic Language Evolution: http://sabi.net/nriley/pubs/dls6-riley.pdf @@ -368,6 +368,6 @@ .. _LLVM: http://llvm.org/ .. _IronPython: http://ironpython.codeplex.com/ .. _Dynamic Native Optimization of Native Interpreters: http://people.csail.mit.edu/gregs/dynamorio.html -.. _JikesRVM: http://jikesrvm.org/ +.. _JikesRVM: http://www.jikesrvm.org/ .. _Tunes: http://tunes.org .. 
_old Tunes Wiki: http://buildbot.pypy.org/misc/cliki.tunes.org/ diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -67,7 +67,7 @@ you may need to run the command with `sudo` for a global installation. The other commands of ``setup.py`` are available too, like ``build``. -.. _PyPI: https://pypi.python.org/pypi +.. _PyPI: https://pypi.org .. _`use virtualenv (as documented here)`: install.html#installing-using-virtualenv @@ -360,7 +360,7 @@ (produced during a sprint). On the `PyPy bitbucket page`_ there is also a Scheme and an Io implementation; both of these are unfinished at the moment. -.. _Topaz: http://topazruby.com/ +.. _Topaz: http://docs.topazruby.com/en/latest/ .. _Hippy: http://morepypy.blogspot.ch/2012/07/hello-everyone.html .. _JavaScript interpreter: https://bitbucket.org/pypy/lang-js/ .. _Prolog interpreter: https://bitbucket.org/cfbolz/pyrolog/ diff --git a/pypy/doc/gc_info.rst b/pypy/doc/gc_info.rst --- a/pypy/doc/gc_info.rst +++ b/pypy/doc/gc_info.rst @@ -121,6 +121,166 @@ alive by GC objects, but not accounted in the GC +GC Hooks +-------- + +GC hooks are user-defined functions which are called whenever a specific GC +event occur, and can be used to monitor GC activity and pauses. You can +install the hooks by setting the following attributes: + +``gc.hook.on_gc_minor`` + Called whenever a minor collection occurs. It corresponds to + ``gc-minor`` sections inside ``PYPYLOG``. + +``gc.hook.on_gc_collect_step`` + Called whenever an incremental step of a major collection occurs. It + corresponds to ``gc-collect-step`` sections inside ``PYPYLOG``. + +``gc.hook.on_gc_collect`` + Called after the last incremental step, when a major collection is fully + done. It corresponds to ``gc-collect-done`` sections inside ``PYPYLOG``. + +To uninstall a hook, simply set the corresponding attribute to ``None``. 
To +install all hooks at once, you can call ``gc.hooks.set(obj)``, which will look +for methods ``on_gc_*`` on ``obj``. To uninstall all the hooks at once, you +can call ``gc.hooks.reset()``. + +The functions called by the hooks receive a single ``stats`` argument, which +contains various statistics about the event. + +Note that PyPy cannot call the hooks immediately after a GC event, but it has +to wait until it reaches a point in which the interpreter is in a known state +and calling user-defined code is harmless. It might happen that multiple +events occur before the hook is invoked: in this case, you can inspect the +value ``stats.count`` to know how many times the event occurred since the last +time the hook was called. Similarly, ``stats.duration`` contains the +**total** time spent by the GC for this specific event since the last time the +hook was called. + +On the other hand, all the other fields of the ``stats`` object are relative +only to the **last** event of the series. + +The attributes for ``GcMinorStats`` are: + +``count`` + The number of minor collections occurred since the last hook call. + +``duration`` + The total time spent inside minor collections since the last hook + call. See below for more information on the unit. + +``duration_min`` + The duration of the fastest minor collection since the last hook call. + +``duration_max`` + The duration of the slowest minor collection since the last hook call. + + ``total_memory_used`` + The amount of memory used at the end of the minor collection, in + bytes. This include the memory used in arenas (for GC-managed memory) and + raw-malloced memory (e.g., the content of numpy arrays). + +``pinned_objects`` + the number of pinned objects. + + +The attributes for ``GcCollectStepStats`` are: + +``count``, ``duration``, ``duration_min``, ``duration_max`` + See above. + +``oldstate``, ``newstate`` + Integers which indicate the state of the GC before and after the step. 
+ +The value of ``oldstate`` and ``newstate`` is one of these constants, defined +inside ``gc.GcCollectStepStats``: ``STATE_SCANNING``, ``STATE_MARKING``, +``STATE_SWEEPING``, ``STATE_FINALIZING``. It is possible to get a string +representation of it by indexing the ``GC_STATS`` tuple. + + +The attributes for ``GcCollectStats`` are: + +``count`` + See above. + +``num_major_collects`` + The total number of major collections which have been done since the + start. Contrarily to ``count``, this is an always-growing counter and it's + not reset between invocations. + +``arenas_count_before``, ``arenas_count_after`` + Number of arenas used before and after the major collection. + +``arenas_bytes`` + Total number of bytes used by GC-managed objects. + +``rawmalloc_bytes_before``, ``rawmalloc_bytes_after`` + Total number of bytes used by raw-malloced objects, before and after the + major collection. + +Note that ``GcCollectStats`` has **not** got a ``duration`` field. This is +because all the GC work is done inside ``gc-collect-step``: +``gc-collect-done`` is used only to give additional stats, but doesn't do any +actual work. + +A note about the ``duration`` field: depending on the architecture and +operating system, PyPy uses different ways to read timestamps, so ``duration`` +is expressed in varying units. It is possible to know which by calling +``__pypy__.debug_get_timestamp_unit()``, which can be one of the following +values: + +``tsc`` + The default on ``x86`` machines: timestamps are expressed in CPU ticks, as + read by the `Time Stamp Counter`_. + +``ns`` + Timestamps are expressed in nanoseconds. + +``QueryPerformanceCounter`` + On Windows, in case for some reason ``tsc`` is not available: timestamps + are read using the win API ``QueryPerformanceCounter()``. 
+ + +Unfortunately, there does not seem to be a reliable standard way for +converting ``tsc`` ticks into nanoseconds, although in practice on modern CPUs +it is enough to divide the ticks by the maximum nominal frequency of the CPU. +For this reason, PyPy gives the raw value, and leaves the job of doing the +conversion to external libraries. + +Here is an example of GC hooks in use:: + + import sys + import gc + + class MyHooks(object): + done = False + + def on_gc_minor(self, stats): + print 'gc-minor: count = %02d, duration = %d' % (stats.count, + stats.duration) + + def on_gc_collect_step(self, stats): + old = gc.GcCollectStepStats.GC_STATES[stats.oldstate] + new = gc.GcCollectStepStats.GC_STATES[stats.newstate] + print 'gc-collect-step: %s --> %s' % (old, new) + print ' count = %02d, duration = %d' % (stats.count, + stats.duration) + + def on_gc_collect(self, stats): + print 'gc-collect-done: count = %02d' % stats.count + self.done = True + + hooks = MyHooks() + gc.hooks.set(hooks) + + # simulate some GC activity + lst = [] + while not hooks.done: + lst = [lst, 1, 2, 3] + + +.. _`Time Stamp Counter`: https://en.wikipedia.org/wiki/Time_Stamp_Counter + .. _minimark-environment-variables: Environment variables diff --git a/pypy/doc/getting-started-dev.rst b/pypy/doc/getting-started-dev.rst deleted file mode 100644 --- a/pypy/doc/getting-started-dev.rst +++ /dev/null @@ -1,345 +0,0 @@ -Getting Started Developing With PyPy -==================================== - -.. contents:: - - -Using Mercurial ---------------- - -PyPy development is based on Mercurial (hg). If you are not used to -version control, the cycle for a new PyPy contributor goes typically -like this: - -* Make an account on bitbucket_. - -* Go to https://bitbucket.org/pypy/pypy/ and click "fork" (left - icons). You get a fork of the repository, e.g. in - https://bitbucket.org/yourname/pypy/. - -* Clone this new repo (i.e. 
the fork) to your local machine with the command - ``hg clone ssh://hg at bitbucket.org/yourname/pypy``. It is a very slow - operation but only ever needs to be done once. See also - http://pypy.org/download.html#building-from-source . - If you already cloned - ``https://bitbucket.org/pypy/pypy`` before, even if some time ago, - then you can reuse the same clone by editing the file ``.hg/hgrc`` in - your clone to contain the line ``default = - ssh://hg at bitbucket.org/yourname/pypy``, and then do ``hg pull && hg - up``. If you already have such a clone but don't want to change it, - you can clone that copy with ``hg clone /path/to/other/copy``, and - then edit ``.hg/hgrc`` as above and do ``hg pull && hg up``. - -* Now you have a complete copy of the PyPy repo. Make a branch - with a command like ``hg branch name_of_your_branch``. - -* Edit things. Use ``hg diff`` to see what you changed. Use ``hg add`` - to make Mercurial aware of new files you added, e.g. new test files. - Use ``hg status`` to see if there are such files. Write and run tests! - (See the rest of this page.) - -* Commit regularly with ``hg commit``. A one-line commit message is - fine. We love to have tons of commits; make one as soon as you have - some progress, even if it is only some new test that doesn't pass yet, - or fixing things even if not all tests pass. Step by step, you are - building the history of your changes, which is the point of a version - control system. (There are commands like ``hg log`` and ``hg up`` - that you should read about later, to learn how to navigate this - history.) - -* The commits stay on your machine until you do ``hg push`` to "push" - them back to the repo named in the file ``.hg/hgrc``. Repos are - basically just collections of commits (a commit is also called a - changeset): there is one repo per url, plus one for each local copy on - each local machine. 
The commands ``hg push`` and ``hg pull`` copy - commits around, with the goal that all repos in question end up with - the exact same set of commits. By opposition, ``hg up`` only updates - the "working copy" by reading the local repository, i.e. it makes the - files that you see correspond to the latest (or any other) commit - locally present. - -* You should push often; there is no real reason not to. Remember that - even if they are pushed, with the setup above, the commits are (1) - only in ``bitbucket.org/yourname/pypy``, and (2) in the branch you - named. Yes, they are publicly visible, but don't worry about someone - walking around the thousands of repos on bitbucket saying "hah, look - at the bad coding style of that guy". Try to get into the mindset - that your work is not secret and it's fine that way. We might not - accept it as is for PyPy, asking you instead to improve some things, - but we are not going to judge you. - -* The final step is to open a pull request, so that we know that you'd - like to merge that branch back to the original ``pypy/pypy`` repo. - This can also be done several times if you have interesting - intermediate states, but if you get there, then we're likely to - proceed to the next stage, which is... - -* Get a regular account for pushing directly to - ``bitbucket.org/pypy/pypy`` (just ask and you'll get it, basically). - Once you have it you can rewrite your file ``.hg/hgrc`` to contain - ``default = ssh://hg at bitbucket.org/pypy/pypy``. Your changes will - then be pushed directly to the official repo, but (if you follow these - rules) they are still on a branch, and we can still review the - branches you want to merge. - -* If you get closer to the regular day-to-day development, you'll notice - that we generally push small changes as one or a few commits directly - to the branch ``default``. Also, we often collaborate even if we are - on other branches, which do not really "belong" to anyone. 
At this - point you'll need ``hg merge`` and learn how to resolve conflicts that - sometimes occur when two people try to push different commits in - parallel on the same branch. But it is likely an issue for later ``:-)`` - -.. _bitbucket: https://bitbucket.org/ - - -Running PyPy's unit tests -------------------------- - -PyPy development always was and is still thoroughly test-driven. -We use the flexible `py.test testing tool`_ which you can `install independently -`_ and use for other projects. - -The PyPy source tree comes with an inlined version of ``py.test`` -which you can invoke by typing:: - - python pytest.py -h - -This is usually equivalent to using an installed version:: - - py.test -h - -If you encounter problems with the installed version -make sure you have the correct version installed which -you can find out with the ``--version`` switch. - -You will need the `build requirements`_ to run tests successfully, since many of -them compile little pieces of PyPy and then run the tests inside that minimal -interpreter - -Now on to running some tests. PyPy has many different test directories -and you can use shell completion to point at directories or files:: - - py.test pypy/interpreter/test/test_pyframe.py - - # or for running tests of a whole subdirectory - py.test pypy/interpreter/ - -See `py.test usage and invocations`_ for some more generic info -on how you can run tests. - -Beware trying to run "all" pypy tests by pointing to the root -directory or even the top level subdirectory ``pypy``. It takes -hours and uses huge amounts of RAM and is not recommended. - -To run CPython regression tests you can point to the ``lib-python`` -directory:: - - py.test lib-python/2.7/test/test_datetime.py - -This will usually take a long time because this will run -the PyPy Python interpreter on top of CPython. On the plus -side, it's usually still faster than doing a full translation -and running the regression test with the translated PyPy Python -interpreter. 
- -.. _py.test testing tool: http://pytest.org -.. _py.test usage and invocations: http://pytest.org/latest/usage.html#usage -.. _`build requirements`: build.html#install-build-time-dependencies - -Special Introspection Features of the Untranslated Python Interpreter ---------------------------------------------------------------------- - -If you are interested in the inner workings of the PyPy Python interpreter, -there are some features of the untranslated Python interpreter that allow you -to introspect its internals. - - -Interpreter-level console -~~~~~~~~~~~~~~~~~~~~~~~~~ - -To start interpreting Python with PyPy, install a C compiler that is -supported by distutils and use Python 2.7 or greater to run PyPy:: - - cd pypy - python bin/pyinteractive.py - -After a few seconds (remember: this is running on top of CPython), you should -be at the PyPy prompt, which is the same as the Python prompt, but with an -extra ">". - -If you press <Ctrl-C> on the console you enter the interpreter-level console, a -usual CPython console. You can then access internal objects of PyPy -(e.g. the :ref:`object space <objspace>`) and any variables you have created on the PyPy -prompt with the prefix ``w_``:: - - >>>> a = 123 - >>>> - *** Entering interpreter-level console *** - >>> w_a - W_IntObject(123) - -The mechanism works in both directions. If you define a variable with the ``w_`` prefix on the interpreter-level, you will see it on the app-level:: - - >>> w_l = space.newlist([space.wrap(1), space.wrap("abc")]) - >>> - *** Leaving interpreter-level console *** - - KeyboardInterrupt - >>>> l - [1, 'abc'] - -Note that the prompt of the interpreter-level console is only '>>>' since -it runs on CPython level. If you want to return to PyPy, press <Ctrl-D> (under -Linux) or <Ctrl-Z>, <Enter> (under Windows). - -Also note that not all modules are available by default in this mode (for -example: ``_continuation`` needed by ``greenlet``), you may need to use one of -``--withmod-...`` command line options.
- -You may be interested in reading more about the distinction between -:ref:`interpreter-level and app-level `. - -pyinteractive.py options -~~~~~~~~~~~~~~~~~~~~~~~~ - -To list the PyPy interpreter command line options, type:: - - cd pypy - python bin/pyinteractive.py --help - -pyinteractive.py supports most of the options that CPython supports too (in addition to a -large amount of options that can be used to customize pyinteractive.py). -As an example of using PyPy from the command line, you could type:: - - python pyinteractive.py --withmod-time -c "from test import pystone; pystone.main(10)" - -Alternatively, as with regular Python, you can simply give a -script name on the command line:: - - python pyinteractive.py --withmod-time ../../lib-python/2.7/test/pystone.py 10 - -The ``--withmod-xxx`` option enables the built-in module ``xxx``. By -default almost none of them are, because initializing them takes time. -If you want anyway to enable all built-in modules, you can use -``--allworkingmodules``. - -See our :doc:`configuration sections ` for details about what all the commandline -options do. - - -.. _trace example: - -Tracing bytecode and operations on objects -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can use a simple tracing mode to monitor the interpretation of -bytecodes. To enable it, set ``__pytrace__ = 1`` on the interactive -PyPy console:: - - >>>> __pytrace__ = 1 - Tracing enabled - >>>> x = 5 - : LOAD_CONST 0 (5) - : STORE_NAME 0 (x) - : LOAD_CONST 1 (None) - : RETURN_VALUE 0 - >>>> x - : LOAD_NAME 0 (x) - : PRINT_EXPR 0 - 5 - : LOAD_CONST 0 (None) - : RETURN_VALUE 0 - >>>> - - -Demos ------ - -The `example-interpreter`_ repository contains an example interpreter -written using the RPython translation toolchain. - -.. _example-interpreter: https://bitbucket.org/pypy/example-interpreter - - -Additional Tools for running (and hacking) PyPy ------------------------------------------------ - -We use some optional tools for developing PyPy. 
They are not required to run -the basic tests or to get an interactive PyPy prompt but they help to -understand and debug PyPy especially for the translation process. - - -graphviz & pygame for flow graph viewing (highly recommended) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -graphviz and pygame are both necessary if you -want to look at generated flow graphs: - - graphviz: http://www.graphviz.org/Download.php - - pygame: http://www.pygame.org/download.shtml - - -py.test and the py lib -~~~~~~~~~~~~~~~~~~~~~~ - -The `py.test testing tool`_ drives all our testing needs. - -We use the `py library`_ for filesystem path manipulations, terminal -writing, logging and some other support functionality. - -You don't necessarily need to install these two libraries because -we also ship them inlined in the PyPy source tree. - -.. _py library: http://pylib.readthedocs.org/ - - -Getting involved ----------------- - -PyPy employs an open development process. You are invited to join our -`pypy-dev mailing list`_ or look at the other :ref:`contact -possibilities `. Usually we give out commit rights fairly liberally, so if you -want to do something with PyPy, you can become a committer. We also run frequent -coding sprints which are separately announced and often happen around Python -conferences such as EuroPython or PyCon. Upcoming events are usually announced -on `the blog`_. - -.. _the blog: http://morepypy.blogspot.com -.. _pypy-dev mailing list: http://mail.python.org/mailman/listinfo/pypy-dev - - -.. _start-reading-sources: - -Where to start reading the sources ----------------------------------- - -PyPy is made from parts that are relatively independent of each other. -You should start looking at the part that attracts you most (all paths are -relative to the PyPy top level directory). 
You may look at our :doc:`directory reference ` -or start off at one of the following points: - -* :source:`pypy/interpreter` contains the bytecode interpreter: bytecode dispatcher - in :source:`pypy/interpreter/pyopcode.py`, frame and code objects in - :source:`pypy/interpreter/eval.py` and :source:`pypy/interpreter/pyframe.py`, - function objects and argument passing in :source:`pypy/interpreter/function.py` - and :source:`pypy/interpreter/argument.py`, the object space interface - definition in :source:`pypy/interpreter/baseobjspace.py`, modules in - :source:`pypy/interpreter/module.py` and :source:`pypy/interpreter/mixedmodule.py`. - Core types supporting the bytecode interpreter are defined in :source:`pypy/interpreter/typedef.py`. - -* :source:`pypy/interpreter/pyparser` contains a recursive descent parser, - and grammar files that allow it to parse the syntax of various Python - versions. Once the grammar has been processed, the parser can be - translated by the above machinery into efficient code. - -* :source:`pypy/interpreter/astcompiler` contains the compiler. This - contains a modified version of the compiler package from CPython - that fixes some bugs and is translatable. - -* :source:`pypy/objspace/std` contains the :ref:`Standard object space `. The main file - is :source:`pypy/objspace/std/objspace.py`. For each type, the file - ``xxxobject.py`` contains the implementation for objects of type ``xxx``, - as a first approximation. (Some types have multiple implementations.) diff --git a/pypy/doc/how-to-contribute.rst b/pypy/doc/how-to-contribute.rst deleted file mode 100644 --- a/pypy/doc/how-to-contribute.rst +++ /dev/null @@ -1,93 +0,0 @@ -How to contribute to PyPy -========================= - -This page describes how to contribute to the PyPy project. The first thing -to remember is that PyPy project is very different than most projects out there. 
-It's also different from a classic compiler project, so academic courses -about compilers often don't apply or lead in the wrong direction. - - -Don't just hack ---------------- - -The first and most important rule how not to contribute to PyPy is -"just hacking". This won't work. There are two major reasons why not --- build times are large and PyPy has very thick layer separation which -make it harder to "just hack a feature". - - -Test driven development ------------------------ - -Instead, we practice a lot of test driven development. This is partly because -of very high quality requirements for compilers and partly because there is -simply no other way to get around such complex project, that will keep you sane. -There are probably people out there who are smart enough not to need it, we're -not one of those. You may consider familiarizing yourself with `pytest`_, -since this is a tool we use for tests. -This leads to the next issue: - -.. _pytest: http://pytest.org/ - - -Layers ------- - -PyPy has layers. Just like Ogres or onions. -Those layers help us keep the respective parts separated enough -to be worked on independently and make the complexity manageable. This is, -again, just a sanity requirement for such a complex project. For example writing -a new optimization for the JIT usually does **not** involve touching a Python -interpreter at all or the JIT assembler backend or the garbage collector. -Instead it requires writing small tests in -``rpython/jit/metainterp/optimizeopt/test/test_*`` and fixing files there. -After that, you can just compile PyPy and things should just work. - -The short list of layers for further reading. For each of those layers, a good -entry point is a test subdirectory in respective directories. It usually -describes (better or worse) the interfaces between the submodules. 
For the -``pypy`` subdirectory, most tests are small snippets of python programs that -check for correctness (calls ``AppTestXxx``) that will call the appropriate -part of the interpreter. For the ``rpython`` directory, most tests are small -RPython interpreters that perform certain tasks. To see how they translate -to low-level graphs, run them with ``--view``. To see small interpreters -with a JIT compiler, use ``--viewloops`` option. - -* **python interpreter** - it's the part implemented in the ``pypy/`` directory. - It's implemented in RPython, which is a high level static language with - classes, garbage collection, just-in-time compiler generation and the ability - to call C. A cool part about it is that it can be run untranslated, so all - the tests are runnable without translating PyPy. - - **interpreter** contains the interpreter core - - **objspace** contains implementations of various objects exported to - the Python layer - - **module** directory contains extension modules written in RPython - -* **rpython compiler** that resides in ``rpython/annotator`` and - ``rpython/rtyper`` directories. Consult `Getting Started with RPython`_ - for further reading - -* **JIT generator** lives in ``rpython/jit`` directory. optimizations live - in ``rpython/jit/metainterp/optimizeopt``, the main JIT in - ``rpython/jit/metainterp`` (runtime part) and - ``rpython/jit/codewriter`` (translation-time part). Backends live in - ``rpython/jit/backend``. - -* **garbage collection** lives in ``rpython/memory`` - -The rest of directories serve specific niche goal and are unlikely a good -entry point. - - -More documentation ------------------- - -* `Getting Started Developing With PyPy`_ - -* `Getting Started with RPython`_ - -.. _`Getting Started Developing With PyPy`: getting-started-dev.html -.. 
_`Getting started with RPython`: http://rpython.readthedocs.org/en/latest/getting-started.html diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst --- a/pypy/doc/index-of-whatsnew.rst +++ b/pypy/doc/index-of-whatsnew.rst @@ -34,6 +34,7 @@ whatsnew-2.0.0-beta1.rst whatsnew-1.9.rst + CPython 3.5 compatible versions ------------------------------- diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst --- a/pypy/doc/index.rst +++ b/pypy/doc/index.rst @@ -9,7 +9,7 @@ * If you're interested in trying PyPy out, check out the :doc:`installation instructions `. -* If you want to help develop PyPy, please have a look at :doc:`how to contribute ` +* If you want to help develop PyPy, please have a look at :doc:`contributing ` and get in touch (:ref:`contact`)! All of the documentation and source code is available under the MIT license, @@ -31,6 +31,7 @@ introduction install build + windows faq @@ -40,43 +41,30 @@ ---------- .. toctree:: - :maxdepth: 1 + :maxdepth: 2 cpython_differences extending - embedding gc_info jit-hooks stackless __pypy__-module - objspace-proxies sandbox stm - windows - -.. _developing-pypy: - -Development documentation -------------------------- +Development +----------- .. toctree:: - :maxdepth: 1 + :maxdepth: 2 - getting-started-dev - how-to-contribute - you-want-to-help + contributing architecture configuration project-ideas project-documentation how-to-release -.. TODO: audit ^^ - - -.. TODO: Fill this in - Further resources ----------------- @@ -84,13 +72,10 @@ .. toctree:: :maxdepth: 1 - extradoc - eventhistory - discussions index-of-release-notes index-of-whatsnew contributor - + glossary .. _contact: @@ -118,7 +103,7 @@ the `development mailing list`_. .. _#pypy on irc.freenode.net: irc://irc.freenode.net/pypy -.. _here: http://www.tismer.com/pypy/irc-logs/pypy/ +.. _here: https://botbot.me/freenode/pypy/ .. _Development mailing list: http://mail.python.org/mailman/listinfo/pypy-dev .. 
_Commit mailing list: http://mail.python.org/mailman/listinfo/pypy-commit .. _Development bug/feature tracker: https://bitbucket.org/pypy/pypy/issues diff --git a/pypy/doc/install.rst b/pypy/doc/install.rst --- a/pypy/doc/install.rst +++ b/pypy/doc/install.rst @@ -20,7 +20,7 @@ OS and architecture. You may be able to either use the `most recent release`_ or one of our `development nightly build`_. These builds depend on dynamically linked libraries that may not be available on your -OS. See the section about `Linux binaries` for more info and alternatives that +OS. See the section about `Linux binaries`_ for more info and alternatives that may work on your system. Please note that the nightly builds are not diff --git a/pypy/doc/interpreter.rst b/pypy/doc/interpreter.rst --- a/pypy/doc/interpreter.rst +++ b/pypy/doc/interpreter.rst @@ -102,7 +102,7 @@ program flows with homogeneous name-value assignments on function invocations. -.. _how-to guide for descriptors: http://users.rcn.com/python/download/Descriptor.htm +.. _how-to guide for descriptors: https://docs.python.org/3/howto/descriptor.html Bytecode Interpreter Implementation Classes diff --git a/pypy/doc/man/pypy3.1.rst b/pypy/doc/man/pypy3.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/man/pypy3.1.rst @@ -0,0 +1,135 @@ +======= + pypy3 +======= + +.. note: this is turned into a regular man page "pypy3.1" by + doing "make man" in pypy/doc/ + +SYNOPSIS +======== + +``pypy3`` [*options*] +[``-c`` *cmd*\ \|\ ``-m`` *mod*\ \|\ *file.py*\ \|\ ``-``\ ] +[*arg*\ ...] + +OPTIONS +======= + +-i + Inspect interactively after running script. + +-O + Skip assert statements. + +-OO + Remove docstrings when importing modules in addition to ``-O``. + +-c CMD + Program passed in as ``CMD`` (terminates option list). + +-S + Do not ``import site`` on initialization. + +-s + Don't add the user site directory to `sys.path`. + +-u + Unbuffered binary ``stdout`` and ``stderr``.
+ +-h, --help + Show a help message and exit. + +-m MOD + Library module to be run as a script (terminates option list). + +-W ARG + Warning control (*arg* is *action*:*message*:*category*:*module*:*lineno*). + +-E + Ignore environment variables (such as ``PYTHONPATH``). + +-B + Disable writing bytecode (``.pyc``) files. + +-X track-resources + Produce a ``ResourceWarning`` whenever a file or socket is closed by the + garbage collector. + +--version + Print the PyPy version. + +--info + Print translation information about this PyPy executable. + +--jit ARG + Low level JIT parameters. Mostly internal. Run ``--jit help`` + for more information. + +ENVIRONMENT +=========== + +``PYTHONPATH`` + Add directories to pypy3's module search path. + The format is the same as shell's ``PATH``. + +``PYTHONSTARTUP`` + A script referenced by this variable will be executed before the + first prompt is displayed, in interactive mode. + +``PYTHONDONTWRITEBYTECODE`` + If set to a non-empty value, equivalent to the ``-B`` option. + Disable writing ``.pyc`` files. + +``PYTHONINSPECT`` + If set to a non-empty value, equivalent to the ``-i`` option. + Inspect interactively after running the specified script. + +``PYTHONIOENCODING`` + If this is set, it overrides the encoding used for + *stdin*/*stdout*/*stderr*. + The syntax is *encodingname*:*errorhandler* + The *errorhandler* part is optional and has the same meaning as in + `str.encode`. + +``PYTHONNOUSERSITE`` + If set to a non-empty value, equivalent to the ``-s`` option. + Don't add the user site directory to `sys.path`. + +``PYTHONWARNINGS`` + If set, equivalent to the ``-W`` option (warning control). + The value should be a comma-separated list of ``-W`` parameters. + +``PYPYLOG`` + If set to a non-empty value, enable logging, the format is: + + *fname* or *+fname* + logging for profiling: includes all + ``debug_start``/``debug_stop`` but not any nested + ``debug_print``. + *fname* can be ``-`` to log to *stderr*. 
+ The *+fname* form can be used if there is a *:* in fname + + ``:``\ *fname* + Full logging, including ``debug_print``. + + *prefix*\ ``:``\ *fname* + Conditional logging. + Multiple prefixes can be specified, comma-separated. + Only sections whose name match the prefix will be logged. + + ``PYPYLOG=jit-log-opt,jit-backend:logfile`` will + generate a log suitable for *jitviewer*, a tool for debugging + performance issues under PyPy. + +``PYPY_IRC_TOPIC`` + If set to a non-empty value, print a random #pypy IRC + topic at startup of interactive mode. + + +.. include:: ../gc_info.rst + :start-line: 7 + +SEE ALSO +======== + +**python3**\ (1) diff --git a/pypy/doc/objspace-proxies.rst b/pypy/doc/objspace-proxies.rst --- a/pypy/doc/objspace-proxies.rst +++ b/pypy/doc/objspace-proxies.rst @@ -1,28 +1,7 @@ -What PyPy can do for your objects -================================= - -.. contents:: - - -Thanks to the :doc:`Object Space ` architecture, any feature that is -based on proxying, extending, changing or otherwise controlling the -behavior of objects in a running program is easy to implement on top of PyPy. - -Here is what we have implemented so far, in historical order: - -* *Dump Object Space*: dumps all operations performed on all the objects - into a large log file. For debugging your applications. - -* *Transparent Proxies Extension*: adds new proxy objects to - the Standard Object Space that enable applications to - control operations on application and builtin objects, - e.g lists, dictionaries, tracebacks. - - .. _tproxy: -Transparent Proxies -------------------- +Transparent Proxies (DEPRECATED) +-------------------------------- .. warning:: @@ -194,7 +173,7 @@ application-level code. Transparent proxies are implemented on top of the :ref:`standard object -space `, in :source:`pypy/objspace/std/proxy_helpers.py`, +space `, in :source:`pypy/objspace/std/proxyobject.py`, :source:`pypy/objspace/std/proxyobject.py` and :source:`pypy/objspace/std/transparent.py`. 
To use them you will need to pass a `--objspace-std-withtproxy`_ option to ``pypy`` or ``translate.py``. This registers implementations named :py:class:`W_TransparentXxx` diff --git a/pypy/doc/objspace.rst b/pypy/doc/objspace.rst --- a/pypy/doc/objspace.rst +++ b/pypy/doc/objspace.rst @@ -474,8 +474,8 @@ :source:`pypy/objspace/std/bytesobject.py` defines ``W_AbstractBytesObject``, which contains everything needed to build the ``str`` app-level type; and there are subclasses ``W_BytesObject`` (the usual string) and -``W_StringBufferObject`` (a special implementation tweaked for repeated -additions, in :source:`pypy/objspace/std/strbufobject.py`). For mutable data +``W_Buffer`` (a special implementation tweaked for repeated +additions, in :source:`pypy/objspace/std/bufferobject.py`). For mutable data types like lists and dictionaries, we have a single class From pypy.commits at gmail.com Tue Jun 5 14:36:29 2018 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 05 Jun 2018 11:36:29 -0700 (PDT) Subject: [pypy-commit] pypy default: make the parser only recognize variables when using the reverse debugger Message-ID: <5b16d82d.1c69fb81.92ae1.1429@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r94725:1fc65745ebb4 Date: 2018-06-05 20:35 +0200 http://bitbucket.org/pypy/pypy/changeset/1fc65745ebb4/ Log: make the parser only recognize variables when using the reverse debugger diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ -27,7 +27,7 @@ generator._resolve_block_targets(blocks) return generator, blocks -class TestCompiler: +class BaseTestCompiler: """These tests compile snippets of code and check them by running them with our own interpreter. 
These are thus not completely *unit* tests, but given that our interpreter is @@ -74,6 +74,9 @@ def error_test(self, source, exc_type): py.test.raises(exc_type, self.simple_test, source, None, None) + +class TestCompiler(BaseTestCompiler): + def test_issue_713(self): func = "def f(_=2): return (_ if _ else _) if False else _" yield self.st, func, "f()", 2 @@ -953,9 +956,11 @@ yield (self.st, "x=(lambda: (-0.0, 0.0), lambda: (0.0, -0.0))[1]()", 'repr(x)', '(0.0, -0.0)') +class TestCompilerRevDB(BaseTestCompiler): + spaceconfig = {"translation.reverse_debugger": True} + def test_revdb_metavar(self): from pypy.interpreter.reverse_debugging import dbstate, setup_revdb - self.space.config.translation.reverse_debugger = True self.space.reverse_debugging = True try: setup_revdb(self.space) diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py --- a/pypy/interpreter/pyparser/parser.py +++ b/pypy/interpreter/pyparser/parser.py @@ -28,6 +28,7 @@ new.symbol_ids = self.symbol_ids new.symbols_names = self.symbol_names new.keyword_ids = self.keyword_ids + new.token_to_error_string = self.token_to_error_string new.dfas = self.dfas new.labels = self.labels new.token_ids = self.token_ids diff --git a/pypy/interpreter/pyparser/pygram.py b/pypy/interpreter/pyparser/pygram.py --- a/pypy/interpreter/pyparser/pygram.py +++ b/pypy/interpreter/pyparser/pygram.py @@ -23,6 +23,17 @@ python_grammar_no_print.keyword_ids = python_grammar_no_print.keyword_ids.copy() del python_grammar_no_print.keyword_ids["print"] +python_grammar_revdb = python_grammar.shared_copy() +python_grammar_no_print_revdb = python_grammar_no_print.shared_copy() +copied_token_ids = python_grammar.token_ids.copy() +python_grammar_revdb.token_ids = copied_token_ids +python_grammar_no_print_revdb.token_ids = copied_token_ids + +metavar_token_id = pytoken.python_tokens['REVDBMETAVAR'] +# the following line affects python_grammar_no_print too, since they share the +# dict +del 
python_grammar.token_ids[metavar_token_id] + class _Tokens(object): pass for tok_name, idx in pytoken.python_tokens.iteritems(): @@ -39,3 +50,16 @@ syms._rev_lookup = rev_lookup # for debugging del _get_python_grammar, _Tokens, tok_name, sym_name, idx + +def choose_grammar(print_function, revdb): + if print_function: + if revdb: + return python_grammar_no_print_revdb + else: + return python_grammar_no_print + else: + if revdb: + return python_grammar_revdb + else: + return python_grammar + diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py --- a/pypy/interpreter/pyparser/pyparse.py +++ b/pypy/interpreter/pyparser/pyparse.py @@ -160,10 +160,9 @@ compile_info.last_future_import = last_future_import compile_info.flags |= newflags - if compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION: - self.grammar = pygram.python_grammar_no_print - else: - self.grammar = pygram.python_grammar + self.grammar = pygram.choose_grammar( + print_function=compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION, + revdb=self.space.config.translation.reverse_debugger) for tp, value, lineno, column, line in tokens: if self.add_token(tp, value, lineno, column, line): diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py --- a/pypy/interpreter/pyparser/test/test_pyparse.py +++ b/pypy/interpreter/pyparser/test/test_pyparse.py @@ -168,13 +168,11 @@ assert expected_tree == tree def test_revdb_dollar_num(self): - self.parse('$0') - self.parse('$5') - self.parse('$42') - self.parse('2+$42.attrname') - py.test.raises(SyntaxError, self.parse, '$') - py.test.raises(SyntaxError, self.parse, '$a') - py.test.raises(SyntaxError, self.parse, '$.5') + assert not self.space.config.translation.reverse_debugger + py.test.raises(SyntaxError, self.parse, '$0') + py.test.raises(SyntaxError, self.parse, '$0 + 5') + py.test.raises(SyntaxError, self.parse, + "from __future__ import print_function\nx = ($0, print)") def 
test_error_forgotten_chars(self): info = py.test.raises(SyntaxError, self.parse, "if 1\n print 4") @@ -183,3 +181,18 @@ assert "(expected ':')" in info.value.msg info = py.test.raises(SyntaxError, self.parse, "def f:\n print 1") assert "(expected '(')" in info.value.msg + +class TestPythonParserRevDB(TestPythonParser): + spaceconfig = {"translation.reverse_debugger": True} + + def test_revdb_dollar_num(self): + self.parse('$0') + self.parse('$5') + self.parse('$42') + self.parse('2+$42.attrname') + self.parse("from __future__ import print_function\nx = ($0, print)") + py.test.raises(SyntaxError, self.parse, '$') + py.test.raises(SyntaxError, self.parse, '$a') + py.test.raises(SyntaxError, self.parse, '$.5') + + From pypy.commits at gmail.com Tue Jun 5 17:32:37 2018 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 05 Jun 2018 14:32:37 -0700 (PDT) Subject: [pypy-commit] pypy pyparser-improvements-3: yet a slightly different interface to fix the parser module Message-ID: <5b170175.1c69fb81.38133.14b2@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: pyparser-improvements-3 Changeset: r94726:c6dd81b203df Date: 2018-06-05 23:12 +0200 http://bitbucket.org/pypy/pypy/changeset/c6dd81b203df/ Log: yet a slightly different interface to fix the parser module diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py --- a/pypy/interpreter/pyparser/parser.py +++ b/pypy/interpreter/pyparser/parser.py @@ -138,11 +138,16 @@ class Terminal(Node): __slots__ = ("value", "lineno", "column") - def __init__(self, token): - Node.__init__(self, token.token_type) - self.value = token.value - self.lineno = token.lineno - self.column = token.column + def __init__(self, type, value, lineno, column): + Node.__init__(self, type) + self.value = value + self.lineno = lineno + self.column = column + + @staticmethod + def fromtoken(token): + return Terminal( + token.token_type, token.value, token.lineno, token.column) def __repr__(self): return 
"Terminal(type=%s, value=%r)" % (self.type, self.value) @@ -335,7 +340,7 @@ def shift(self, next_state, token): """Shift a non-terminal and prepare for the next state.""" - new_node = Terminal(token) + new_node = Terminal.fromtoken(token) self.stack.node_append_child(new_node) self.stack.state = next_state diff --git a/pypy/interpreter/pyparser/test/test_parser.py b/pypy/interpreter/pyparser/test/test_parser.py --- a/pypy/interpreter/pyparser/test/test_parser.py +++ b/pypy/interpreter/pyparser/test/test_parser.py @@ -58,7 +58,7 @@ value = "\n" else: value = "" - n = parser.Terminal(parser.Token(tp, value, 0, 0, '')) + n = parser.Terminal(tp, value, 0, 0) else: tp = gram.symbol_ids[data[0]] n = parser.Nonterminal(tp) From pypy.commits at gmail.com Tue Jun 5 17:33:19 2018 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 05 Jun 2018 14:33:19 -0700 (PDT) Subject: [pypy-commit] pypy pyparser-improvements-3: whatsnew Message-ID: <5b17019f.1c69fb81.44d9b.972c@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: pyparser-improvements-3 Changeset: r94728:df69bab974f4 Date: 2018-06-05 23:32 +0200 http://bitbucket.org/pypy/pypy/changeset/df69bab974f4/ Log: whatsnew diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -27,3 +27,8 @@ The reverse-debugger branch has been merged. For more information, see https://bitbucket.org/pypy/revdb + + +.. branch: pyparser-improvements-3 + +Small refactorings in the Python parser. 
From pypy.commits at gmail.com Tue Jun 5 17:32:39 2018 From: pypy.commits at gmail.com (cfbolz) Date: Tue, 05 Jun 2018 14:32:39 -0700 (PDT) Subject: [pypy-commit] pypy pyparser-improvements-3: slightly less silly code in the parser module Message-ID: <5b170177.1c69fb81.bd48d.2e14@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: pyparser-improvements-3 Changeset: r94727:159368e940b5 Date: 2018-06-05 23:26 +0200 http://bitbucket.org/pypy/pypy/changeset/159368e940b5/ Log: slightly less silly code in the parser module diff --git a/pypy/module/parser/pyparser.py b/pypy/module/parser/pyparser.py --- a/pypy/module/parser/pyparser.py +++ b/pypy/module/parser/pyparser.py @@ -133,10 +133,9 @@ space.newtext(message)) -def get_node_type(space, w_tuple): +def get_node_type(space, tup_w, w_tuple): try: - w_type = space.getitem(w_tuple, space.newint(0)) - return space.int_w(w_type) + return space.int_w(tup_w[0]) except OperationError: raise parser_error(space, w_tuple, "Illegal component tuple.") @@ -145,44 +144,47 @@ self.lineno = 0 def build_node_tree(space, w_tuple): - type = get_node_type(space, w_tuple) + tup_w = space.unpackiterable(w_tuple) + if len(tup_w) == 0: + raise parser_error(space, w_tuple, "tuple too short") + + type = get_node_type(space, tup_w, w_tuple) node_state = NodeState() if 0 <= type < 256: # The tuple is simple, but it doesn't start with a start symbol. # Raise an exception now and be done with it. 
raise parser_error(space, w_tuple, "Illegal syntax-tree; cannot start with terminal symbol.") + return build_node_children(space, type, tup_w, node_state) + +def build_node_children(space, type, tup_w, node_state): node = pyparse.parser.Nonterminal(type) - build_node_children(space, w_tuple, node, node_state) - return node - -def build_node_children(space, w_tuple, node, node_state): - for w_elem in space.unpackiterable(w_tuple)[1:]: - type = get_node_type(space, w_elem) + for i in range(1, len(tup_w)): + w_elem = tup_w[i] + subtup_w = space.unpackiterable(w_elem) + type = get_node_type(space, subtup_w, w_elem) if type < 256: # Terminal node - length = space.len_w(w_elem) + length = len(subtup_w) if length == 2: - _, w_obj = space.unpackiterable(w_elem, 2) + _, w_obj = subtup_w elif length == 3: - _, w_obj, w_lineno = space.unpackiterable(w_elem, 3) + _, w_obj, w_lineno = subtup_w else: raise parse_error( space, "terminal nodes must have 2 or 3 entries") strn = space.text_w(w_obj) child = pyparse.parser.Terminal(type, strn, node_state.lineno, 0) else: - child = pyparse.parser.Nonterminal(type) + child = build_node_children(space, type, subtup_w, node_state) node.append_child(child) - if type >= 256: # Nonterminal node - build_node_children(space, w_elem, child, node_state) - elif type == pyparse.pygram.tokens.NEWLINE: + if type == pyparse.pygram.tokens.NEWLINE: node_state.lineno += 1 + return node -def validate_node(space, tree): +def validate_node(space, tree, parser): assert tree.type >= 256 type = tree.type - 256 - parser = pyparse.PythonParser(space) if type >= len(parser.grammar.dfas): raise parse_error(space, "Unrecognized node type %d." % type) dfa = parser.grammar.dfas[type] @@ -195,7 +197,7 @@ if label == ch.type: # The child is acceptable; validate it recursively if ch.type >= 256: - validate_node(space, ch) + validate_node(space, ch, parser) # Update the state, and move on to the next child. 
arcs, is_accepting = dfa.states[next_state] break @@ -209,5 +211,6 @@ def tuple2st(space, w_sequence): # Convert the tree to the internal form before checking it tree = build_node_tree(space, w_sequence) - validate_node(space, tree) + parser = pyparse.PythonParser(space) + validate_node(space, tree, parser) return W_STType(tree, 'eval') From pypy.commits at gmail.com Wed Jun 6 09:13:28 2018 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 06 Jun 2018 06:13:28 -0700 (PDT) Subject: [pypy-commit] pypy pyparser-improvements-3: merge default Message-ID: <5b17ddf8.1c69fb81.3f04a.4fec@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: pyparser-improvements-3 Changeset: r94729:37acacd15a8b Date: 2018-06-06 14:27 +0200 http://bitbucket.org/pypy/pypy/changeset/37acacd15a8b/ Log: merge default diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ -27,7 +27,7 @@ generator._resolve_block_targets(blocks) return generator, blocks -class TestCompiler: +class BaseTestCompiler: """These tests compile snippets of code and check them by running them with our own interpreter. 
These are thus not completely *unit* tests, but given that our interpreter is @@ -74,6 +74,9 @@ def error_test(self, source, exc_type): py.test.raises(exc_type, self.simple_test, source, None, None) + +class TestCompiler(BaseTestCompiler): + def test_issue_713(self): func = "def f(_=2): return (_ if _ else _) if False else _" yield self.st, func, "f()", 2 @@ -953,9 +956,11 @@ yield (self.st, "x=(lambda: (-0.0, 0.0), lambda: (0.0, -0.0))[1]()", 'repr(x)', '(0.0, -0.0)') +class TestCompilerRevDB(BaseTestCompiler): + spaceconfig = {"translation.reverse_debugger": True} + def test_revdb_metavar(self): from pypy.interpreter.reverse_debugging import dbstate, setup_revdb - self.space.config.translation.reverse_debugger = True self.space.reverse_debugging = True try: setup_revdb(self.space) diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py --- a/pypy/interpreter/pyparser/parser.py +++ b/pypy/interpreter/pyparser/parser.py @@ -28,6 +28,7 @@ new.symbol_ids = self.symbol_ids new.symbols_names = self.symbol_names new.keyword_ids = self.keyword_ids + new.token_to_error_string = self.token_to_error_string new.dfas = self.dfas new.labels = self.labels new.token_ids = self.token_ids diff --git a/pypy/interpreter/pyparser/pygram.py b/pypy/interpreter/pyparser/pygram.py --- a/pypy/interpreter/pyparser/pygram.py +++ b/pypy/interpreter/pyparser/pygram.py @@ -23,6 +23,17 @@ python_grammar_no_print.keyword_ids = python_grammar_no_print.keyword_ids.copy() del python_grammar_no_print.keyword_ids["print"] +python_grammar_revdb = python_grammar.shared_copy() +python_grammar_no_print_revdb = python_grammar_no_print.shared_copy() +copied_token_ids = python_grammar.token_ids.copy() +python_grammar_revdb.token_ids = copied_token_ids +python_grammar_no_print_revdb.token_ids = copied_token_ids + +metavar_token_id = pytoken.python_tokens['REVDBMETAVAR'] +# the following line affects python_grammar_no_print too, since they share the +# dict +del 
python_grammar.token_ids[metavar_token_id] + class _Tokens(object): pass for tok_name, idx in pytoken.python_tokens.iteritems(): @@ -39,3 +50,16 @@ syms._rev_lookup = rev_lookup # for debugging del _get_python_grammar, _Tokens, tok_name, sym_name, idx + +def choose_grammar(print_function, revdb): + if print_function: + if revdb: + return python_grammar_no_print_revdb + else: + return python_grammar_no_print + else: + if revdb: + return python_grammar_revdb + else: + return python_grammar + diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py --- a/pypy/interpreter/pyparser/pyparse.py +++ b/pypy/interpreter/pyparser/pyparse.py @@ -165,10 +165,9 @@ compile_info.last_future_import = last_future_import compile_info.flags |= newflags - if compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION: - self.grammar = pygram.python_grammar_no_print - else: - self.grammar = pygram.python_grammar + self.grammar = pygram.choose_grammar( + print_function=compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION, + revdb=self.space.config.translation.reverse_debugger) try: for token in tokens: diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py --- a/pypy/interpreter/pyparser/test/test_pyparse.py +++ b/pypy/interpreter/pyparser/test/test_pyparse.py @@ -168,13 +168,11 @@ assert expected_tree == tree def test_revdb_dollar_num(self): - self.parse('$0') - self.parse('$5') - self.parse('$42') - self.parse('2+$42.attrname') - py.test.raises(SyntaxError, self.parse, '$') - py.test.raises(SyntaxError, self.parse, '$a') - py.test.raises(SyntaxError, self.parse, '$.5') + assert not self.space.config.translation.reverse_debugger + py.test.raises(SyntaxError, self.parse, '$0') + py.test.raises(SyntaxError, self.parse, '$0 + 5') + py.test.raises(SyntaxError, self.parse, + "from __future__ import print_function\nx = ($0, print)") def test_error_forgotten_chars(self): info = py.test.raises(SyntaxError, 
self.parse, "if 1\n print 4") @@ -183,3 +181,18 @@ assert "(expected ':')" in info.value.msg info = py.test.raises(SyntaxError, self.parse, "def f:\n print 1") assert "(expected '(')" in info.value.msg + +class TestPythonParserRevDB(TestPythonParser): + spaceconfig = {"translation.reverse_debugger": True} + + def test_revdb_dollar_num(self): + self.parse('$0') + self.parse('$5') + self.parse('$42') + self.parse('2+$42.attrname') + self.parse("from __future__ import print_function\nx = ($0, print)") + py.test.raises(SyntaxError, self.parse, '$') + py.test.raises(SyntaxError, self.parse, '$a') + py.test.raises(SyntaxError, self.parse, '$.5') + + From pypy.commits at gmail.com Wed Jun 6 09:13:32 2018 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 06 Jun 2018 06:13:32 -0700 (PDT) Subject: [pypy-commit] pypy default: merge pyparser-improvements-3 Message-ID: <5b17ddfc.1c69fb81.ec3fa.db32@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r94730:e85e93d7927e Date: 2018-06-06 15:11 +0200 http://bitbucket.org/pypy/pypy/changeset/e85e93d7927e/ Log: merge pyparser-improvements-3 some small refactorings in interpreter/pyparser and module/parser diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -27,3 +27,8 @@ The reverse-debugger branch has been merged. For more information, see https://bitbucket.org/pypy/revdb + + +.. branch: pyparser-improvements-3 + +Small refactorings in the Python parser. 
diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -43,7 +43,7 @@ self.tok = self.tokens[index] def skip(self, n): - if self.tok[0] == n: + if self.tok.token_type == n: self.next() return True else: @@ -51,7 +51,7 @@ def skip_name(self, name): from pypy.interpreter.pyparser import pygram - if self.tok[0] == pygram.tokens.NAME and self.tok[1] == name: + if self.tok.token_type == pygram.tokens.NAME and self.tok.value == name: self.next() return True else: @@ -59,8 +59,8 @@ def next_feature_name(self): from pypy.interpreter.pyparser import pygram - if self.tok[0] == pygram.tokens.NAME: - name = self.tok[1] + if self.tok.token_type == pygram.tokens.NAME: + name = self.tok.value self.next() if self.skip_name("as"): self.skip(pygram.tokens.NAME) @@ -101,7 +101,7 @@ # somewhere inside the last __future__ import statement # (at the start would be fine too, but it's easier to grab a # random position inside) - last_position = (it.tok[2], it.tok[3]) + last_position = (it.tok.lineno, it.tok.column) result |= future_flags.get_compiler_feature(it.next_feature_name()) while it.skip(pygram.tokens.COMMA): result |= future_flags.get_compiler_feature(it.next_feature_name()) diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py --- a/pypy/interpreter/pyparser/parser.py +++ b/pypy/interpreter/pyparser/parser.py @@ -34,6 +34,18 @@ new.token_ids = self.token_ids return new + + def classify(self, token): + """Find the label for a token.""" + if token.token_type == self.KEYWORD_TOKEN: + label_index = self.keyword_ids.get(token.value, -1) + if label_index != -1: + return label_index + label_index = self.token_ids.get(token.token_type, -1) + if label_index == -1: + raise ParseError("invalid token", token) + return label_index + def _freeze_(self): # Remove some attributes not used in parsing. 
try: @@ -66,6 +78,33 @@ b[pos] |= bit return str(b) + +class Token(object): + def __init__(self, token_type, value, lineno, column, line): + self.token_type = token_type + self.value = value + self.lineno = lineno + # 0-based offset + self.column = column + self.line = line + + def __repr__(self): + return "Token(%s, %s)" % (self.token_type, self.value) + + def __eq__(self, other): + # for tests + return ( + self.token_type == other.token_type and + self.value == other.value and + self.lineno == other.lineno and + self.column == other.column and + self.line == other.line + ) + + def __ne__(self, other): + return not self == other + + class Node(object): __slots__ = ("type", ) @@ -106,6 +145,11 @@ self.lineno = lineno self.column = column + @staticmethod + def fromtoken(token): + return Terminal( + token.token_type, token.value, token.lineno, token.column) + def __repr__(self): return "Terminal(type=%s, value=%r)" % (self.type, self.value) @@ -194,20 +238,14 @@ class ParseError(Exception): - def __init__(self, msg, token_type, value, lineno, column, line, - expected=-1, expected_str=None): + def __init__(self, msg, token, expected=-1, expected_str=None): self.msg = msg - self.token_type = token_type - self.value = value - self.lineno = lineno - # this is a 0-based index - self.column = column - self.line = line + self.token = token self.expected = expected self.expected_str = expected_str def __str__(self): - return "ParserError(%s, %r)" % (self.token_type, self.value) + return "ParserError(%s)" % (self.token, ) class StackEntry(object): @@ -250,8 +288,8 @@ self.root = None self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0) - def add_token(self, token_type, value, lineno, column, line): - label_index = self.classify(token_type, value, lineno, column, line) + def add_token(self, token): + label_index = self.grammar.classify(token) sym_id = 0 # for the annotator while True: dfa = self.stack.dfa @@ -262,7 +300,7 @@ sym_id = self.grammar.labels[i] if 
label_index == i: # We matched a non-terminal. - self.shift(next_state, token_type, value, lineno, column) + self.shift(next_state, token) state = states[next_state] # While the only possible action is to accept, pop nodes off # the stack. @@ -279,8 +317,7 @@ sub_node_dfa = self.grammar.dfas[sym_id - 256] # Check if this token can start a child node. if sub_node_dfa.could_match_token(label_index): - self.push(sub_node_dfa, next_state, sym_id, lineno, - column) + self.push(sub_node_dfa, next_state, sym_id) break else: # We failed to find any arcs to another state, so unless this @@ -288,8 +325,7 @@ if is_accepting: self.pop() if self.stack is None: - raise ParseError("too much input", token_type, value, - lineno, column, line) + raise ParseError("too much input", token) else: # If only one possible input would satisfy, attach it to the # error. @@ -300,28 +336,16 @@ else: expected = -1 expected_str = None - raise ParseError("bad input", token_type, value, lineno, - column, line, expected, expected_str) + raise ParseError("bad input", token, expected, expected_str) - def classify(self, token_type, value, lineno, column, line): - """Find the label for a token.""" - if token_type == self.grammar.KEYWORD_TOKEN: - label_index = self.grammar.keyword_ids.get(value, -1) - if label_index != -1: - return label_index - label_index = self.grammar.token_ids.get(token_type, -1) - if label_index == -1: - raise ParseError("invalid token", token_type, value, lineno, column, - line) - return label_index - def shift(self, next_state, token_type, value, lineno, column): + def shift(self, next_state, token): """Shift a non-terminal and prepare for the next state.""" - new_node = Terminal(token_type, value, lineno, column) + new_node = Terminal.fromtoken(token) self.stack.node_append_child(new_node) self.stack.state = next_state - def push(self, next_dfa, next_state, node_type, lineno, column): + def push(self, next_dfa, next_state, node_type): """Push a terminal and adjust the current 
state.""" self.stack.state = next_state self.stack = self.stack.push(next_dfa, 0) diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py --- a/pypy/interpreter/pyparser/pyparse.py +++ b/pypy/interpreter/pyparser/pyparse.py @@ -147,37 +147,37 @@ flags &= ~consts.PyCF_DONT_IMPLY_DEDENT self.prepare(_targets[compile_info.mode]) - tp = 0 try: try: # Note: we no longer pass the CO_FUTURE_* to the tokenizer, # which is expected to work independently of them. It's # certainly the case for all futures in Python <= 2.7. tokens = pytokenizer.generate_tokens(source_lines, flags) - - newflags, last_future_import = ( - future.add_future_flags(self.future_flags, tokens)) - compile_info.last_future_import = last_future_import - compile_info.flags |= newflags - - self.grammar = pygram.choose_grammar( - print_function=compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION, - revdb=self.space.config.translation.reverse_debugger) - - for tp, value, lineno, column, line in tokens: - if self.add_token(tp, value, lineno, column, line): - break except error.TokenError as e: e.filename = compile_info.filename raise except error.TokenIndentationError as e: e.filename = compile_info.filename raise + + newflags, last_future_import = ( + future.add_future_flags(self.future_flags, tokens)) + compile_info.last_future_import = last_future_import + compile_info.flags |= newflags + + self.grammar = pygram.choose_grammar( + print_function=compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION, + revdb=self.space.config.translation.reverse_debugger) + + try: + for token in tokens: + if self.add_token(token): + break except parser.ParseError as e: # Catch parse errors, pretty them up and reraise them as a # SyntaxError. 
new_err = error.IndentationError - if tp == pygram.tokens.INDENT: + if token.token_type == pygram.tokens.INDENT: msg = "unexpected indent" elif e.expected == pygram.tokens.INDENT: msg = "expected an indented block" @@ -189,7 +189,7 @@ # parser.ParseError(...).column is 0-based, but the offsets in the # exceptions in the error module are 1-based, hence the '+ 1' - raise new_err(msg, e.lineno, e.column + 1, e.line, + raise new_err(msg, e.token.lineno, e.token.column + 1, e.token.line, compile_info.filename) else: tree = self.root diff --git a/pypy/interpreter/pyparser/pytokenize.py b/pypy/interpreter/pyparser/pytokenize.py --- a/pypy/interpreter/pyparser/pytokenize.py +++ b/pypy/interpreter/pyparser/pytokenize.py @@ -1,9 +1,6 @@ # ______________________________________________________________________ """Module pytokenize -THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED -TO BE ANNOTABLE (Mainly made lists homogeneous) - This is a modified version of Ka-Ping Yee's tokenize module found in the Python standard library. @@ -12,7 +9,6 @@ expressions have been replaced with hand built DFA's using the basil.util.automata module. 
-$Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $ """ # ______________________________________________________________________ @@ -65,22 +61,3 @@ single_quoted[t] = t tabsize = 8 - -# PYPY MODIFICATION: removed TokenError class as it's not needed here - -# PYPY MODIFICATION: removed StopTokenizing class as it's not needed here - -# PYPY MODIFICATION: removed printtoken() as it's not needed here - -# PYPY MODIFICATION: removed tokenize() as it's not needed here - -# PYPY MODIFICATION: removed tokenize_loop() as it's not needed here - -# PYPY MODIFICATION: removed generate_tokens() as it was copied / modified -# in pythonlexer.py - -# PYPY MODIFICATION: removed main() as it's not needed here - -# ______________________________________________________________________ -# End of pytokenize.py - diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py --- a/pypy/interpreter/pyparser/pytokenizer.py +++ b/pypy/interpreter/pyparser/pytokenizer.py @@ -1,4 +1,5 @@ from pypy.interpreter.pyparser import automata +from pypy.interpreter.pyparser.parser import Token from pypy.interpreter.pyparser.pygram import tokens from pypy.interpreter.pyparser.pytoken import python_opmap from pypy.interpreter.pyparser.error import TokenError, TokenIndentationError @@ -103,7 +104,7 @@ endmatch = endDFA.recognize(line) if endmatch >= 0: pos = end = endmatch - tok = (tokens.STRING, contstr + line[:end], strstart[0], + tok = Token(tokens.STRING, contstr + line[:end], strstart[0], strstart[1], line) token_list.append(tok) last_comment = '' @@ -111,7 +112,7 @@ contline = None elif (needcont and not line.endswith('\\\n') and not line.endswith('\\\r\n')): - tok = (tokens.ERRORTOKEN, contstr + line, strstart[0], + tok = Token(tokens.ERRORTOKEN, contstr + line, strstart[0], strstart[1], line) token_list.append(tok) last_comment = '' @@ -140,11 +141,11 @@ if column > indents[-1]: # count indents or dedents indents.append(column) - 
token_list.append((tokens.INDENT, line[:pos], lnum, 0, line)) + token_list.append(Token(tokens.INDENT, line[:pos], lnum, 0, line)) last_comment = '' while column < indents[-1]: indents.pop() - token_list.append((tokens.DEDENT, '', lnum, pos, line)) + token_list.append(Token(tokens.DEDENT, '', lnum, pos, line)) last_comment = '' if column != indents[-1]: err = "unindent does not match any outer indentation level" @@ -177,11 +178,11 @@ token, initial = line[start:end], line[start] if initial in numchars or \ (initial == '.' and token != '.'): # ordinary number - token_list.append((tokens.NUMBER, token, lnum, start, line)) + token_list.append(Token(tokens.NUMBER, token, lnum, start, line)) last_comment = '' elif initial in '\r\n': if not parenstack: - tok = (tokens.NEWLINE, last_comment, lnum, start, line) + tok = Token(tokens.NEWLINE, last_comment, lnum, start, line) token_list.append(tok) last_comment = '' elif initial == '#': @@ -193,7 +194,7 @@ if endmatch >= 0: # all on one line pos = endmatch token = line[start:pos] - tok = (tokens.STRING, token, lnum, start, line) + tok = Token(tokens.STRING, token, lnum, start, line) token_list.append(tok) last_comment = '' else: @@ -212,16 +213,16 @@ contline = line break else: # ordinary string - tok = (tokens.STRING, token, lnum, start, line) + tok = Token(tokens.STRING, token, lnum, start, line) token_list.append(tok) last_comment = '' elif initial in namechars: # ordinary name - token_list.append((tokens.NAME, token, lnum, start, line)) + token_list.append(Token(tokens.NAME, token, lnum, start, line)) last_comment = '' elif initial == '\\': # continued stmt continued = 1 elif initial == '$': - token_list.append((tokens.REVDBMETAVAR, token, + token_list.append(Token(tokens.REVDBMETAVAR, token, lnum, start, line)) last_comment = '' else: @@ -246,7 +247,7 @@ punct = python_opmap[token] else: punct = tokens.OP - token_list.append((punct, token, lnum, start, line)) + token_list.append(Token(punct, token, lnum, start, line)) 
last_comment = '' else: start = whiteSpaceDFA.recognize(line, pos) @@ -255,22 +256,22 @@ if start= 256: # Nonterminal node - build_node_children(space, w_elem, child, node_state) - elif type == pyparse.pygram.tokens.NEWLINE: + if type == pyparse.pygram.tokens.NEWLINE: node_state.lineno += 1 + return node -def validate_node(space, tree): +def validate_node(space, tree, parser): assert tree.type >= 256 type = tree.type - 256 - parser = pyparse.PythonParser(space) if type >= len(parser.grammar.dfas): raise parse_error(space, "Unrecognized node type %d." % type) dfa = parser.grammar.dfas[type] @@ -195,7 +197,7 @@ if label == ch.type: # The child is acceptable; validate it recursively if ch.type >= 256: - validate_node(space, ch) + validate_node(space, ch, parser) # Update the state, and move on to the next child. arcs, is_accepting = dfa.states[next_state] break @@ -209,5 +211,6 @@ def tuple2st(space, w_sequence): # Convert the tree to the internal form before checking it tree = build_node_tree(space, w_sequence) - validate_node(space, tree) + parser = pyparse.PythonParser(space) + validate_node(space, tree, parser) return W_STType(tree, 'eval') From pypy.commits at gmail.com Thu Jun 7 11:59:12 2018 From: pypy.commits at gmail.com (wlav) Date: Thu, 07 Jun 2018 08:59:12 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: first stab at transparent smart pointer support and improved templated methods Message-ID: <5b195650.1c69fb81.3108c.294b@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94732:a1135702ca77 Date: 2018-05-18 10:28 -0700 http://bitbucket.org/pypy/pypy/changeset/a1135702ca77/ Log: first stab at transparent smart pointer support and improved templated methods diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -207,6 +207,8 @@ 'num_bases' : ([c_type], c_int), 'base_name' : ([c_type, c_int], 
c_ccharp), 'is_subtype' : ([c_type, c_type], c_int), + 'smartptr_info' : ([c_ccharp, c_voidp, c_voidp], c_int), + 'add_smartptr_type' : ([c_ccharp], c_void), 'base_offset' : ([c_type, c_type, c_object, c_int], c_ptrdiff_t), @@ -479,6 +481,21 @@ if derived == base: return bool(1) return space.bool_w(call_capi(space, 'is_subtype', [_ArgH(derived.handle), _ArgH(base.handle)])) +def c_smartptr_info(space, name): + out_raw = lltype.malloc(rffi.ULONGP.TO, 1, flavor='raw', zero=True) + out_deref = lltype.malloc(rffi.ULONGP.TO, 1, flavor='raw', zero=True) + try: + args = [_ArgS(name), + _ArgP(rffi.cast(rffi.VOIDP, out_raw)), _ArgP(rffi.cast(rffi.VOIDP, out_deref))] + result = space.bool_w(call_capi(space, 'smartptr_info', args)) + raw = rffi.cast(C_TYPE, out_raw[0]) + deref = rffi.cast(C_METHOD, out_deref[0]) + finally: + lltype.free(out_deref, flavor='raw') + lltype.free(out_raw, flavor='raw') + return (result, raw, deref) +def c_add_smartptr_type(space, name): + return space.bool_w(call_capi(space, 'add_smartptr_type', [_ArgS(name)])) def _c_base_offset(space, derived_h, base_h, address, direction): args = [_ArgH(derived_h), _ArgH(base_h), _ArgH(address), _ArgL(direction)] diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -700,6 +700,24 @@ "no overload found matching %s", self.signature) +class SmartPtrCppObjectConverter(TypeConverter): + _immutable_fields = ['smart', 'raw', 'deref'] + + def __init__(self, space, smartdecl, raw, deref): + from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl, get_pythonized_cppclass + self.smartdecl = smartdecl + w_raw = get_pythonized_cppclass(space, raw) + self.rawdecl = space.interp_w(W_CPPClassDecl, + space.findattr(w_raw, space.newtext("__cppdecl__"))) + self.deref = deref + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) + from 
pypy.module._cppyy import interp_cppyy + return interp_cppyy.wrap_cppinstance(space, address, + self.rawdecl, smartdecl=self.smartdecl, deref=self.deref, do_cast=False) + + class MacroConverter(TypeConverter): def from_memory(self, space, w_obj, w_pycppclass, offset): # TODO: get the actual type info from somewhere ... @@ -715,26 +733,25 @@ # 1) full, exact match # 1a) const-removed match # 2) match of decorated, unqualified type - # 3) accept ref as pointer (for the stubs, const& can be - # by value, but that does not work for the ffi path) - # 4) generalized cases (covers basically all user classes) - # 5) void* or void converter (which fails on use) + # 3) generalized cases (covers basically all user classes) + # 3a) smart pointers + # 4) void* or void converter (which fails on use) name = capi.c_resolve_name(space, _name) - # 1) full, exact match + # full, exact match try: return _converters[name](space, default) except KeyError: pass - # 1a) const-removed match + # const-removed match try: return _converters[helper.remove_const(name)](space, default) except KeyError: pass - # 2) match of decorated, unqualified type + # match of decorated, unqualified type compound = helper.compound(name) clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: @@ -744,15 +761,19 @@ except KeyError: pass - # 3) TODO: accept ref as pointer - - # 4) generalized cases (covers basically all user classes) + # generalized cases (covers basically all user classes) from pypy.module._cppyy import interp_cppyy scope_decl = interp_cppyy.scope_byname(space, clean_name) if scope_decl: - # type check for the benefit of the annotator from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl clsdecl = space.interp_w(W_CPPClassDecl, scope_decl, can_be_None=False) + + # check smart pointer type + check_smart = capi.c_smartptr_info(space, clean_name) + if check_smart[0]: + return SmartPtrCppObjectConverter(space, clsdecl, check_smart[1], check_smart[2]) + + # type check for the 
benefit of the annotator if compound == "*": return InstancePtrConverter(space, clsdecl) elif compound == "&": @@ -772,7 +793,7 @@ if pos > 0: return FunctionPointerConverter(space, name[pos+2:]) - # 5) void* or void converter (which fails on use) + # void* or void converter (which fails on use) if 0 <= compound.find('*'): return VoidPtrConverter(space, default) # "user knows best" diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -22,6 +22,13 @@ OVERLOAD_FLAGS_USE_FFI = 0x0001 +FUNCTION_IS_GLOBAL = 0x0001 +FUNCTION_IS_STATIC = 0x0001 +FUNCTION_IS_METHOD = 0x0002 +FUNCTION_IS_CONSTRUCTOR = 0x0004 +FUNCTION_IS_TEMPLATE = 0x0008 +FUNCTION_IS_SETITEM = 0x0010 + class FastCallNotPossible(Exception): pass @@ -101,9 +108,9 @@ state.cppscope_cache[final_scoped_name] = cppscope if not isns: - # build methods/data; TODO: also defer this for classes (a functional __dir__ + # build overloads/data; TODO: also defer this for classes (a functional __dir__ # and instrospection for help() is enough and allows more lazy loading) - cppscope._build_methods() + cppscope._build_overloads() cppscope._find_datamembers() return cppscope @@ -157,7 +164,7 @@ # CPPConstructor: specialization for allocating a new object # CPPFunction: specialization for free and static functions # CPPSetItem: specialization for Python's __setitem__ -# CPPTemplatedCall: trampoline to instantiate and bind templated functions +# CPPTemplateMethod: trampoline to instantiate and bind templated functions # W_CPPOverload, W_CPPConstructorOverload, W_CPPTemplateOverload: # user-facing, app-level, collection of overloads, with specializations # for constructors and templates @@ -456,7 +463,7 @@ return "CPPFunction: %s" % self.prototype() -class CPPTemplatedCall(CPPMethod): +class CPPTemplateMethod(CPPMethod): """Method dispatcher that first resolves the template instance.""" _attrs_ = ['space', 
'templ_args'] @@ -465,7 +472,7 @@ def __init__(self, space, templ_args, declaring_scope, method_index, arg_defs, args_required): self.space = space self.templ_args = templ_args - # TODO: might have to specialize for CPPTemplatedCall on CPPMethod/CPPFunction here + # TODO: might have to specialize for CPPTemplateMethod on CPPMethod/CPPFunction here CPPMethod.__init__(self, space, declaring_scope, method_index, arg_defs, args_required) def call(self, cppthis, args_w, useffi): @@ -486,7 +493,7 @@ return CPPMethod.call(self, cppthis, args_w, useffi) def __repr__(self): - return "CPPTemplatedCall: %s" % self.prototype() + return "CPPTemplateMethod: %s" % self.prototype() class CPPConstructor(CPPMethod): @@ -632,8 +639,8 @@ W_CPPOverload.typedef = TypeDef( 'CPPOverload', + call = interp2app(W_CPPOverload.call), is_static = interp2app(W_CPPOverload.is_static), - call = interp2app(W_CPPOverload.call), __useffi__ = GetSetProperty(W_CPPOverload.fget_useffi, W_CPPOverload.fset_useffi), prototype = interp2app(W_CPPOverload.prototype), ) @@ -668,8 +675,8 @@ W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', + call = interp2app(W_CPPConstructorOverload.call), is_static = interp2app(W_CPPConstructorOverload.is_static), - call = interp2app(W_CPPConstructorOverload.call), prototype = interp2app(W_CPPConstructorOverload.prototype), ) @@ -685,6 +692,10 @@ W_CPPTemplateOverload.typedef = TypeDef( 'CPPTemplateOverload', __getitem__ = interp2app(W_CPPTemplateOverload.call), + call = interp2app(W_CPPTemplateOverload.call), + is_static = interp2app(W_CPPTemplateOverload.is_static), + __useffi__ = GetSetProperty(W_CPPTemplateOverload.fget_useffi, W_CPPTemplateOverload.fset_useffi), + prototype = interp2app(W_CPPTemplateOverload.prototype), ) @@ -818,7 +829,7 @@ class W_CPPScopeDecl(W_Root): - _attrs_ = ['space', 'handle', 'flags', 'name', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'flags', 'name', 'overloads', 'datamembers'] _immutable_fields_ = ['handle', 
'name'] def __init__(self, space, opaque_handle, final_scoped_name): @@ -827,27 +838,27 @@ self.handle = opaque_handle self.flags = 0 self.name = final_scoped_name - self.methods = {} - # Do not call "self._build_methods()" here, so that a distinction can + self.overloads = {} + # Do not call "self._build_overloadss()" here, so that a distinction can # be made between testing for existence (i.e. existence in the cache # of classes) and actual use. Point being that a class can use itself, # e.g. as a return type or an argument to one of its methods. self.datamembers = {} - # Idem as for self.methods: a type could hold itself by pointer. + # Idem as for self.overloads: a type could hold itself by pointer. def get_method_names(self): - return self.space.newlist([self.space.newtext(name) for name in self.methods]) + return self.space.newlist([self.space.newtext(name) for name in self.overloads]) @unwrap_spec(name='text') def get_overload(self, name): try: - return self.methods[name] + return self.overloads[name] except KeyError: pass - new_method = self.find_overload(name) - self.methods[name] = new_method - return new_method + new_ol = self.find_overload(name) + self.overloads[name] = new_ol + return new_ol def get_datamember_names(self): return self.space.newlist([self.space.newtext(name) for name in self.datamembers]) @@ -883,10 +894,10 @@ # classes for inheritance. Both are python classes, though, and refactoring # may be in order at some point. 
class W_CPPNamespaceDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'name', 'overloads', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def _make_cppfunction(self, pyname, index): + def _make_cppfunction(self, pyname, index, funcs): num_args = capi.c_method_num_args(self.space, self, index) args_required = capi.c_method_req_args(self.space, self, index) arg_defs = [] @@ -894,7 +905,8 @@ arg_type = capi.c_method_arg_type(self.space, self, index, i) arg_dflt = capi.c_method_arg_default(self.space, self, index, i) arg_defs.append((arg_type, arg_dflt)) - return CPPFunction(self.space, self, index, arg_defs, args_required) + funcs.append(CPPFunction(self.space, self, index, arg_defs, args_required)) + return FUNCTION_IS_GLOBAL def _make_datamember(self, dm_name, dm_idx): type_name = capi.c_datamember_type(self.space, self, dm_idx) @@ -912,12 +924,12 @@ indices = capi.c_method_indices_from_name(self.space, self, meth_name) if not indices: raise self.missing_attribute_error(meth_name) - cppfunctions = [] + cppfunctions, ftype = [], 0 for meth_idx in indices: - f = self._make_cppfunction(meth_name, meth_idx) - cppfunctions.append(f) - overload = W_CPPOverload(self.space, self, cppfunctions) - return overload + ftype |= self._make_cppfunction(meth_name, meth_idx, cppfunctions) + if ftype & FUNCTION_IS_TEMPLATE: + return W_CPPTemplateOverload(self.sace, self, cppfunctions) + return W_CPPOverload(self.space, self, cppfunctions) def find_datamember(self, dm_name): dm_idx = capi.c_datamember_index(self.space, self, dm_name) @@ -956,12 +968,12 @@ class W_CPPClassDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] - _immutable_fields_ = ['handle', 'name', 'methods[*]', 'datamembers[*]'] + _attrs_ = ['space', 'handle', 'name', 'overloads', 'datamembers'] + _immutable_fields_ = ['handle', 'name', 'overloads[*]', 'datamembers[*]'] - def _build_methods(self): - assert 
len(self.methods) == 0 - methods_temp = {} + def _build_overloads(self): + assert len(self.overloads) == 0 + overloads_temp = {} for idx in range(capi.c_num_methods(self.space, self)): if capi.c_is_constructor(self.space, self, idx): pyname = '__init__' @@ -970,29 +982,36 @@ capi.c_method_name(self.space, self, idx), capi.c_method_num_args(self.space, self, idx), capi.c_method_result_type(self.space, self, idx)) - cppmethod = self._make_cppfunction(pyname, idx) - methods_temp.setdefault(pyname, []).append(cppmethod) + try: + detail = overloads_temp[pyname] + except KeyError: + detail = [[], 0]; overloads_temp[pyname] = detail + detail[1] |= self._make_cppfunction(pyname, idx, detail[0]) # the following covers the case where the only kind of operator[](idx) # returns are the ones that produce non-const references; these can be # used for __getitem__ just as much as for __setitem__, though - if not "__getitem__" in methods_temp: + if not "__getitem__" in overloads_temp: try: - for m in methods_temp["__setitem__"]: - cppmethod = self._make_cppfunction("__getitem__", m.index) - methods_temp.setdefault("__getitem__", []).append(cppmethod) + sid = overloads_temp["__setitem__"] + gid = [[], 0]; overloads_temp["__getitem__"] = gid + for m in sid[0]: + gid[1] |= self._make_cppfunction("__getitem__", m.index, gid[0]) except KeyError: pass # just means there's no __setitem__ either - # create the overload methods from the method sets - for pyname, methods in methods_temp.iteritems(): + # create the overloads from the method sets + for pyname, detail in overloads_temp.iteritems(): + methods = detail[0] CPPMethodSort(methods).sort() if pyname == '__init__': - overload = W_CPPConstructorOverload(self.space, self, methods[:]) + overload = W_CPPConstructorOverload(self.space, self, methods) + elif detail[1] & FUNCTION_IS_TEMPLATE: + overload = W_CPPTemplateOverload(self.space, self, methods) else: - overload = W_CPPOverload(self.space, self, methods[:]) - self.methods[pyname] = 
overload + overload = W_CPPOverload(self.space, self, methods) + self.overloads[pyname] = overload - def _make_cppfunction(self, pyname, index): + def _make_cppfunction(self, pyname, index, funcs): num_args = capi.c_method_num_args(self.space, self, index) args_required = capi.c_method_req_args(self.space, self, index) arg_defs = [] @@ -1000,18 +1019,25 @@ arg_type = capi.c_method_arg_type(self.space, self, index, i) arg_dflt = capi.c_method_arg_default(self.space, self, index, i) arg_defs.append((arg_type, arg_dflt)) + ftype = 0 if capi.c_is_constructor(self.space, self, index): cppfunction = CPPConstructor(self.space, self, index, arg_defs, args_required) + ftype = FUNCTION_IS_CONSTRUCTOR elif capi.c_method_is_template(self.space, self, index): templ_args = capi.c_template_args(self.space, self, index) - cppfunction = CPPTemplatedCall(self.space, templ_args, self, index, arg_defs, args_required) + cppfunction = CPPTemplateMethod(self.space, templ_args, self, index, arg_defs, args_required) + ftype = FUNCTION_IS_TEMPLATE elif capi.c_is_staticmethod(self.space, self, index): cppfunction = CPPFunction(self.space, self, index, arg_defs, args_required) + ftype = FUNCTION_IS_STATIC elif pyname == "__setitem__": cppfunction = CPPSetItem(self.space, self, index, arg_defs, args_required) + ftype = FUNCTION_IS_SETITEM else: cppfunction = CPPMethod(self.space, self, index, arg_defs, args_required) - return cppfunction + ftype = FUNCTION_IS_METHOD + funcs.append(cppfunction) + return ftype def _find_datamembers(self): num_datamembers = capi.c_num_datamembers(self.space, self) @@ -1106,13 +1132,14 @@ class W_CPPInstance(W_Root): - _attrs_ = ['space', 'clsdecl', '_rawobject', 'flags', + _attrs_ = ['space', 'clsdecl', '_rawobject', 'smartdecl', 'deref', 'flags', 'finalizer_registered'] - _immutable_fields_ = ['clsdecl'] + _immutable_fields_ = ['clsdecl', 'smartdecl', 'deref'] finalizer_registered = False - def __init__(self, space, decl, rawobject, isref, python_owns): + def 
__init__(self, space, decl, rawobject, isref, python_owns, + smartdecl=None, deref=rffi.cast(capi.C_METHOD, 0)): self.space = space self.clsdecl = decl assert lltype.typeOf(rawobject) == capi.C_OBJECT @@ -1120,11 +1147,13 @@ self._rawobject = rawobject assert not isref or not python_owns self.flags = 0 - if isref: + if isref or (smartdecl and deref): self.flags |= INSTANCE_FLAGS_IS_REF if python_owns: self.flags |= INSTANCE_FLAGS_PYTHON_OWNS self._opt_register_finalizer() + self.smartdecl = smartdecl + self.deref = deref def _opt_register_finalizer(self): if not self.finalizer_registered and not hasattr(self.space, "fake"): @@ -1156,6 +1185,11 @@ def get_rawobject(self): if not (self.flags & INSTANCE_FLAGS_IS_REF): return self._rawobject + elif self.smartdecl and self.deref: + args = capi.c_allocate_function_args(self.space, 0) + rawptr = capi.c_call_l(self.space, self.deref, self._rawobject, 0, args) + capi.c_deallocate_function_args(self.space, args) + return rffi.cast(capi.C_OBJECT, rawptr) else: ptrptr = rffi.cast(rffi.VOIDPP, self._rawobject) return rffi.cast(capi.C_OBJECT, ptrptr[0]) @@ -1192,8 +1226,9 @@ meth_idx = capi.c_get_global_operator( self.space, nss, self.clsdecl, other.clsdecl, "operator==") if meth_idx != -1: - f = nss._make_cppfunction("operator==", meth_idx) - ol = W_CPPOverload(self.space, nss, [f]) + funcs = [] + nss._make_cppfunction("operator==", meth_idx, funcs) + ol = W_CPPOverload(self.space, nss, funcs) # TODO: cache this operator (not done yet, as the above does not # select all overloads) return ol.call(self, [self, w_other]) @@ -1244,6 +1279,10 @@ return self.space.newtext("<%s object at 0x%x>" % (self.clsdecl.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) + def smartptr(self): + if self._rawobject and self.smartdecl: + return wrap_cppinstance(self.space, self._rawobject, self.smartdecl, do_cast=False) + def destruct(self): if self._rawobject and not (self.flags & INSTANCE_FLAGS_IS_REF): memory_regulator.unregister(self) @@ 
-1264,6 +1303,7 @@ __len__ = interp2app(W_CPPInstance.instance__len__), __cmp__ = interp2app(W_CPPInstance.instance__cmp__), __repr__ = interp2app(W_CPPInstance.instance__repr__), + __smartptr__ = interp2app(W_CPPInstance.smartptr), __destruct__ = interp2app(W_CPPInstance.destruct), ) W_CPPInstance.typedef.acceptable_as_base_class = True @@ -1314,6 +1354,7 @@ return space.call_function(state.w_fngen_callback, w_callable, space.newint(npar)) def wrap_cppinstance(space, rawobject, clsdecl, + smartdecl=None, deref=rffi.cast(capi.C_METHOD, 0), do_cast=True, python_owns=False, is_ref=False, fresh=False): rawobject = rffi.cast(capi.C_OBJECT, rawobject) @@ -1346,7 +1387,7 @@ # fresh creation w_cppinstance = space.allocate_instance(W_CPPInstance, w_pycppclass) cppinstance = space.interp_w(W_CPPInstance, w_cppinstance) - cppinstance.__init__(space, clsdecl, rawobject, is_ref, python_owns) + cppinstance.__init__(space, clsdecl, rawobject, is_ref, python_owns, smartdecl, deref) memory_regulator.register(cppinstance) return w_cppinstance diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -36,23 +36,22 @@ self._scope = scope def _arg_to_str(self, arg): - try: - arg = arg.__cppname__ - except AttributeError: - if arg == str: - import _cppyy - arg = _cppyy._std_string_name() - elif type(arg) != str: - arg = arg.__name__ - return arg + # arguments are strings representing types, types, or builtins + if type(arg) == str: + return arg # string describing type + elif hasattr(arg, '__cppname__'): + return arg.__cppname__ # C++ bound type + elif arg == str: + import _cppyy + return _cppyy._std_string_name() # special case pystr -> C++ string + elif isinstance(arg, type): # builtin types + return arg.__name__ + return str(arg) # builtin values def __call__(self, *args): fullname = ''.join( [self._name, '<', ','.join(map(self._arg_to_str, args))]) - if fullname[-1] == '>': - 
fullname += ' >' - else: - fullname += '>' + fullname += '>' return getattr(self._scope, fullname) def __getitem__(self, *args): diff --git a/pypy/module/_cppyy/test/templates.h b/pypy/module/_cppyy/test/templates.h --- a/pypy/module/_cppyy/test/templates.h +++ b/pypy/module/_cppyy/test/templates.h @@ -1,3 +1,7 @@ +#include +#include + + //=========================================================================== class MyTemplatedMethodClass { // template methods public: @@ -21,7 +25,7 @@ return sizeof(B); } -// +// typedef MyTemplatedMethodClass MyTMCTypedef_t; // explicit instantiation @@ -33,3 +37,109 @@ inline long MyTemplatedMethodClass::get_size() { return 42; } + + +//=========================================================================== +// global templated functions +template +long global_get_size() { + return sizeof(T); +} + +template +int global_some_foo(T) { + return 42; +} + +template +int global_some_bar(T) { + return 13; +} + + +//=========================================================================== +// variadic functions +inline bool isSomeInt(int) { return true; } +inline bool isSomeInt(double) { return false; } +template +inline bool isSomeInt(Args...) { return false; } + +namespace AttrTesting { + +struct Obj1 { int var1; }; +struct Obj2 { int var2; }; + +template +constexpr auto has_var1(T t) -> decltype(t.var1, true) { return true; } + +template +constexpr bool has_var1(Args...) 
{ return false; } + +template +constexpr bool call_has_var1(T&& t) { return AttrTesting::has_var1(std::forward(t)); } + +template +struct select_template_arg {}; + +template +struct select_template_arg<0, T0, T...> { + typedef T0 type; +}; + +template +struct select_template_arg { + typedef typename select_template_arg::type argument; +}; + +} // AttrTesting + + +namespace SomeNS { + +template +int some_foo(T) { + return 42; +} + +template +int some_bar() { + return T; +} + +inline std::string tuplify(std::ostringstream& out) { + out.seekp(-2, out.cur); out << ')'; + return out.str(); +} + +template +std::string tuplify(std::ostringstream& out, T value, Args... args) +{ + out << value << ", "; + return tuplify(out, args...); +} + +} // namespace SomeNS + + +//=========================================================================== +// using of static data +// TODO: this should live here instead of in test_templates.test08 +/* +template struct BaseClassWithStatic { + static T const ref_value; +}; + +template +T const BaseClassWithStatic::ref_value = 42; + +template +struct DerivedClassUsingStatic : public BaseClassWithStatic { + using BaseClassWithStatic::ref_value; + + explicit DerivedClassUsingStatic(T x) : BaseClassWithStatic() { + m_value = x > ref_value ? 
ref_value : x; + } + + T m_value; +}; +*/ diff --git a/pypy/module/_cppyy/test/templates.xml b/pypy/module/_cppyy/test/templates.xml --- a/pypy/module/_cppyy/test/templates.xml +++ b/pypy/module/_cppyy/test/templates.xml @@ -3,4 +3,17 @@ + + + + + + + + + + + + + diff --git a/pypy/module/_cppyy/test/test_cppyy.py b/pypy/module/_cppyy/test/test_cppyy.py --- a/pypy/module/_cppyy/test/test_cppyy.py +++ b/pypy/module/_cppyy/test/test_cppyy.py @@ -18,7 +18,7 @@ w_cppyyclass = interp_cppyy.scope_byname(space, "example01") w_cppyyclass2 = interp_cppyy.scope_byname(space, "example01") assert space.is_w(w_cppyyclass, w_cppyyclass2) - adddouble = w_cppyyclass.methods["staticAddToDouble"] + adddouble = w_cppyyclass.overloads["staticAddToDouble"] func, = adddouble.functions assert func.executor is None func._setup(None) # creates executor diff --git a/pypy/module/_cppyy/test/test_templates.py b/pypy/module/_cppyy/test/test_templates.py --- a/pypy/module/_cppyy/test/test_templates.py +++ b/pypy/module/_cppyy/test/test_templates.py @@ -25,8 +25,6 @@ m = _cppyy.gbl.MyTemplatedMethodClass() - return - # pre-instantiated assert m.get_size['char']() == m.get_char_size() assert m.get_size[int]() == m.get_int_size() @@ -41,3 +39,136 @@ # auto through typedef assert m.get_size['MyTMCTypedef_t']() == m.get_self_size() + + def test02_non_type_template_args(self): + """Use of non-types as template arguments""" + + import _cppyy + + _cppyy.gbl.gInterpreter.Declare("template int nt_templ_args() { return i; };") + + assert _cppyy.gbl.nt_templ_args[1]() == 1 + assert _cppyy.gbl.nt_templ_args[256]() == 256 + + def test03_templated_function(self): + """Templated global and static functions lookup and calls""" + + import _cppyy + + # TODO: the following only works if something else has already + # loaded the headers associated with this template + ggs = _cppyy.gbl.global_get_size + assert ggs['char']() == 1 + + gsf = _cppyy.gbl.global_some_foo + + assert gsf[int](3) == 42 + assert gsf(3) == 42 + 
assert gsf(3.) == 42 + + gsb = _cppyy.gbl.global_some_bar + + assert gsb(3) == 13 + assert gsb['double'](3.) == 13 + + # TODO: the following only works in a namespace + nsgsb = _cppyy.gbl.SomeNS.some_bar + + assert nsgsb[3] + assert nsgsb[3]() == 3 + + # TODO: add some static template method + + def test04_variadic_function(self): + """Call a variadic function""" + + import _cppyy + + s = _cppyy.gbl.std.ostringstream() + #s << '(' + #_cppyy.gbl.SomeNS.tuplify(s, 1, 4., "aap") + #assert s.str() == '(1, 4, aap) + + def test05_variadic_overload(self): + """Call an overloaded variadic function""" + + import _cppyy + + assert _cppyy.gbl.isSomeInt(3.) == False + assert _cppyy.gbl.isSomeInt(1) == True + assert _cppyy.gbl.isSomeInt() == False + assert _cppyy.gbl.isSomeInt(1, 2, 3) == False + + def test06_variadic_sfinae(self): + """Attribute testing through SFINAE""" + + import _cppyy + Obj1 = _cppyy.gbl.AttrTesting.Obj1 + Obj2 = _cppyy.gbl.AttrTesting.Obj2 + has_var1 = _cppyy.gbl.AttrTesting.has_var1 + call_has_var1 = _cppyy.gbl.AttrTesting.call_has_var1 + + move = _cppyy.gbl.std.move + + assert has_var1(Obj1()) == hasattr(Obj1(), 'var1') + assert has_var1(Obj2()) == hasattr(Obj2(), 'var1') + assert has_var1(3) == hasattr(3, 'var1') + assert has_var1("aap") == hasattr("aap", 'var1') + + assert call_has_var1(move(Obj1())) == True + assert call_has_var1(move(Obj2())) == False + + def test07_type_deduction(self): + """Traits/type deduction""" + + import _cppyy + Obj1 = _cppyy.gbl.AttrTesting.Obj1 + Obj2 = _cppyy.gbl.AttrTesting.Obj2 + select_template_arg = _cppyy.gbl.AttrTesting.has_var1 + + #assert select_template_arg[0, Obj1, Obj2].argument == Obj1 + assert select_template_arg[1, Obj1, Obj2].argument == Obj2 + raises(TypeError, select_template_arg.__getitem__, 2, Obj1, Obj2) + + # TODO, this doesn't work for builtin types as the 'argument' + # typedef will not resolve to a class + #assert select_template_arg[1, int, float].argument == float + + def 
test08_using_of_static_data(self): + """Derived class using static data of base""" + + import _cppyy + + # TODO: the following should live in templates.h, but currently fails + # in TClass::GetListOfMethods() + _cppyy.gbl.gInterpreter.Declare(""" + template struct BaseClassWithStatic { + static T const ref_value; + }; + + template + T const BaseClassWithStatic::ref_value = 42; + + template + struct DerivedClassUsingStatic : public BaseClassWithStatic { + using BaseClassWithStatic::ref_value; + + explicit DerivedClassUsingStatic(T x) : BaseClassWithStatic() { + m_value = x > ref_value ? ref_value : x; + } + + T m_value; + };""") + + + # TODO: the ref_value property is inaccessible (offset == -1) + # assert cppyy.gbl.BaseClassWithStatic["size_t"].ref_value == 42 + + b1 = _cppyy.gbl.DerivedClassUsingStatic["size_t"]( 0) + b2 = _cppyy.gbl.DerivedClassUsingStatic["size_t"](100) + + # assert b1.ref_value == 42 + assert b1.m_value == 0 + + # assert b2.ref_value == 42 + assert b2.m_value == 42 From pypy.commits at gmail.com Thu Jun 7 11:59:17 2018 From: pypy.commits at gmail.com (wlav) Date: Thu, 07 Jun 2018 08:59:17 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: fix memory leak in test Message-ID: <5b195655.1c69fb81.7a4c1.bdb1@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94734:83daee4823bc Date: 2018-05-21 14:30 -0700 http://bitbucket.org/pypy/pypy/changeset/83daee4823bc/ Log: fix memory leak in test diff --git a/pypy/module/_cppyy/test/datatypes.cxx b/pypy/module/_cppyy/test/datatypes.cxx --- a/pypy/module/_cppyy/test/datatypes.cxx +++ b/pypy/module/_cppyy/test/datatypes.cxx @@ -80,6 +80,7 @@ void CppyyTestData::destroy_arrays() { if (m_owns_arrays == true) { delete[] m_bool_array2; + delete[] m_uchar_array2; delete[] m_short_array2; delete[] m_ushort_array2; delete[] m_int_array2; From pypy.commits at gmail.com Thu Jun 7 11:59:14 2018 From: pypy.commits at gmail.com (wlav) Date: Thu, 07 Jun 2018 08:59:14 -0700 (PDT) Subject: 
[pypy-commit] pypy cppyy-packaging: more smart pointer support Message-ID: <5b195652.1c69fb81.6b2b4.d1d2@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94733:fba8c8e53f31 Date: 2018-05-18 11:23 -0700 http://bitbucket.org/pypy/pypy/changeset/fba8c8e53f31/ Log: more smart pointer support diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -625,7 +625,6 @@ class StdStringRefConverter(InstancePtrConverter): _immutable_fields_ = ['cppclass', 'typecode'] - typecode = 'V' def __init__(self, space, extra): @@ -701,7 +700,8 @@ class SmartPtrCppObjectConverter(TypeConverter): - _immutable_fields = ['smart', 'raw', 'deref'] + _immutable_fields = ['smartdecl', 'rawdecl', 'deref'] + typecode = 'V' def __init__(self, space, smartdecl, raw, deref): from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl, get_pythonized_cppclass @@ -711,6 +711,35 @@ space.findattr(w_raw, space.newtext("__cppdecl__"))) self.deref = deref + def _unwrap_object(self, space, w_obj): + from pypy.module._cppyy.interp_cppyy import W_CPPInstance + if isinstance(w_obj, W_CPPInstance): + # w_obj could carry a 'hidden' smart ptr or be one, cover both cases + have_match = False + if w_obj.smartdecl and capi.c_is_subtype(space, w_obj.smartdecl, self.smartdecl): + # hidden case, do not derefence when getting obj address + have_match = True + rawobject = w_obj._rawobject # TODO: this direct access if fugly + offset = capi.c_base_offset(space, w_obj.smartdecl, self.smartdecl, rawobject, 1) + elif capi.c_is_subtype(space, w_obj.clsdecl, self.smartdecl): + # exposed smart pointer + have_match = True + rawobject = w_obj.get_rawobject() + offset = capi.c_base_offset(space, w_obj.clsdecl, self.smartdecl, rawobject, 1) + if have_match: + obj_address = capi.direct_ptradd(rawobject, offset) + return rffi.cast(capi.C_OBJECT, obj_address) + + raise oefmt(space.w_TypeError, + "cannot pass %T 
as %s", w_obj, self.clsdecl.name) + + def convert_argument(self, space, w_obj, address, call_local): + x = rffi.cast(rffi.VOIDPP, address) + x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) + address = rffi.cast(capi.C_OBJECT, address) + ba = rffi.cast(rffi.CCHARP, address) + ba[capi.c_function_arg_typeoffset(space)] = self.typecode + def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy From pypy.commits at gmail.com Thu Jun 7 11:59:21 2018 From: pypy.commits at gmail.com (wlav) Date: Thu, 07 Jun 2018 08:59:21 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: finish pythonization of smart pointers Message-ID: <5b195659.1c69fb81.a85f0.291c@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94736:bf4f9f2a4234 Date: 2018-05-21 20:56 -0700 http://bitbucket.org/pypy/pypy/changeset/bf4f9f2a4234/ Log: finish pythonization of smart pointers diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -699,8 +699,8 @@ "no overload found matching %s", self.signature) -class SmartPtrCppObjectConverter(TypeConverter): - _immutable_fields = ['smartdecl', 'rawdecl', 'deref'] +class SmartPointerConverter(TypeConverter): + _immutable_fields = ['typecode', 'smartdecl', 'rawdecl', 'deref'] typecode = 'V' def __init__(self, space, smartdecl, raw, deref): @@ -746,6 +746,19 @@ return interp_cppyy.wrap_cppinstance(space, address, self.rawdecl, smartdecl=self.smartdecl, deref=self.deref, do_cast=False) +class SmartPointerPtrConverter(SmartPointerConverter): + typecode = 'o' + + def from_memory(self, space, w_obj, w_pycppclass, offset): + self._is_abstract(space) + + def to_memory(self, space, w_obj, w_value, offset): + self._is_abstract(space) + + +class SmartPointerRefConverter(SmartPointerPtrConverter): + typecode = 'V' 
+ class MacroConverter(TypeConverter): def from_memory(self, space, w_obj, w_pycppclass, offset): @@ -800,7 +813,13 @@ # check smart pointer type check_smart = capi.c_smartptr_info(space, clean_name) if check_smart[0]: - return SmartPtrCppObjectConverter(space, clsdecl, check_smart[1], check_smart[2]) + if compound == '': + return SmartPointerConverter(space, clsdecl, check_smart[1], check_smart[2]) + elif compound == '*': + return SmartPointerPtrConverter(space, clsdecl, check_smart[1], check_smart[2]) + elif compound == '&': + return SmartPointerRefConverter(space, clsdecl, check_smart[1], check_smart[2]) + # fall through: can still return smart pointer in non-smart way # type check for the benefit of the annotator if compound == "*": diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -125,7 +125,6 @@ class CStringExecutor(FunctionExecutor): - def execute(self, space, cppmethod, cppthis, num_args, args): lresult = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ccpresult = rffi.cast(rffi.CCHARP, lresult) @@ -136,7 +135,6 @@ class ConstructorExecutor(FunctionExecutor): - def execute(self, space, cppmethod, cpptype, num_args, args): from pypy.module._cppyy import interp_cppyy newthis = capi.c_constructor(space, cppmethod, cpptype, num_args, args) @@ -144,80 +142,77 @@ return space.newlong(rffi.cast(rffi.LONG, newthis)) # really want ptrdiff_t here -class InstancePtrExecutor(FunctionExecutor): - _immutable_fields_ = ['cppclass'] +class InstanceExecutor(FunctionExecutor): + # For return of a C++ instance by pointer: MyClass* func() + _immutable_fields_ = ['clsdecl'] - def __init__(self, space, cppclass): - FunctionExecutor.__init__(self, space, cppclass) - self.cppclass = cppclass + def __init__(self, space, clsdecl): + FunctionExecutor.__init__(self, space, clsdecl) + self.clsdecl = clsdecl + + def _wrap_result(self, space, obj): + from pypy.module._cppyy 
import interp_cppyy + return interp_cppyy.wrap_cppinstance(space, + obj, self.clsdecl, do_cast=False, python_owns=True, fresh=True) + + def execute(self, space, cppmethod, cppthis, num_args, args): + oresult = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.clsdecl) + return self._wrap_result(space, rffi.cast(capi.C_OBJECT, oresult)) + + +class InstancePtrExecutor(InstanceExecutor): + # For return of a C++ instance by pointer: MyClass* func() def cffi_type(self, space): state = space.fromcache(ffitypes.State) return state.c_voidp + def _wrap_result(self, space, obj): + from pypy.module._cppyy import interp_cppyy + return interp_cppyy.wrap_cppinstance(space, obj, self.clsdecl) + def execute(self, space, cppmethod, cppthis, num_args, args): - from pypy.module._cppyy import interp_cppyy - long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) - ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) - return pyres + lresult = capi.c_call_l(space, cppmethod, cppthis, num_args, args) + return self._wrap_result(space, rffi.cast(capi.C_OBJECT, lresult)) def execute_libffi(self, space, cif_descr, funcaddr, buffer): jit_libffi.jit_ffi_call(cif_descr, funcaddr, buffer) - result = rffi.ptradd(buffer, cif_descr.exchange_result) - from pypy.module._cppyy import interp_cppyy - ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) + presult = rffi.ptradd(buffer, cif_descr.exchange_result) + obj = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, presult)[0]) + return self._wrap_result(space, obj) class InstancePtrPtrExecutor(InstancePtrExecutor): + # For return of a C++ instance by ptr-to-ptr or ptr-to-ref: MyClass*& func() def execute(self, space, cppmethod, cppthis, num_args, args): - from pypy.module._cppyy import interp_cppyy - voidp_result = capi.c_call_r(space, cppmethod, cppthis, 
num_args, args) - ref_address = rffi.cast(rffi.VOIDPP, voidp_result) - ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0]) - return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) + presult = capi.c_call_r(space, cppmethod, cppthis, num_args, args) + ref = rffi.cast(rffi.VOIDPP, presult) + return self._wrap_result(space, rffi.cast(capi.C_OBJECT, ref[0])) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible raise FastCallNotPossible -class InstanceExecutor(InstancePtrExecutor): - - def execute(self, space, cppmethod, cppthis, num_args, args): - from pypy.module._cppyy import interp_cppyy - long_result = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.cppclass) - ptr_result = rffi.cast(capi.C_OBJECT, long_result) - return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass, - do_cast=False, python_owns=True, fresh=True) - - def execute_libffi(self, space, cif_descr, funcaddr, buffer): - from pypy.module._cppyy.interp_cppyy import FastCallNotPossible - raise FastCallNotPossible - class StdStringExecutor(InstancePtrExecutor): - def execute(self, space, cppmethod, cppthis, num_args, args): cstr, cstr_len = capi.c_call_s(space, cppmethod, cppthis, num_args, args) pystr = rffi.charpsize2str(cstr, cstr_len) capi.c_free(space, rffi.cast(rffi.VOIDP, cstr)) - return space.newbytes(pystr) + return space.newbytes(pystr) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible raise FastCallNotPossible class StdStringRefExecutor(InstancePtrExecutor): - - def __init__(self, space, cppclass): + def __init__(self, space, clsdecl): from pypy.module._cppyy import interp_cppyy - cppclass = interp_cppyy.scope_byname(space, capi.std_string_name) - InstancePtrExecutor.__init__(self, space, cppclass) + clsdecl = interp_cppyy.scope_byname(space, capi.std_string_name) + InstancePtrExecutor.__init__(self, 
space, clsdecl) class PyObjectExecutor(PtrTypeExecutor): - def wrap_result(self, space, lresult): space.getbuiltinmodule("cpyext") from pypy.module.cpyext.pyobject import PyObject, from_ref, make_ref, decref @@ -241,6 +236,41 @@ return self.wrap_result(space, rffi.cast(rffi.LONGP, result)[0]) +class SmartPointerExecutor(InstanceExecutor): + _immutable_fields_ = ['smartdecl', 'deref'] + + def __init__(self, space, smartdecl, raw, deref): + from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl, get_pythonized_cppclass + w_raw = get_pythonized_cppclass(space, raw) + rawdecl = space.interp_w(W_CPPClassDecl, space.findattr(w_raw, space.newtext("__cppdecl__"))) + InstanceExecutor.__init__(self, space, rawdecl) + self.smartdecl = smartdecl + self.deref = deref + + def _wrap_result(self, space, obj): + from pypy.module._cppyy import interp_cppyy + return interp_cppyy.wrap_cppinstance(space, obj, self.clsdecl, + self.smartdecl, self.deref, do_cast=False, python_owns=True, fresh=True) + +class SmartPointerPtrExecutor(InstancePtrExecutor): + _immutable_fields_ = ['smartdecl', 'deref'] + + def __init__(self, space, smartdecl, raw, deref): + # TODO: share this with SmartPointerExecutor through in mixin + from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl, get_pythonized_cppclass + w_raw = get_pythonized_cppclass(space, raw) + rawdecl = space.interp_w(W_CPPClassDecl, space.findattr(w_raw, space.newtext("__cppdecl__"))) + InstancePtrExecutor.__init__(self, space, rawdecl) + self.smartdecl = smartdecl + self.deref = deref + + def _wrap_result(self, space, obj): + from pypy.module._cppyy import interp_cppyy + # TODO: this is a pointer to a smart pointer, take ownership on the smart one? 
+ return interp_cppyy.wrap_cppinstance(space, obj, self.clsdecl, + self.smartdecl, self.deref, do_cast=False) + + _executors = {} def get_executor(space, name): # Matching of 'name' to an executor factory goes through up to four levels: @@ -253,7 +283,7 @@ name = capi.c_resolve_name(space, name) - # 1) full, qualified match + # full, qualified match try: return _executors[name](space, None) except KeyError: @@ -262,13 +292,13 @@ compound = helper.compound(name) clean_name = capi.c_resolve_name(space, helper.clean_type(name)) - # 1a) clean lookup + # clean lookup try: return _executors[clean_name+compound](space, None) except KeyError: pass - # 2) drop '&': by-ref is pretty much the same as by-value, python-wise + # drop '&': by-ref is pretty much the same as by-value, python-wise if compound and compound[len(compound)-1] == '&': # TODO: this does not actually work with Reflex (?) try: @@ -276,19 +306,29 @@ except KeyError: pass - # 3) types/classes, either by ref/ptr or by value + # types/classes, either by ref/ptr or by value from pypy.module._cppyy import interp_cppyy cppclass = interp_cppyy.scope_byname(space, clean_name) if cppclass: # type check for the benefit of the annotator from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl - cppclass = space.interp_w(W_CPPClassDecl, cppclass, can_be_None=False) + clsdecl = space.interp_w(W_CPPClassDecl, cppclass, can_be_None=False) + + # check smart pointer type + check_smart = capi.c_smartptr_info(space, clean_name) + if check_smart[0]: + if compound == '': + return SmartPointerExecutor(space, clsdecl, check_smart[1], check_smart[2]) + elif compound == '*' or compound == '&': + return SmartPointerPtrExecutor(space, clsdecl, check_smart[1], check_smart[2]) + # fall through: can still return smart pointer in non-smart way + if compound == '': - return InstanceExecutor(space, cppclass) + return InstanceExecutor(space, clsdecl) elif compound == '*' or compound == '&': - return InstancePtrExecutor(space, cppclass) + 
return InstancePtrExecutor(space, clsdecl) elif compound == '**' or compound == '*&': - return InstancePtrPtrExecutor(space, cppclass) + return InstancePtrPtrExecutor(space, clsdecl) elif "(anonymous)" in name: # special case: enum w/o a type name return _executors["internal_enum_type_t"](space, None) diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -1284,9 +1284,15 @@ return wrap_cppinstance(self.space, self._rawobject, self.smartdecl, do_cast=False) def destruct(self): - if self._rawobject and not (self.flags & INSTANCE_FLAGS_IS_REF): + if self._rawobject: + if self.smartdecl and self.deref: + klass = self.smartdecl + elif not (self.flags & INSTANCE_FLAGS_IS_REF): + klass = self.clsdecl + else: + return memory_regulator.unregister(self) - capi.c_destruct(self.space, self.clsdecl, self._rawobject) + capi.c_destruct(self.space, klass, self._rawobject) self._rawobject = capi.C_NULL_OBJECT def _finalize_(self): diff --git a/pypy/module/_cppyy/test/test_pythonization.py b/pypy/module/_cppyy/test/test_pythonization.py --- a/pypy/module/_cppyy/test/test_pythonization.py +++ b/pypy/module/_cppyy/test/test_pythonization.py @@ -133,15 +133,21 @@ mine = pz.gime_mine_ptr() assert type(mine) == Countable + assert mine.m_check == 0xcdcdcdcd assert type(mine.__smartptr__()) == cppyy.gbl.std.shared_ptr(Countable) + assert mine.__smartptr__().get().m_check == 0xcdcdcdcd assert mine.say_hi() == "Hi!" mine = pz.gime_mine_ref() assert type(mine) == Countable + assert mine.m_check == 0xcdcdcdcd assert type(mine.__smartptr__()) == cppyy.gbl.std.shared_ptr(Countable) + assert mine.__smartptr__().get().m_check == 0xcdcdcdcd assert mine.say_hi() == "Hi!" 
mine = pz.gime_mine() assert type(mine) == Countable + assert mine.m_check == 0xcdcdcdcd assert type(mine.__smartptr__()) == cppyy.gbl.std.shared_ptr(Countable) + assert mine.__smartptr__().get().m_check == 0xcdcdcdcd assert mine.say_hi() == "Hi!" From pypy.commits at gmail.com Thu Jun 7 11:59:19 2018 From: pypy.commits at gmail.com (wlav) Date: Thu, 07 Jun 2018 08:59:19 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: add pythonization tests Message-ID: <5b195657.1c69fb81.8da3.009d@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94735:3fd316125c15 Date: 2018-05-21 14:44 -0700 http://bitbucket.org/pypy/pypy/changeset/3fd316125c15/ Log: add pythonization tests diff --git a/pypy/module/_cppyy/test/pythonizables.cxx b/pypy/module/_cppyy/test/pythonizables.cxx new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/pythonizables.cxx @@ -0,0 +1,29 @@ +#include "pythonizables.h" + + +pyzables::MyBase::~MyBase() {} +pyzables::MyDerived::~MyDerived() {} + +pyzables::MyBase* pyzables::GimeDerived() { + return new MyDerived(); +} + + +//=========================================================================== +int pyzables::Countable::sInstances = 0; +pyzables::SharedCountable_t pyzables::mine = + pyzables::SharedCountable_t(new pyzables::Countable); + +void pyzables::renew_mine() { mine = std::shared_ptr(new Countable); } + +pyzables::SharedCountable_t pyzables::gime_mine() { return mine; } +pyzables::SharedCountable_t* pyzables::gime_mine_ptr() { return &mine; } +pyzables::SharedCountable_t& pyzables::gime_mine_ref() { return mine; } + +unsigned int pyzables::pass_mine_sp(std::shared_ptr ptr) { return ptr->m_check; } +unsigned int pyzables::pass_mine_sp_ref(std::shared_ptr& ptr) { return ptr->m_check; } +unsigned int pyzables::pass_mine_sp_ptr(std::shared_ptr* ptr) { return (*ptr)->m_check; } + +unsigned int pyzables::pass_mine_rp(Countable c) { return c.m_check; } +unsigned int pyzables::pass_mine_rp_ref(const Countable& c) { 
return c.m_check; } +unsigned int pyzables::pass_mine_rp_ptr(const Countable* c) { return c->m_check; } diff --git a/pypy/module/_cppyy/test/pythonizables.h b/pypy/module/_cppyy/test/pythonizables.h new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/pythonizables.h @@ -0,0 +1,60 @@ +#include +#include + + +namespace pyzables { + +//=========================================================================== +class SomeDummy1 {}; +class SomeDummy2 {}; + + +//=========================================================================== +class MyBase { +public: + virtual ~MyBase(); +}; +class MyDerived : public MyBase { +public: + virtual ~MyDerived(); +}; + +MyBase* GimeDerived(); + + +//=========================================================================== +class Countable { +public: + Countable() { ++sInstances; } + Countable(const Countable&) { ++sInstances; } + Countable& operator=(const Countable&) { return *this; } + ~Countable() { --sInstances; } + +public: + virtual const char* say_hi() { return "Hi!"; } + +public: + unsigned int m_check = 0xcdcdcdcd; + +public: + static int sInstances; +}; + +typedef std::shared_ptr SharedCountable_t; +extern SharedCountable_t mine; + +void renew_mine(); + +SharedCountable_t gime_mine(); +SharedCountable_t* gime_mine_ptr(); +SharedCountable_t& gime_mine_ref(); + +unsigned int pass_mine_sp(SharedCountable_t p); +unsigned int pass_mine_sp_ref(SharedCountable_t& p); +unsigned int pass_mine_sp_ptr(SharedCountable_t* p); + +unsigned int pass_mine_rp(Countable); +unsigned int pass_mine_rp_ref(const Countable&); +unsigned int pass_mine_rp_ptr(const Countable*); + +} // namespace pyzables diff --git a/pypy/module/_cppyy/test/pythonizables.xml b/pypy/module/_cppyy/test/pythonizables.xml new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/pythonizables.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/pypy/module/_cppyy/test/test_pythonization.py b/pypy/module/_cppyy/test/test_pythonization.py new file mode 
100644 --- /dev/null +++ b/pypy/module/_cppyy/test/test_pythonization.py @@ -0,0 +1,147 @@ +import py, os, sys +from pytest import raises +from .support import setup_make + + +currpath = py.path.local(__file__).dirpath() +test_dct = str(currpath.join("pythonizablesDict.so")) + +def setup_module(mod): + setup_make("pythonizablesDict.so") + +class AppTestPYTHONIZATION: + spaceconfig = dict(usemodules=['_cppyy', '_rawffi', 'itertools']) + + def setup_class(cls): + cls.w_test_dct = cls.space.newtext(test_dct) + cls.w_datatypes = cls.space.appexec([], """(): + import ctypes, _cppyy + _cppyy._post_import_startup() + return ctypes.CDLL(%r, ctypes.RTLD_GLOBAL)""" % (test_dct, )) + + def test00_api(self): + """Test basic semantics of the pythonization API""" + + import _cppyy + + raises(TypeError, _cppyy.add_pythonization, 1) + + def pythonizor1(klass, name): + pass + + def pythonizor2(klass, name): + pass + + pythonizor3 = pythonizor1 + + _cppyy.add_pythonization(pythonizor1) + assert _cppyy.remove_pythonization(pythonizor2) == False + assert _cppyy.remove_pythonization(pythonizor3) == True + + def test01_more_api(self): + """Further API semantics""" + + import _cppyy as cppyy + + def pythonizor(klass, name): + if name == 'pyzables::SomeDummy1': + klass.test = 1 + + cppyy.add_pythonization(pythonizor) + assert cppyy.gbl.pyzables.SomeDummy1.test == 1 + + def pythonizor(klass, name): + if name == 'SomeDummy2': + klass.test = 2 + cppyy.add_pythonization(pythonizor, 'pyzables') + + def pythonizor(klass, name): + if name == 'pyzables::SomeDummy2': + klass.test = 3 + cppyy.add_pythonization(pythonizor) + + assert cppyy.gbl.pyzables.SomeDummy2.test == 2 + + def root_pythonizor(klass, name): + if name == 'TString': + klass.__len__ = klass.Length + + cppyy.add_pythonization(root_pythonizor) + + assert len(cppyy.gbl.TString("aap")) == 3 + + def test02_type_pinning(self): + """Verify pinnability of returns""" + + import _cppyy as cppyy + + cppyy.gbl.pyzables.GimeDerived._creates = 
True + + result = cppyy.gbl.pyzables.GimeDerived() + assert type(result) == cppyy.gbl.pyzables.MyDerived + + cppyy._pin_type(cppyy.gbl.pyzables.MyBase) + assert type(result) == cppyy.gbl.pyzables.MyDerived + + + def test03_transparency(self): + """Transparent use of smart pointers""" + + import _cppyy as cppyy + + Countable = cppyy.gbl.pyzables.Countable + mine = cppyy.gbl.pyzables.mine + + assert type(mine) == Countable + assert mine.m_check == 0xcdcdcdcd + assert type(mine.__smartptr__()) == cppyy.gbl.std.shared_ptr(Countable) + assert mine.__smartptr__().get().m_check == 0xcdcdcdcd + assert mine.say_hi() == "Hi!" + + def test04_converters(self): + """Smart pointer argument passing""" + + import _cppyy as cppyy + + pz = cppyy.gbl.pyzables + mine = pz.mine + + assert 0xcdcdcdcd == pz.pass_mine_rp_ptr(mine) + assert 0xcdcdcdcd == pz.pass_mine_rp_ref(mine) + assert 0xcdcdcdcd == pz.pass_mine_rp(mine) + + assert 0xcdcdcdcd == pz.pass_mine_sp_ptr(mine) + assert 0xcdcdcdcd == pz.pass_mine_sp_ref(mine) + + assert 0xcdcdcdcd == pz.pass_mine_sp_ptr(mine.__smartptr__()) + assert 0xcdcdcdcd == pz.pass_mine_sp_ref(mine.__smartptr__()) + + assert 0xcdcdcdcd == pz.pass_mine_sp(mine) + assert 0xcdcdcdcd == pz.pass_mine_sp(mine.__smartptr__()) + + # TODO: + # cppyy.gbl.mine = mine + pz.renew_mine() + + def test05_executors(self): + """Smart pointer return types""" + + import _cppyy as cppyy + + pz = cppyy.gbl.pyzables + Countable = pz.Countable + + mine = pz.gime_mine_ptr() + assert type(mine) == Countable + assert type(mine.__smartptr__()) == cppyy.gbl.std.shared_ptr(Countable) + assert mine.say_hi() == "Hi!" + + mine = pz.gime_mine_ref() + assert type(mine) == Countable + assert type(mine.__smartptr__()) == cppyy.gbl.std.shared_ptr(Countable) + assert mine.say_hi() == "Hi!" + + mine = pz.gime_mine() + assert type(mine) == Countable + assert type(mine.__smartptr__()) == cppyy.gbl.std.shared_ptr(Countable) + assert mine.say_hi() == "Hi!" 
From pypy.commits at gmail.com Thu Jun 7 11:59:23 2018 From: pypy.commits at gmail.com (wlav) Date: Thu, 07 Jun 2018 08:59:23 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: reduce layers in method dispatch for simplicity, performance, and support of templated methods (this requires backend Message-ID: <5b19565b.1c69fb81.dfcb6.0f0b@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94737:264a0794b659 Date: 2018-06-07 08:40 -0700 http://bitbucket.org/pypy/pypy/changeset/264a0794b659/ Log: reduce layers in method dispatch for simplicity, performance, and support of templated methods (this requires backend 1.1.0) diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -88,7 +88,7 @@ assert obj._voidp != rffi.cast(rffi.VOIDP, 0) data = rffi.cast(rffi.VOIDPP, data) data[0] = obj._voidp - else: # only other use is sring + else: # only other use is string assert obj.tc == 's' n = len(obj._string) assert raw_string == rffi.cast(rffi.CCHARP, 0) @@ -183,8 +183,7 @@ 'constructor' : ([c_method, c_object, c_int, c_voidp], c_object), 'call_o' : ([c_method, c_object, c_int, c_voidp, c_type], c_object), - 'function_address_from_index' : ([c_scope, c_index], c_voidp), # TODO: verify - 'function_address_from_method' : ([c_method], c_voidp), # id. 
+ 'function_address' : ([c_method], c_voidp), # TODO: verify # handling of function argument buffer 'allocate_function_args' : ([c_int], c_voidp), @@ -216,30 +215,30 @@ 'num_methods' : ([c_scope], c_int), 'method_indices_from_name' : ([c_scope, c_ccharp], c_index_array), - 'method_name' : ([c_scope, c_index], c_ccharp), - 'method_mangled_name' : ([c_scope, c_index], c_ccharp), - 'method_result_type' : ([c_scope, c_index], c_ccharp), - 'method_num_args' : ([c_scope, c_index], c_int), - 'method_req_args' : ([c_scope, c_index], c_int), - 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), - 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), - 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), + 'get_method' : ([c_scope, c_index], c_method), + + 'method_name' : ([c_method], c_ccharp), + 'method_mangled_name' : ([c_method], c_ccharp), + 'method_result_type' : ([c_method], c_ccharp), + 'method_num_args' : ([c_method], c_int), + 'method_req_args' : ([c_method], c_int), + 'method_arg_type' : ([c_method, c_int], c_ccharp), + 'method_arg_default' : ([c_method, c_int], c_ccharp), + 'method_signature' : ([c_method, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_method, c_int], c_ccharp), 'is_const_method' : ([c_method], c_int), 'exists_method_template' : ([c_scope, c_ccharp], c_int), 'method_is_template' : ([c_scope, c_index], c_int), - 'method_num_template_args' : ([c_scope, c_index], c_int), - 'method_template_arg_name' : ([c_scope, c_index, c_index], c_ccharp), + 'get_method_template' : ([c_scope, c_ccharp, c_ccharp], c_method), - 'get_method' : ([c_scope, c_index], c_method), 'get_global_operator' : ([c_scope, c_scope, c_scope, c_ccharp], c_index), # method properties - 'is_public_method' : ([c_type, c_index], c_int), - 'is_constructor' : ([c_type, c_index], c_int), - 'is_destructor' : ([c_type, c_index], c_int), - 'is_staticmethod' : ([c_type, c_index], c_int), + 
'is_public_method' : ([c_method], c_int), + 'is_constructor' : ([c_method], c_int), + 'is_destructor' : ([c_method], c_int), + 'is_staticmethod' : ([c_method], c_int), # data member reflection information 'num_datamembers' : ([c_scope], c_int), @@ -417,13 +416,9 @@ args = [_ArgH(cppmethod), _ArgH(cppobject), _ArgL(nargs), _ArgP(cargs), _ArgH(cppclass.handle)] return _cdata_to_cobject(space, call_capi(space, 'call_o', args)) -def c_function_address_from_index(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_function_address(space, cppmethod): return rffi.cast(C_FUNC_PTR, - _cdata_to_ptr(space, call_capi(space, 'function_address_from_index', args))) -def c_function_address_from_method(space, cppmethod): - return rffi.cast(C_FUNC_PTR, - _cdata_to_ptr(space, call_capi(space, 'function_address_from_method', [_ArgH(cppmethod)]))) + _cdata_to_ptr(space, call_capi(space, 'function_address', [_ArgH(cppmethod)]))) # handling of function argument buffer --------------------------------------- def c_allocate_function_args(space, size): @@ -527,30 +522,34 @@ c_free(space, rffi.cast(rffi.VOIDP, indices)) # c_free defined below return py_indices -def c_method_name(space, cppscope, index): +def c_get_method(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] - return charp2str_free(space, call_capi(space, 'method_name', args)) -def c_method_result_type(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] - return charp2str_free(space, call_capi(space, 'method_result_type', args)) -def c_method_num_args(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] - return space.int_w(call_capi(space, 'method_num_args', args)) -def c_method_req_args(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] - return space.int_w(call_capi(space, 'method_req_args', args)) -def c_method_arg_type(space, cppscope, index, arg_index): - args = [_ArgH(cppscope.handle), _ArgL(index), 
_ArgL(arg_index)] + return rffi.cast(C_METHOD, space.uint_w(call_capi(space, 'get_method', args))) + +def c_method_name(space, cppmeth): + return charp2str_free(space, call_capi(space, 'method_name', [_ArgH(cppmeth)])) +def c_method_mangled_name(space, cppmeth): + return charp2str_free(space, call_capi(space, 'method_mangled_name', [_ArgH(cppmeth)])) +def c_method_result_type(space, cppmeth): + return charp2str_free(space, call_capi(space, 'method_result_type', [_ArgH(cppmeth)])) +def c_method_num_args(space, cppmeth): + return space.int_w(call_capi(space, 'method_num_args', [_ArgH(cppmeth)])) +def c_method_req_args(space, cppmeth): + return space.int_w(call_capi(space, 'method_req_args', [_ArgH(cppmeth)])) +def c_method_arg_type(space, cppmeth, arg_index): + args = [_ArgH(cppmeth), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_type', args)) -def c_method_arg_default(space, cppscope, index, arg_index): - args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] +def c_method_arg_default(space, cppmeth, arg_index): + args = [_ArgH(cppmeth), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index, show_formalargs=True): - args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] +def c_method_signature(space, cppmeth, show_formalargs=True): + args = [_ArgH(cppmeth), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) -def c_method_prototype(space, cppscope, index, show_formalargs=True): - args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] +def c_method_prototype(space, cppscope, cppmeth, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgH(cppmeth), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_prototype', args)) +def c_is_const_method(space, cppmeth): + return space.bool_w(call_capi(space, 'is_const_method', [_ArgH(cppmeth)])) def 
c_exists_method_template(space, cppscope, name): args = [_ArgH(cppscope.handle), _ArgS(name)] @@ -558,21 +557,10 @@ def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] return space.bool_w(call_capi(space, 'method_is_template', args)) -def _c_method_num_template_args(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] - return space.int_w(call_capi(space, 'method_num_template_args', args)) -def c_template_args(space, cppscope, index): - nargs = _c_method_num_template_args(space, cppscope, index) - arg1 = _ArgH(cppscope.handle) - arg2 = _ArgL(index) - args = [c_resolve_name(space, charp2str_free(space, - call_capi(space, 'method_template_arg_name', [arg1, arg2, _ArgL(iarg)])) - ) for iarg in range(nargs)] - return args -def c_get_method(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] - return rffi.cast(C_METHOD, space.uint_w(call_capi(space, 'get_method', args))) +def c_get_method_template(space, cppscope, name): + args = [_ArgH(cppscope.handle), _ArgS(name)] + return rffi.cast(C_METHOD, space.uint_w(call_capi(space, 'get_method_template', args))) def c_get_global_operator(space, nss, lc, rc, op): if nss is not None: args = [_ArgH(nss.handle), _ArgH(lc.handle), _ArgH(rc.handle), _ArgS(op)] @@ -580,18 +568,14 @@ return rffi.cast(WLAVC_INDEX, -1) # method properties ---------------------------------------------------------- -def c_is_public_method(space, cppclass, index): - args = [_ArgH(cppclass.handle), _ArgL(index)] - return space.bool_w(call_capi(space, 'is_public_method', args)) -def c_is_constructor(space, cppclass, index): - args = [_ArgH(cppclass.handle), _ArgL(index)] - return space.bool_w(call_capi(space, 'is_constructor', args)) -def c_is_destructor(space, cppclass, index): - args = [_ArgH(cppclass.handle), _ArgL(index)] - return space.bool_w(call_capi(space, 'is_destructor', args)) -def c_is_staticmethod(space, cppclass, index): - args = [_ArgH(cppclass.handle), 
_ArgL(index)] - return space.bool_w(call_capi(space, 'is_staticmethod', args)) +def c_is_public_method(space, cppmeth): + return space.bool_w(call_capi(space, 'is_public_method', [_ArgH(cppmeth)])) +def c_is_constructor(space, cppmeth): + return space.bool_w(call_capi(space, 'is_constructor', [_ArgH(cppmeth)])) +def c_is_destructor(space, cppmeth): + return space.bool_w(call_capi(space, 'is_destructor', [_ArgH(cppmeth)])) +def c_is_staticmethod(space, cppmeth): + return space.bool_w(call_capi(space, 'is_staticmethod', [_ArgH(cppmeth)])) # data member reflection information ----------------------------------------- def c_num_datamembers(space, cppscope): diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -69,8 +69,8 @@ # array type try: arr = space.interp_w(W_ArrayInstance, w_obj, can_be_None=True) - if arr: - return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) + #if arr: + #return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass # pre-defined nullptr @@ -384,7 +384,7 @@ arg = space.text_w(w_obj) x[0] = rffi.cast(rffi.LONG, rffi.str2charp(arg)) ba = rffi.cast(rffi.CCHARP, address) - ba[capi.c_function_arg_typeoffset(space)] = 'o' + ba[capi.c_function_arg_typeoffset(space)] = 'p' def from_memory(self, space, w_obj, w_pycppclass, offset): address = self._get_raw_address(space, w_obj, offset) @@ -500,7 +500,7 @@ obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, - "cannot pass %T as %s", w_obj, self.clsdecl.name) + "cannot pass %T instance as %s", w_obj, self.clsdecl.name) def cffi_type(self, space): state = space.fromcache(ffitypes.State) @@ -615,8 +615,7 @@ address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy - assign.call( 
- interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) + assign.call_impl(address, [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -687,8 +686,7 @@ m = cppol.functions[i] if m.signature(False) == self.signature: x = rffi.cast(rffi.VOIDPP, address) - x[0] = rffi.cast(rffi.VOIDP, - capi.c_function_address_from_method(space, m.cppmethod)) + x[0] = rffi.cast(rffi.VOIDP, capi.c_function_address(space, m.cppmethod)) address = rffi.cast(capi.C_OBJECT, address) ba = rffi.cast(rffi.CCHARP, address) ba[capi.c_function_arg_typeoffset(space)] = 'p' @@ -731,7 +729,7 @@ return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, - "cannot pass %T as %s", w_obj, self.clsdecl.name) + "cannot pass %T instance as %s", w_obj, self.rawdecl.name) def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.VOIDPP, address) @@ -799,6 +797,8 @@ try: # array_index may be negative to indicate no size or no size found array_size = helper.array_size(_name) # uses original arg + # TODO: using clean_name here drops const (e.g. 
const char[] will + # never be seen this way) return _a_converters[clean_name+compound](space, array_size) except KeyError: pass @@ -971,6 +971,7 @@ # special case, const char* w/ size and w/o '\0' _a_converters["const char[]"] = CStringConverterWithSize + _a_converters["char[]"] = _a_converters["const char[]"] # debatable _build_array_converters() diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -76,9 +76,7 @@ cppyy_object_t cppyy_call_o(cppyy_method_t method, cppyy_object_t self, int nargs, void* args, cppyy_type_t result_type); RPY_EXTERN - cppyy_funcaddr_t cppyy_function_address_from_index(cppyy_scope_t scope, cppyy_index_t idx); - RPY_EXTERN - cppyy_funcaddr_t cppyy_function_address_from_method(cppyy_method_t method); + cppyy_funcaddr_t cppyy_function_address(cppyy_method_t method); /* handling of function argument buffer ----------------------------------- */ RPY_EXTERN @@ -132,23 +130,26 @@ cppyy_index_t* cppyy_method_indices_from_name(cppyy_scope_t scope, const char* name); RPY_EXTERN - char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx); + cppyy_method_t cppyy_get_method(cppyy_scope_t scope, cppyy_index_t idx); + RPY_EXTERN - char* cppyy_method_mangled_name(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_name(cppyy_method_t); RPY_EXTERN - char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_mangled_name(cppyy_method_t); RPY_EXTERN - int cppyy_method_num_args(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_result_type(cppyy_method_t); RPY_EXTERN - int cppyy_method_req_args(cppyy_scope_t scope, cppyy_index_t idx); + int cppyy_method_num_args(cppyy_method_t); RPY_EXTERN - char* cppyy_method_arg_type(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); + int cppyy_method_req_args(cppyy_method_t); RPY_EXTERN - char* cppyy_method_arg_default(cppyy_scope_t scope, 
cppyy_index_t idx, int arg_index); + char* cppyy_method_arg_type(cppyy_method_t, int arg_index); RPY_EXTERN - char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + char* cppyy_method_arg_default(cppyy_method_t, int arg_index); RPY_EXTERN - char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + char* cppyy_method_signature(cppyy_method_t, int show_formalargs); + RPY_EXTERN + char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_method_t idx, int show_formalargs); RPY_EXTERN int cppyy_is_const_method(cppyy_method_t); @@ -157,25 +158,21 @@ RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN - int cppyy_method_num_template_args(cppyy_scope_t scope, cppyy_index_t idx); - RPY_EXTERN - char* cppyy_method_template_arg_name(cppyy_scope_t scope, cppyy_index_t idx, cppyy_index_t iarg); + cppyy_method_t cppyy_get_method_template(cppyy_scope_t scope, const char* name); RPY_EXTERN - cppyy_method_t cppyy_get_method(cppyy_scope_t scope, cppyy_index_t idx); - RPY_EXTERN cppyy_index_t cppyy_get_global_operator( cppyy_scope_t scope, cppyy_scope_t lc, cppyy_scope_t rc, const char* op); /* method properties ------------------------------------------------------ */ RPY_EXTERN - int cppyy_is_publicmethod(cppyy_type_t type, cppyy_index_t idx); + int cppyy_is_publicmethod(cppyy_method_t); RPY_EXTERN - int cppyy_is_constructor(cppyy_type_t type, cppyy_index_t idx); + int cppyy_is_constructor(cppyy_method_t); RPY_EXTERN - int cppyy_is_destructor(cppyy_type_t type, cppyy_index_t idx); + int cppyy_is_destructor(cppyy_method_t); RPY_EXTERN - int cppyy_is_staticmethod(cppyy_type_t type, cppyy_index_t idx); + int cppyy_is_staticmethod(cppyy_method_t); /* data member reflection information ------------------------------------- */ RPY_EXTERN diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ 
b/pypy/module/_cppyy/interp_cppyy.py @@ -158,17 +158,17 @@ #----- -# Classes involved with methods and functions: +# Classes involved with methods and functions come at two levels: +# - overloads: user-facing collections of overloaded functions +# - wrappers: internal holders of the individual C++ methods # -# CPPMethod: base class wrapping a single function or method -# CPPConstructor: specialization for allocating a new object -# CPPFunction: specialization for free and static functions +# W_CPPOverload: instance methods (base class) +# W_CPPConstructorOverload: constructors +# W_CPPStaticOverload: free and static functions +# W_CPPTemplateOverload: templated methods/functions +# +# CPPMethod: a single function or method (base class) # CPPSetItem: specialization for Python's __setitem__ -# CPPTemplateMethod: trampoline to instantiate and bind templated functions -# W_CPPOverload, W_CPPConstructorOverload, W_CPPTemplateOverload: -# user-facing, app-level, collection of overloads, with specializations -# for constructors and templates -# W_CPPBoundMethod: instantiated template method # # All methods/functions derive from CPPMethod and are collected as overload # candidates in user-facing overload classes. 
Templated methods are a two-step @@ -181,15 +181,15 @@ also takes care of offset casting and recycling of known objects through the memory_regulator.""" - _attrs_ = ['space', 'scope', 'index', 'cppmethod', 'arg_defs', 'args_required', + _attrs_ = ['space', 'scope', 'cppmethod', 'arg_defs', 'args_required', 'converters', 'executor', '_funcaddr', 'cif_descr', 'uses_local'] - _immutable_ = True + _immutable_fields_ = ['scope', 'cppmethod', 'arg_defs', 'args_required', + 'converters', 'executor', 'uses_local'] - def __init__(self, space, declaring_scope, method_index, arg_defs, args_required): + def __init__(self, space, declaring_scope, cppmethod, arg_defs, args_required): self.space = space self.scope = declaring_scope - self.index = method_index - self.cppmethod = capi.c_get_method(self.space, self.scope, method_index) + self.cppmethod = cppmethod self.arg_defs = arg_defs self.args_required = args_required @@ -201,12 +201,6 @@ self._funcaddr = lltype.nullptr(capi.C_FUNC_PTR.TO) self.uses_local = False - @staticmethod - def unpack_cppthis(space, w_cppinstance, declaring_scope): - cppinstance = space.interp_w(W_CPPInstance, w_cppinstance) - cppinstance._nullcheck() - return cppinstance.get_cppthis(declaring_scope) - def _address_from_local_buffer(self, call_local, idx): if not call_local: return call_local @@ -349,7 +343,7 @@ self.converters = [converter.get_converter(self.space, arg_type, arg_dflt) for arg_type, arg_dflt in self.arg_defs] self.executor = executor.get_executor( - self.space, capi.c_method_result_type(self.space, self.scope, self.index)) + self.space, capi.c_method_result_type(self.space, self.cppmethod)) for conv in self.converters: if conv.uses_local: @@ -359,7 +353,7 @@ # Each CPPMethod corresponds one-to-one to a C++ equivalent and cppthis # has been offset to the matching class. Hence, the libffi pointer is # uniquely defined and needs to be setup only once. 
- funcaddr = capi.c_function_address_from_index(self.space, self.scope, self.index) + funcaddr = capi.c_function_address(self.space, self.cppmethod) if funcaddr and cppthis: # TODO: methods only for now state = self.space.fromcache(ffitypes.State) @@ -427,10 +421,10 @@ capi.c_deallocate_function_args(self.space, args) def signature(self, show_formalargs=True): - return capi.c_method_signature(self.space, self.scope, self.index, show_formalargs) + return capi.c_method_signature(self.space, self.cppmethod, show_formalargs) def prototype(self, show_formalargs=True): - return capi.c_method_prototype(self.space, self.scope, self.index, show_formalargs) + return capi.c_method_prototype(self.space, self.scope, self.cppmethod, show_formalargs) def priority(self): total_arg_priority = 0 @@ -440,8 +434,11 @@ @rgc.must_be_light_finalizer def __del__(self): - if self.cif_descr: - lltype.free(self.cif_descr, flavor='raw') + try: + if self.cif_descr: + lltype.free(self.cif_descr, flavor='raw') + except Exception: # TODO: happens for templates, why? + pass def __repr__(self): return "CPPMethod: %s" % self.prototype() @@ -450,80 +447,12 @@ assert 0, "you should never have a pre-built instance of this!" 
-class CPPFunction(CPPMethod): - """Global (namespaced) / static function dispatcher.""" - - _immutable_ = True - - @staticmethod - def unpack_cppthis(space, w_cppinstance, declaring_scope): - return capi.C_NULL_OBJECT - - def __repr__(self): - return "CPPFunction: %s" % self.prototype() - - -class CPPTemplateMethod(CPPMethod): - """Method dispatcher that first resolves the template instance.""" - - _attrs_ = ['space', 'templ_args'] - _immutable_ = True - - def __init__(self, space, templ_args, declaring_scope, method_index, arg_defs, args_required): - self.space = space - self.templ_args = templ_args - # TODO: might have to specialize for CPPTemplateMethod on CPPMethod/CPPFunction here - CPPMethod.__init__(self, space, declaring_scope, method_index, arg_defs, args_required) - - def call(self, cppthis, args_w, useffi): - assert lltype.typeOf(cppthis) == capi.C_OBJECT - for i in range(len(args_w)): - try: - s = self.space.text_w(args_w[i]) - except OperationError: - s = self.space.text_w(self.space.getattr(args_w[i], self.space.newtext('__name__'))) - s = capi.c_resolve_name(self.space, s) - if s != self.templ_args[i]: - raise oefmt(self.space.w_TypeError, - "non-matching template (got %s where %s expected)", - s, self.templ_args[i]) - return W_CPPBoundMethod(cppthis, self, useffi) - - def bound_call(self, cppthis, args_w, useffi): - return CPPMethod.call(self, cppthis, args_w, useffi) - - def __repr__(self): - return "CPPTemplateMethod: %s" % self.prototype() - - -class CPPConstructor(CPPMethod): - """Method dispatcher that constructs new objects. 
This method can not have - a fast path, as the allocation of the object is currently left to the - reflection layer only, since the C++ class may have an overloaded operator - new, disallowing malloc here.""" - - _immutable_ = True - - @staticmethod - def unpack_cppthis(space, w_cppinstance, declaring_scope): - return rffi.cast(capi.C_OBJECT, declaring_scope.handle) - - def call(self, cppthis, args_w, useffi): - # Note: this does not return a wrapped instance, just a pointer to the - # new instance; the overload must still wrap it before returning. Also, - # cppthis is declaring_scope.handle (as per unpack_cppthis(), above). - return CPPMethod.call(self, cppthis, args_w, useffi) - - def __repr__(self): - return "CPPConstructor: %s" % self.prototype() - - class CPPSetItem(CPPMethod): """Method dispatcher specific to Python's __setitem__ mapped onto C++'s operator[](int). The former function takes an extra argument to assign to the return type of the latter.""" - _immutable_ = True + _attrs_ = [] def call(self, cppthis, args_w, useffi): end = len(args_w)-1 @@ -537,46 +466,44 @@ class W_CPPOverload(W_Root): - """Dispatcher that is actually available at the app-level: it is a - collection of (possibly) overloaded methods or functions. It calls these - in order and deals with error handling and reporting.""" + """App-level dispatcher: controls a collection of (potentially) overloaded methods + or functions. 
Calls these in order and deals with error handling and reporting.""" - _attrs_ = ['space', 'scope', 'functions', 'flags'] + _attrs_ = ['space', 'scope', 'functions', 'flags', 'w_this'] _immutable_fields_ = ['scope', 'functions[*]'] - def __init__(self, space, declaring_scope, functions): - self.space = space - self.scope = declaring_scope - assert len(functions) + def __init__(self, space, declaring_scope, functions, flags = OVERLOAD_FLAGS_USE_FFI): + self.space = space + self.scope = declaring_scope from rpython.rlib import debug self.functions = debug.make_sure_not_resized(functions) - self.flags = 0 - self.flags |= OVERLOAD_FLAGS_USE_FFI + self.flags = flags + self.w_this = self.space.w_None - # allow user to determine ffi use rules per overload - def fget_useffi(self, space): - return space.newbool(bool(self.flags & OVERLOAD_FLAGS_USE_FFI)) + @unwrap_spec(args_w='args_w') + def descr_get(self, w_cppinstance, args_w): + if self.space.is_w(w_cppinstance, self.space.w_None): + return self # unbound, so no new instance needed + cppol = W_CPPOverload(self.space, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + return cppol # bound - @unwrap_spec(value=bool) - def fset_useffi(self, space, value): - if space.is_true(value): - self.flags |= OVERLOAD_FLAGS_USE_FFI + @unwrap_spec(args_w='args_w') + def call(self, args_w): + if self.space.is_w(self.w_this, self.space.w_None) and len(args_w): + w_this = args_w[0] + args_w = args_w[1:] else: - self.flags &= ~OVERLOAD_FLAGS_USE_FFI - - @jit.elidable_promote() - def is_static(self): - if isinstance(self.functions[0], CPPFunction): - return self.space.w_True - return self.space.w_False + w_this = self.w_this + cppinstance = self.space.interp_w(W_CPPInstance, w_this) + cppinstance._nullcheck() + if not capi.c_is_subtype(self.space, cppinstance.clsdecl, self.scope): + raise oefmt(self.space.w_TypeError, + "cannot pass %T instance as %s", w_this, self.scope.name) + return 
self.call_impl(cppinstance.get_cppthis(self.scope), args_w) @jit.unroll_safe - @unwrap_spec(args_w='args_w') - def call(self, w_cppinstance, args_w): - # instance handling is specific to the function type only, so take it out - # of the loop over function overloads - cppthis = self.functions[0].unpack_cppthis( - self.space, w_cppinstance, self.functions[0].scope) + def call_impl(self, cppthis, args_w): assert lltype.typeOf(cppthis) == capi.C_OBJECT # The following code tries out each of the functions in order. If @@ -634,38 +561,96 @@ sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) + # allow user to determine ffi use rules per overload + def fget_useffi(self, space): + return space.newbool(bool(self.flags & OVERLOAD_FLAGS_USE_FFI)) + + @unwrap_spec(value=bool) + def fset_useffi(self, space, value): + if space.is_true(value): + self.flags |= OVERLOAD_FLAGS_USE_FFI + else: + self.flags &= ~OVERLOAD_FLAGS_USE_FFI + + def fget_doc(self, space): + return self.prototype() + def __repr__(self): return "W_CPPOverload(%s)" % [f.prototype() for f in self.functions] W_CPPOverload.typedef = TypeDef( 'CPPOverload', - call = interp2app(W_CPPOverload.call), - is_static = interp2app(W_CPPOverload.is_static), + __get__ = interp2app(W_CPPOverload.descr_get), + __call__ = interp2app(W_CPPOverload.call), __useffi__ = GetSetProperty(W_CPPOverload.fget_useffi, W_CPPOverload.fset_useffi), - prototype = interp2app(W_CPPOverload.prototype), + __doc__ = GetSetProperty(W_CPPOverload.fget_doc) ) +# overload collection of static (class and free) functions; these differ +# from methods only in the handling of 'cppthis' +class W_CPPStaticOverload(W_CPPOverload): + _attrs_ = [] + + @unwrap_spec(args_w='args_w') + def descr_get(self, w_cppinstance, args_w): + if isinstance(w_cppinstance, W_CPPInstance): + # two possibilities: this is a static function called on an + # instance and w_this must not be set, or a free function rebound + # onto a class and w_this should be set 
+ cppinstance = self.space.interp_w(W_CPPInstance, w_cppinstance) + if cppinstance.clsdecl.handle != self.scope.handle: + cppol = W_CPPStaticOverload(self.space, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + return cppol # bound + return self # unbound + + @unwrap_spec(args_w='args_w') + def call(self, args_w): + if not self.space.is_w(self.w_this, self.space.w_None): + # free function used as bound method, put self back into args_w + cppinstance = self.space.interp_w(W_CPPInstance, self.w_this) + cppinstance._nullcheck() + args_w = [self.w_this] + args_w + return self.call_impl(capi.C_NULL_OBJECT, args_w) + + def __repr__(self): + return "W_CPPStaticOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPStaticOverload.typedef = TypeDef( + 'CPPStaticOverload', + __get__ = interp2app(W_CPPStaticOverload.descr_get), + __call__ = interp2app(W_CPPStaticOverload.call), + __useffi__ = GetSetProperty(W_CPPStaticOverload.fget_useffi, W_CPPStaticOverload.fset_useffi), + __doc__ = GetSetProperty(W_CPPStaticOverload.fget_doc) +) + + class W_CPPConstructorOverload(W_CPPOverload): - @jit.elidable_promote() - def is_static(self): - return self.space.w_False + _attrs_ = [] - @jit.elidable_promote() - def unpack_cppthis(self, w_cppinstance): - return rffi.cast(capi.C_OBJECT, self.scope.handle) + @unwrap_spec(args_w='args_w') + def descr_get(self, w_cppinstance, args_w): + if self.space.is_w(w_cppinstance, self.space.w_None): + return self # unbound (TODO: probably useless) + cppol = W_CPPConstructorOverload(self.space, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + return cppol # bound - @jit.unroll_safe @unwrap_spec(args_w='args_w') - def call(self, w_cppinstance, args_w): + def call(self, args_w): # TODO: factor out the following: if capi.c_is_abstract(self.space, self.scope.handle): raise oefmt(self.space.w_TypeError, "cannot instantiate abstract class '%s'", self.scope.name) - w_result = W_CPPOverload.call(self, 
w_cppinstance, args_w) + if self.space.is_w(self.w_this, self.space.w_None) and len(args_w): + cppinstance = self.space.interp_w(W_CPPInstance, args_w[0]) + args_w = args_w[1:] + else: + cppinstance = self.space.interp_w(W_CPPInstance, self.w_this) + w_result = self.call_impl(rffi.cast(capi.C_OBJECT, self.scope.handle), args_w) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) - cppinstance = self.space.interp_w(W_CPPInstance, w_cppinstance, can_be_None=True) if cppinstance is not None: cppinstance._rawobject = newthis memory_regulator.register(cppinstance) @@ -675,47 +660,86 @@ W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', - call = interp2app(W_CPPConstructorOverload.call), - is_static = interp2app(W_CPPConstructorOverload.is_static), - prototype = interp2app(W_CPPConstructorOverload.prototype), + __get__ = interp2app(W_CPPConstructorOverload.descr_get), + __call__ = interp2app(W_CPPConstructorOverload.call), + __doc__ = GetSetProperty(W_CPPConstructorOverload.fget_doc) ) class W_CPPTemplateOverload(W_CPPOverload): + """App-level dispatcher to allow both lookup/instantiation of templated methods and + dispatch among overloads between templated and non-templated overloads.""" + + _attrs_ = ['name', 'overloads', 'master'] + _immutable_fields_ = ['name'] + + def __init__(self, space, name, declaring_scope, functions, flags = OVERLOAD_FLAGS_USE_FFI): + W_CPPOverload.__init__(self, space, declaring_scope, functions, flags) + self.name = name + self.overloads = {} + self.master = None + @unwrap_spec(args_w='args_w') - def __getitem__(self, args_w): - pass + def descr_get(self, w_cppinstance, args_w): + if self.space.is_w(w_cppinstance, self.space.w_None): + return self # unbound + cppol = W_CPPTemplateOverload(self.space, self.name, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + cppol.master = self + return cppol # bound + + @unwrap_spec(args_w='args_w') + def getitem(self, args_w): + space = self.space + 
tmpl_args = '' + for i in range(len(args_w)): + w_obj = args_w[i] + if space.isinstance_w(w_obj, space.w_text): + s = space.text_w(w_obj) # string describing type + elif space.isinstance_w(w_obj, space.w_type): + try: + # cppyy bound types + name = space.getattr(w_obj, space.newtext('__cppname__')) + except OperationError: + # generic python types + name = space.getattr(w_obj, space.newtext('__name__')) + s = space.text_w(name) + else: + # builtin types etc. + s = space.text_w(space.str(w_obj)) + if i != 0: tmpl_args += ', ' + tmpl_args += s + fullname = self.name+'<'+tmpl_args+'>' + + # find/instantiate new callable function + master = self.master + if not master: + master = self + try: + return master.overloads[fullname].descr_get(self.w_this, []) + except KeyError: + pass + + cppmeth = capi.c_get_method_template(space, self.scope, fullname) + funcs = [] + ftype = self.scope._make_cppfunction(fullname, cppmeth, funcs) + if ftype & FUNCTION_IS_STATIC: + cppol = W_CPPStaticOverload(space, self.scope, funcs[:], self.flags) + else: + cppol = W_CPPOverload(space, self.scope, funcs[:], self.flags) + master.overloads[fullname] = cppol + return cppol.descr_get(self.w_this, []) def __repr__(self): return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] W_CPPTemplateOverload.typedef = TypeDef( 'CPPTemplateOverload', - __getitem__ = interp2app(W_CPPTemplateOverload.call), - call = interp2app(W_CPPTemplateOverload.call), - is_static = interp2app(W_CPPTemplateOverload.is_static), - __useffi__ = GetSetProperty(W_CPPTemplateOverload.fget_useffi, W_CPPTemplateOverload.fset_useffi), - prototype = interp2app(W_CPPTemplateOverload.prototype), -) - - -class W_CPPBoundMethod(W_Root): - _attrs_ = ['cppthis', 'method', 'useffi'] - - def __init__(self, cppthis, method, useffi): - self.cppthis = cppthis - self.method = method - self.useffi = useffi - - def __call__(self, args_w): - return self.method.bound_call(self.cppthis, args_w, self.useffi) - - def 
__repr__(self): - return "W_CPPBoundMethod(%s)" % self.method.prototype() - -W_CPPBoundMethod.typedef = TypeDef( - 'CPPBoundMethod', - __call__ = interp2app(W_CPPBoundMethod.__call__), + __get__ = interp2app(W_CPPTemplateOverload.descr_get), + __getitem__ = interp2app(W_CPPTemplateOverload.getitem), + __call__ = interp2app(W_CPPTemplateOverload.call), + __useffi__ = GetSetProperty(W_CPPTemplateOverload.fget_useffi, W_CPPTemplateOverload.fset_useffi), + __doc__ = GetSetProperty(W_CPPTemplateOverload.fget_doc) ) @@ -826,7 +850,16 @@ return space.w_False #----- - +# Classes for data members: +# +# W_CPPScopeDecl : scope base class +# W_CPPNamespaceDecl : namespace scope +# W_CPPClassDecl : class scope +# +# Namespaces and classes mainly differ in lookups of methods. Whereas classes +# can grown templated methods, namespaces are wide open to any additions. Such +# lookups are triggered from get_scoped_pycppitem (in pythonify.py). Further +# specialization is done on the type of data/methods that each can have. 
class W_CPPScopeDecl(W_Root): _attrs_ = ['space', 'handle', 'flags', 'name', 'overloads', 'datamembers'] @@ -897,15 +930,15 @@ _attrs_ = ['space', 'handle', 'name', 'overloads', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def _make_cppfunction(self, pyname, index, funcs): - num_args = capi.c_method_num_args(self.space, self, index) - args_required = capi.c_method_req_args(self.space, self, index) + def _make_cppfunction(self, pyname, cppmeth, funcs): + num_args = capi.c_method_num_args(self.space, cppmeth) + args_required = capi.c_method_req_args(self.space, cppmeth) arg_defs = [] for i in range(num_args): - arg_type = capi.c_method_arg_type(self.space, self, index, i) - arg_dflt = capi.c_method_arg_default(self.space, self, index, i) + arg_type = capi.c_method_arg_type(self.space, cppmeth, i) + arg_dflt = capi.c_method_arg_default(self.space, cppmeth, i) arg_defs.append((arg_type, arg_dflt)) - funcs.append(CPPFunction(self.space, self, index, arg_defs, args_required)) + funcs.append(CPPMethod(self.space, self, cppmeth, arg_defs, args_required)) return FUNCTION_IS_GLOBAL def _make_datamember(self, dm_name, dm_idx): @@ -922,14 +955,20 @@ def find_overload(self, meth_name): indices = capi.c_method_indices_from_name(self.space, self, meth_name) - if not indices: - raise self.missing_attribute_error(meth_name) - cppfunctions, ftype = [], 0 - for meth_idx in indices: - ftype |= self._make_cppfunction(meth_name, meth_idx, cppfunctions) - if ftype & FUNCTION_IS_TEMPLATE: - return W_CPPTemplateOverload(self.sace, self, cppfunctions) - return W_CPPOverload(self.space, self, cppfunctions) + if indices: + cppfunctions, ftype = [], 0 + templated = False + for idx in indices: + cppmeth = capi.c_get_method(self.space, self, idx) + ftype |= self._make_cppfunction(meth_name, cppmeth, cppfunctions) + if capi.c_method_is_template(self.space, self, idx): + templated = True + if templated: + return W_CPPTemplateOverload(self.space, meth_name, self, cppfunctions[:]) + 
return W_CPPStaticOverload(self.space, self, cppfunctions[:]) + elif capi.c_exists_method_template(self.space, self, meth_name): + return W_CPPTemplateOverload(self.space, meth_name, self, []) + raise self.missing_attribute_error(meth_name) def find_datamember(self, dm_name): dm_idx = capi.c_datamember_index(self.space, self, dm_name) @@ -973,69 +1012,71 @@ def _build_overloads(self): assert len(self.overloads) == 0 - overloads_temp = {} + methods_tmp = {}; ftype_tmp = {} for idx in range(capi.c_num_methods(self.space, self)): - if capi.c_is_constructor(self.space, self, idx): + cppmeth = capi.c_get_method(self.space, self, idx) + if capi.c_is_constructor(self.space, cppmeth): pyname = '__init__' else: pyname = helper.map_operator_name(self.space, - capi.c_method_name(self.space, self, idx), - capi.c_method_num_args(self.space, self, idx), - capi.c_method_result_type(self.space, self, idx)) + capi.c_method_name(self.space, cppmeth), + capi.c_method_num_args(self.space, cppmeth), + capi.c_method_result_type(self.space, cppmeth)) try: - detail = overloads_temp[pyname] + methods = methods_tmp[pyname] except KeyError: - detail = [[], 0]; overloads_temp[pyname] = detail - detail[1] |= self._make_cppfunction(pyname, idx, detail[0]) + methods_tmp[pyname] = []; ftype_tmp[pyname] = 0 + methods = methods_tmp[pyname] + ftype_tmp[pyname] |= self._make_cppfunction(pyname, cppmeth, methods) + if capi.c_method_is_template(self.space, self, idx): + ftype_tmp[pyname] |= FUNCTION_IS_TEMPLATE # the following covers the case where the only kind of operator[](idx) # returns are the ones that produce non-const references; these can be # used for __getitem__ just as much as for __setitem__, though - if not "__getitem__" in overloads_temp: + if not "__getitem__" in methods_tmp: try: - sid = overloads_temp["__setitem__"] - gid = [[], 0]; overloads_temp["__getitem__"] = gid - for m in sid[0]: - gid[1] |= self._make_cppfunction("__getitem__", m.index, gid[0]) + si_methods = 
methods_tmp["__setitem__"] + gi_methods = []; ftype = 0 + for m in si_methods: + ftype |= self._make_cppfunction("__getitem__", m.cppmethod, gi_methods) + methods_tmp["__getitem__"] = gi_methods; ftype_tmp["__getitem__"] = ftype except KeyError: pass # just means there's no __setitem__ either # create the overloads from the method sets - for pyname, detail in overloads_temp.iteritems(): - methods = detail[0] + for pyname, methods in methods_tmp.iteritems(): + ftype = ftype_tmp[pyname] CPPMethodSort(methods).sort() - if pyname == '__init__': - overload = W_CPPConstructorOverload(self.space, self, methods) - elif detail[1] & FUNCTION_IS_TEMPLATE: - overload = W_CPPTemplateOverload(self.space, self, methods) + if ftype & FUNCTION_IS_CONSTRUCTOR: + overload = W_CPPConstructorOverload(self.space, self, methods[:]) + elif ftype & FUNCTION_IS_STATIC: + overload = W_CPPStaticOverload(self.space, self, methods[:]) + elif ftype & FUNCTION_IS_TEMPLATE: + overload = W_CPPTemplateOverload(self.space, pyname, self, methods[:]) else: - overload = W_CPPOverload(self.space, self, methods) + overload = W_CPPOverload(self.space, self, methods[:]) self.overloads[pyname] = overload - def _make_cppfunction(self, pyname, index, funcs): - num_args = capi.c_method_num_args(self.space, self, index) - args_required = capi.c_method_req_args(self.space, self, index) + def _make_cppfunction(self, pyname, cppmeth, funcs): + num_args = capi.c_method_num_args(self.space, cppmeth) + args_required = capi.c_method_req_args(self.space, cppmeth) arg_defs = [] for i in range(num_args): - arg_type = capi.c_method_arg_type(self.space, self, index, i) - arg_dflt = capi.c_method_arg_default(self.space, self, index, i) + arg_type = capi.c_method_arg_type(self.space, cppmeth, i) + arg_dflt = capi.c_method_arg_default(self.space, cppmeth, i) arg_defs.append((arg_type, arg_dflt)) ftype = 0 - if capi.c_is_constructor(self.space, self, index): - cppfunction = CPPConstructor(self.space, self, index, arg_defs, 
args_required) - ftype = FUNCTION_IS_CONSTRUCTOR - elif capi.c_method_is_template(self.space, self, index): - templ_args = capi.c_template_args(self.space, self, index) - cppfunction = CPPTemplateMethod(self.space, templ_args, self, index, arg_defs, args_required) - ftype = FUNCTION_IS_TEMPLATE - elif capi.c_is_staticmethod(self.space, self, index): - cppfunction = CPPFunction(self.space, self, index, arg_defs, args_required) - ftype = FUNCTION_IS_STATIC - elif pyname == "__setitem__": - cppfunction = CPPSetItem(self.space, self, index, arg_defs, args_required) + if pyname == "__setitem__": + cppfunction = CPPSetItem(self.space, self, cppmeth, arg_defs, args_required) ftype = FUNCTION_IS_SETITEM else: - cppfunction = CPPMethod(self.space, self, index, arg_defs, args_required) - ftype = FUNCTION_IS_METHOD + cppfunction = CPPMethod(self.space, self, cppmeth, arg_defs, args_required) + if capi.c_is_constructor(self.space, cppmeth): + ftype = FUNCTION_IS_CONSTRUCTOR + elif capi.c_is_staticmethod(self.space, cppmeth): + ftype = FUNCTION_IS_STATIC + else: + ftype = FUNCTION_IS_METHOD funcs.append(cppfunction) return ftype @@ -1061,8 +1102,8 @@ datamember = W_CPPDataMember(self.space, self, type_name, offset) self.datamembers[datamember_name] = datamember - def find_overload(self, name): - raise self.missing_attribute_error(name) + def find_overload(self, meth_name): + raise self.missing_attribute_error(meth_name) def find_datamember(self, name): raise self.missing_attribute_error(name) @@ -1227,11 +1268,12 @@ self.space, nss, self.clsdecl, other.clsdecl, "operator==") if meth_idx != -1: funcs = [] - nss._make_cppfunction("operator==", meth_idx, funcs) - ol = W_CPPOverload(self.space, nss, funcs) + cppmeth = capi.c_get_method(self.space, nss, meth_idx) + nss._make_cppfunction("operator==", cppmeth, funcs) + ol = W_CPPStaticOverload(self.space, nss, funcs[:]) # TODO: cache this operator (not done yet, as the above does not # select all overloads) - return ol.call(self, 
[self, w_other]) + return ol.call([self, w_other]) except OperationError as e: if not e.match(self.space, self.space.w_TypeError): raise diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -111,14 +111,6 @@ return scope.__module__ + '.' + scope.__name__ return 'cppyy' -def make_static_function(func_name, cppol): - def function(*args): - return cppol.call(None, *args) - function.__name__ = func_name - function.__doc__ = cppol.prototype() - return staticmethod(function) - - def make_cppnamespace(scope, name, decl): # build up a representation of a C++ namespace (namespaces are classes) @@ -147,7 +139,6 @@ break return tuple(bases) - def make_new(decl): def __new__(cls, *args): # create a place-holder only as there may be a derived class defined @@ -160,13 +151,6 @@ return instance return __new__ -def make_method(meth_name, cppol): - def method(self, *args): - return cppol.call(self, *args) - method.__name__ = meth_name - method.__doc__ = cppol.prototype() - return method - def make_cppclass(scope, cl_name, decl): import _cppyy @@ -188,7 +172,7 @@ # prepare dictionary for python-side C++ class representation def dispatch(self, m_name, signature): cppol = decl.__dispatch__(m_name, signature) - return types.MethodType(make_method(m_name, cppol), self, type(self)) + return types.MethodType(cppol, self, type(self)) d_class = {"__cppdecl__" : decl, "__new__" : make_new(decl), "__module__" : make_module_name(scope), @@ -199,10 +183,7 @@ # insert (static) methods into the class dictionary for m_name in decl.get_method_names(): cppol = decl.get_overload(m_name) - if cppol.is_static(): - d_class[m_name] = make_static_function(m_name, cppol) - else: - d_class[m_name] = make_method(m_name, cppol) + d_class[m_name] = cppol # add all data members to the dictionary of the class to be created, and # static ones also to the metaclass (needed for property setters) @@ -267,8 +248,7 
@@ if not cppitem: try: cppitem = scope.__cppdecl__.get_overload(name) - pycppitem = make_static_function(name, cppitem) - setattr(scope.__class__, name, pycppitem) + setattr(scope.__class__, name, cppitem) pycppitem = getattr(scope, name) # binds function as needed except AttributeError: pass diff --git a/pypy/module/_cppyy/src/dummy_backend.cxx b/pypy/module/_cppyy/src/dummy_backend.cxx --- a/pypy/module/_cppyy/src/dummy_backend.cxx +++ b/pypy/module/_cppyy/src/dummy_backend.cxx @@ -924,6 +924,15 @@ /* method/function reflection information --------------------------------- */ +cppyy_method_t cppyy_get_method(cppyy_scope_t handle, cppyy_index_t method_index) { + if (s_scopes.find(handle) != s_scopes.end()) { + long id = s_scopes[handle].m_method_offset + (long)method_index; + return (cppyy_method_t)id; + } + assert(!"unknown class in cppyy_get_method"); + return (cppyy_method_t)0; +} + int cppyy_num_methods(cppyy_scope_t handle) { return s_scopes[handle].m_methods.size(); } @@ -948,18 +957,15 @@ return cppstring_to_cstring(s_scopes[handle].m_methods[method_index].m_argtypes[arg_index]); } -char* cppyy_method_arg_default( - cppyy_scope_t /* handle */, cppyy_index_t /* method_index */, int /* arg_index */) { +char* cppyy_method_arg_default(cppyy_method_t, int /* arg_index */) { return cppstring_to_cstring(""); } -char* cppyy_method_signature( - cppyy_scope_t /* handle */, cppyy_index_t /* method_index */, int /* show_formalargs */) { +char* cppyy_method_signature(cppyy_method_t, int /* show_formalargs */) { return cppstring_to_cstring(""); } -char* cppyy_method_prototype( - cppyy_scope_t /* handle */, cppyy_index_t /* method_index */, int /* show_formalargs */) { +char* cppyy_method_prototype(cppyy_scope_t, cppyy_method_t, int /* show_formalargs */) { return cppstring_to_cstring(""); } @@ -967,15 +973,6 @@ return 0; } -cppyy_method_t cppyy_get_method(cppyy_scope_t handle, cppyy_index_t method_index) { - if (s_scopes.find(handle) != s_scopes.end()) { - long id = 
s_scopes[handle].m_method_offset + (long)method_index; - return (cppyy_method_t)id; - } - assert(!"unknown class in cppyy_get_method"); - return (cppyy_method_t)0; -} - cppyy_index_t cppyy_get_global_operator(cppyy_scope_t /* scope */, cppyy_scope_t /* lc */, cppyy_scope_t /* rc */, const char* /* op */) { return (cppyy_index_t)-1; diff --git a/pypy/module/_cppyy/test/test_advancedcpp.py b/pypy/module/_cppyy/test/test_advancedcpp.py --- a/pypy/module/_cppyy/test/test_advancedcpp.py +++ b/pypy/module/_cppyy/test/test_advancedcpp.py @@ -663,9 +663,8 @@ import _cppyy as cppyy Thrower = cppyy.gbl.Thrower - # TODO: clean up this interface: - Thrower.__cppdecl__.get_overload('throw_anything').__useffi__ = False - Thrower.__cppdecl__.get_overload('throw_exception').__useffi__ = False + Thrower.throw_anything.__useffi__ = False + Thrower.throw_exception.__useffi__ = False t = Thrower() diff --git a/pypy/module/_cppyy/test/test_cppyy.py b/pypy/module/_cppyy/test/test_cppyy.py --- a/pypy/module/_cppyy/test/test_cppyy.py +++ b/pypy/module/_cppyy/test/test_cppyy.py @@ -37,7 +37,8 @@ lib = ctypes.CDLL(%r, ctypes.RTLD_GLOBAL) def cpp_instantiate(tt, *args): inst = _cppyy._bind_object(0, tt, True) - tt.get_overload("__init__").call(inst, *args) + ol = tt.get_overload("__init__").__get__(inst) + ol(*args) return inst return lib, cpp_instantiate, _cppyy._scope_byname('example01'),\ _cppyy._scope_byname('payload')""" % (test_dct, ))) @@ -49,30 +50,30 @@ import sys, math t = self.example01 - res = t.get_overload("staticAddOneToInt").call(None, 1) + res = t.get_overload("staticAddOneToInt")(1) assert res == 2 - res = t.get_overload("staticAddOneToInt").call(None, 1L) + res = t.get_overload("staticAddOneToInt")(1L) assert res == 2 - res = t.get_overload("staticAddOneToInt").call(None, 1, 2) + res = t.get_overload("staticAddOneToInt")(1, 2) assert res == 4 - res = t.get_overload("staticAddOneToInt").call(None, -1) + res = t.get_overload("staticAddOneToInt")(-1) assert res == 0 maxint32 
= int(2 ** 31 - 1) - res = t.get_overload("staticAddOneToInt").call(None, maxint32-1) + res = t.get_overload("staticAddOneToInt")(maxint32-1) assert res == maxint32 - res = t.get_overload("staticAddOneToInt").call(None, maxint32) + res = t.get_overload("staticAddOneToInt")(maxint32) assert res == -maxint32-1 - raises(TypeError, 't.get_overload("staticAddOneToInt").call(None, 1, [])') - raises(TypeError, 't.get_overload("staticAddOneToInt").call(None, 1.)') - raises(TypeError, 't.get_overload("staticAddOneToInt").call(None, maxint32+1)') + raises(TypeError, 't.get_overload("staticAddOneToInt")(1, [])') + raises(TypeError, 't.get_overload("staticAddOneToInt")(1.)') + raises(TypeError, 't.get_overload("staticAddOneToInt")(maxint32+1)') def test02_static_double(self): """Test passing of a double and returning of a double on a static function.""" t = self.example01 - res = t.get_overload("staticAddToDouble").call(None, 0.09) + res = t.get_overload("staticAddToDouble")(0.09) assert res == 0.09 + 0.01 def test03_static_constcharp(self): @@ -81,14 +82,14 @@ t = self.example01 - res = t.get_overload("staticAtoi").call(None, "1") + res = t.get_overload("staticAtoi")("1") assert res == 1 - res = t.get_overload("staticStrcpy").call(None, "aap") # TODO: this leaks + res = t.get_overload("staticStrcpy")("aap") # TODO: this leaks assert res == "aap" - res = t.get_overload("staticStrcpy").call(None, u"aap") # TODO: this leaks + res = t.get_overload("staticStrcpy")(u"aap") # TODO: this leaks assert res == "aap" - raises(TypeError, 't.get_overload("staticStrcpy").call(None, 1.)') # TODO: this leaks + raises(TypeError, 't.get_overload("staticStrcpy")(1.)') # TODO: this leaks def test04_method_int(self): """Test passing of a int, returning of a int, and memory cleanup, on @@ -97,30 +98,30 @@ t = self.example01 - assert t.get_overload("getCount").call(None) == 0 + assert t.get_overload("getCount")() == 0 e1 = self.instantiate(t, 7) - assert t.get_overload("getCount").call(None) == 1 - 
res = t.get_overload("addDataToInt").call(e1, 4) + assert t.get_overload("getCount")() == 1 + res = t.get_overload("addDataToInt")(e1, 4) assert res == 11 - res = t.get_overload("addDataToInt").call(e1, -4) + res = t.get_overload("addDataToInt")(e1, -4) assert res == 3 e1.__destruct__() - assert t.get_overload("getCount").call(None) == 0 - raises(ReferenceError, 't.get_overload("addDataToInt").call(e1, 4)') + assert t.get_overload("getCount")() == 0 + raises(ReferenceError, 't.get_overload("addDataToInt")(e1, 4)') e1 = self.instantiate(t, 7) e2 = self.instantiate(t, 8) - assert t.get_overload("getCount").call(None) == 2 + assert t.get_overload("getCount")() == 2 e1.__destruct__() - assert t.get_overload("getCount").call(None) == 1 + assert t.get_overload("getCount")() == 1 e2.__destruct__() - assert t.get_overload("getCount").call(None) == 0 + assert t.get_overload("getCount")() == 0 e2.__destruct__() - assert t.get_overload("getCount").call(None) == 0 + assert t.get_overload("getCount")() == 0 - raises(TypeError, t.get_overload("addDataToInt").call, 41, 4) + raises(TypeError, t.get_overload("addDataToInt"), 41, 4) def test05_memory(self): """Test memory destruction and integrity.""" @@ -130,29 +131,29 @@ t = self.example01 - assert t.get_overload("getCount").call(None) == 0 + assert t.get_overload("getCount")() == 0 e1 = self.instantiate(t, 7) - assert t.get_overload("getCount").call(None) == 1 - res = t.get_overload("addDataToInt").call(e1, 4) + assert t.get_overload("getCount")() == 1 + res = t.get_overload("addDataToInt")(e1, 4) assert res == 11 - res = t.get_overload("addDataToInt").call(e1, -4) + res = t.get_overload("addDataToInt")(e1, -4) assert res == 3 e1 = None gc.collect() - assert t.get_overload("getCount").call(None) == 0 + assert t.get_overload("getCount")() == 0 e1 = self.instantiate(t, 7) e2 = self.instantiate(t, 8) - assert t.get_overload("getCount").call(None) == 2 + assert t.get_overload("getCount")() == 2 e1 = None gc.collect() - assert 
t.get_overload("getCount").call(None) == 1 + assert t.get_overload("getCount")() == 1 e2.__destruct__() - assert t.get_overload("getCount").call(None) == 0 + assert t.get_overload("getCount")() == 0 e2 = None gc.collect() - assert t.get_overload("getCount").call(None) == 0 + assert t.get_overload("getCount")() == 0 def test05a_memory2(self): """Test ownership control.""" @@ -161,18 +162,18 @@ t = self.example01 - assert t.get_overload("getCount").call(None) == 0 + assert t.get_overload("getCount")() == 0 e1 = self.instantiate(t, 7) - assert t.get_overload("getCount").call(None) == 1 + assert t.get_overload("getCount")() == 1 assert e1.__python_owns__ == True e1.__python_owns__ = False e1 = None gc.collect() - assert t.get_overload("getCount").call(None) == 1 + assert t.get_overload("getCount")() == 1 # forced fix-up of object count for later tests - t.get_overload("setCount").call(None, 0) + t.get_overload("setCount")(0) def test06_method_double(self): @@ -183,15 +184,15 @@ t = self.example01 e = self.instantiate(t, 13) - res = t.get_overload("addDataToDouble").call(e, 16) + res = t.get_overload("addDataToDouble")(e, 16) assert round(res-29, 8) == 0. e.__destruct__() e = self.instantiate(t, -13) - res = t.get_overload("addDataToDouble").call(e, 16) + res = t.get_overload("addDataToDouble")(e, 16) assert round(res-3, 8) == 0. 
e.__destruct__() - assert t.get_overload("getCount").call(None) == 0 + assert t.get_overload("getCount")() == 0 def test07_method_constcharp(self): """Test passing of a C string and returning of a C string on a @@ -201,14 +202,14 @@ t = self.example01 e = self.instantiate(t, 42) - res = t.get_overload("addDataToAtoi").call(e, "13") + res = t.get_overload("addDataToAtoi")(e, "13") assert res == 55 - res = t.get_overload("addToStringValue").call(e, "12") # TODO: this leaks + res = t.get_overload("addToStringValue")(e, "12") # TODO: this leaks assert res == "54" - res = t.get_overload("addToStringValue").call(e, "-12") # TODO: this leaks + res = t.get_overload("addToStringValue")(e, "-12") # TODO: this leaks assert res == "30" e.__destruct__() - assert t.get_overload("getCount").call(None) == 0 + assert t.get_overload("getCount")() == 0 def test08_pass_object_by_pointer(self): """Test passing of an instance as an argument.""" @@ -218,17 +219,17 @@ t2 = self.payload pl = self.instantiate(t2, 3.14) - assert round(t2.get_overload("getData").call(pl)-3.14, 8) == 0 - t1.get_overload("staticSetPayload").call(None, pl, 41.) - assert t2.get_overload("getData").call(pl) == 41. + assert round(t2.get_overload("getData")(pl)-3.14, 8) == 0 + t1.get_overload("staticSetPayload")(pl, 41.) + assert t2.get_overload("getData")(pl) == 41. 
e = self.instantiate(t1, 50) - t1.get_overload("setPayload").call(e, pl); - assert round(t2.get_overload("getData").call(pl)-50., 8) == 0 + t1.get_overload("setPayload")(e, pl); + assert round(t2.get_overload("getData")(pl)-50., 8) == 0 e.__destruct__() pl.__destruct__() - assert t1.get_overload("getCount").call(None) == 0 + assert t1.get_overload("getCount")() == 0 def test09_return_object_by_pointer(self): """Test returning of an instance as an argument.""" @@ -238,14 +239,14 @@ t2 = self.payload pl1 = self.instantiate(t2, 3.14) - assert round(t2.get_overload("getData").call(pl1)-3.14, 8) == 0 - pl2 = t1.get_overload("staticCyclePayload").call(None, pl1, 38.) - assert t2.get_overload("getData").call(pl2) == 38. + assert round(t2.get_overload("getData")(pl1)-3.14, 8) == 0 + pl2 = t1.get_overload("staticCyclePayload")(pl1, 38.) + assert t2.get_overload("getData")(pl2) == 38. e = self.instantiate(t1, 50) - pl2 = t1.get_overload("cyclePayload").call(e, pl1); - assert round(t2.get_overload("getData").call(pl2)-50., 8) == 0 + pl2 = t1.get_overload("cyclePayload")(e, pl1); + assert round(t2.get_overload("getData")(pl2)-50., 8) == 0 e.__destruct__() pl1.__destruct__() - assert t1.get_overload("getCount").call(None) == 0 + assert t1.get_overload("getCount")() == 0 diff --git a/pypy/module/_cppyy/test/test_datatypes.py b/pypy/module/_cppyy/test/test_datatypes.py --- a/pypy/module/_cppyy/test/test_datatypes.py +++ b/pypy/module/_cppyy/test/test_datatypes.py @@ -767,7 +767,4 @@ raises(TypeError, f3, f1, 2, 3) - # TODO: get straightforward access to the overload type - f2 = cppyy.gbl.__cppdecl__.get_overload('sum_of_double') - assert 5. == f3(f2, 5., 0.) 
diff --git a/pypy/module/_cppyy/test/test_pythonify.py b/pypy/module/_cppyy/test/test_pythonify.py --- a/pypy/module/_cppyy/test/test_pythonify.py +++ b/pypy/module/_cppyy/test/test_pythonify.py @@ -318,7 +318,7 @@ _cppyy.gbl.example01.fresh = _cppyy.gbl.installableAddOneToInt - e = _cppyy.gbl.example01(0) + e = _cppyy.gbl.example01(0) assert 2 == e.fresh(1) assert 3 == e.fresh(2) diff --git a/pypy/module/_cppyy/test/test_zjit.py b/pypy/module/_cppyy/test/test_zjit.py --- a/pypy/module/_cppyy/test/test_zjit.py +++ b/pypy/module/_cppyy/test/test_zjit.py @@ -268,19 +268,19 @@ def f(): cls = interp_cppyy.scope_byname(space, "example01") inst = interp_cppyy._bind_object(space, FakeInt(0), cls, True) - cls.get_overload("__init__").call(inst, [FakeInt(0)]) + cls.get_overload("__init__").descr_get(inst, []).call([FakeInt(0)]) cppmethod = cls.get_overload(method_name) assert isinstance(inst, interp_cppyy.W_CPPInstance) i = 10 while i > 0: drv.jit_merge_point(inst=inst, cppmethod=cppmethod, i=i) - cppmethod.call(inst, [FakeInt(i)]) + cppmethod.descr_get(inst, []).call([FakeInt(i)]) i -= 1 return 7 f() space = FakeSpace() result = self.meta_interp(f, [], listops=True, backendopt=True, listcomp=True) - self.check_jitcell_token_count(1) # same for fast and slow path?? + self.check_jitcell_token_count(0) # same for fast and slow path?? 
# rely on replacement of capi calls to raise exception instead (see FakeSpace.__init__) @py.test.mark.dont_track_allocations("cppmethod.cif_descr kept 'leaks'") From pypy.commits at gmail.com Thu Jun 7 19:16:41 2018 From: pypy.commits at gmail.com (wlav) Date: Thu, 07 Jun 2018 16:16:41 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: bring the dummy backend for testing up-to-date Message-ID: <5b19bcd9.1c69fb81.bccf8.10e4@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94738:9820fab951d5 Date: 2018-06-07 15:57 -0700 http://bitbucket.org/pypy/pypy/changeset/9820fab951d5/ Log: bring the dummy backend for testing up-to-date diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -68,9 +68,11 @@ pass # array type try: + if hasattr(space, "fake"): + raise NotImplementedError arr = space.interp_w(W_ArrayInstance, w_obj, can_be_None=True) - #if arr: - #return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) + if arr: + return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass # pre-defined nullptr diff --git a/pypy/module/_cppyy/src/dummy_backend.cxx b/pypy/module/_cppyy/src/dummy_backend.cxx --- a/pypy/module/_cppyy/src/dummy_backend.cxx +++ b/pypy/module/_cppyy/src/dummy_backend.cxx @@ -60,7 +60,6 @@ const std::string& returntype, EMethodType mtype = kNormal) : m_name(name), m_argtypes(argtypes), m_returntype(returntype), m_type(mtype) {} - std::string m_name; std::vector m_argtypes; std::string m_returntype; @@ -72,7 +71,6 @@ const std::string& type, ptrdiff_t offset, bool isstatic) : m_name(name), m_type(type), m_offset(offset), m_isstatic(isstatic) {} - std::string m_name; std::string m_type; ptrdiff_t m_offset; @@ -81,20 +79,20 @@ struct Cppyy_PseudoClassInfo { Cppyy_PseudoClassInfo() {} - Cppyy_PseudoClassInfo(const std::vector& methods, - long method_offset, + Cppyy_PseudoClassInfo(const 
std::vector& methods, const std::vector& data) : - m_methods(methods), m_method_offset(method_offset), m_datambrs(data) {} - - std::vector m_methods; - long m_method_offset; + m_methods(methods), m_datambrs(data) {} + std::vector m_methods; std::vector m_datambrs; }; typedef std::map Scopes_t; static Scopes_t s_scopes; -static std::map s_methods; +static std::map s_methods; +struct CleanPseudoMethods { + ~CleanPseudoMethods() { for (auto& x : s_methods) delete x.second; } +} _clean; int Pseudo_kNothing = 6; int Pseudo_kSomething = 111; @@ -105,28 +103,28 @@ offsetof(dummy::CppyyTestData, m_##dmname), false)); \ /* get_() */ \ argtypes.clear(); \ - methods.push_back(Cppyy_PseudoMethodInfo( \ + methods.push_back(new Cppyy_PseudoMethodInfo( \ "get_"#dmname, argtypes, #dmtype)); \ - s_methods["CppyyTestData::get_"#dmname] = s_method_id++; \ + s_methods["CppyyTestData::get_"#dmname] = methods.back(); \ /* & get__r() */ \ - methods.push_back(Cppyy_PseudoMethodInfo( \ + methods.push_back(new Cppyy_PseudoMethodInfo( \ "get_"#dmname"_r", argtypes, #dmtype"&")); \ - s_methods["CppyyTestData::get_"#dmname"_r"] = s_method_id++; \ + s_methods["CppyyTestData::get_"#dmname"_r"] = methods.back(); \ /* const & get__cr() */ \ - methods.push_back(Cppyy_PseudoMethodInfo( \ + methods.push_back(new Cppyy_PseudoMethodInfo( \ "get_"#dmname"_cr", argtypes, "const "#dmtype"&")); \ - s_methods["CppyyTestData::get_"#dmname"_cr"] = s_method_id++; \ + s_methods["CppyyTestData::get_"#dmname"_cr"] = methods.back(); \ /* void set_() */ \ argtypes.push_back(#dmtype); \ - methods.push_back(Cppyy_PseudoMethodInfo( \ + methods.push_back(new Cppyy_PseudoMethodInfo( \ "set_"#dmname, argtypes, "void")); \ - s_methods["CppyyTestData::set_"#dmname] = s_method_id++; \ + s_methods["CppyyTestData::set_"#dmname] = methods.back(); \ argtypes.clear(); \ /* void set_(const &) */ \ argtypes.push_back("const "#dmtype"&"); \ - methods.push_back(Cppyy_PseudoMethodInfo( \ + methods.push_back(new 
Cppyy_PseudoMethodInfo( \ "set_"#dmname"_cr", argtypes, "void")); \ - s_methods["CppyyTestData::set_"#dmname"_cr"] = s_method_id++ + s_methods["CppyyTestData::set_"#dmname"_cr"] = methods.back() #define PUBLIC_CPPYY_DATA2(dmname, dmtype) \ PUBLIC_CPPYY_DATA(dmname, dmtype); \ @@ -135,23 +133,23 @@ data.push_back(Cppyy_PseudoDatambrInfo("m_"#dmname"_array2", #dmtype"*", \ offsetof(dummy::CppyyTestData, m_##dmname##_array2), false)); \ argtypes.clear(); \ - methods.push_back(Cppyy_PseudoMethodInfo( \ + methods.push_back(new Cppyy_PseudoMethodInfo( \ "get_"#dmname"_array", argtypes, #dmtype"*")); \ - s_methods["CppyyTestData::get_"#dmname"_array"] = s_method_id++; \ - methods.push_back(Cppyy_PseudoMethodInfo( \ + s_methods["CppyyTestData::get_"#dmname"_array"] = methods.back(); \ + methods.push_back(new Cppyy_PseudoMethodInfo( \ "get_"#dmname"_array2", argtypes, #dmtype"*")); \ - s_methods["CppyyTestData::get_"#dmname"_array2"] = s_method_id++ + s_methods["CppyyTestData::get_"#dmname"_array2"] = methods.back() #define PUBLIC_CPPYY_DATA3(dmname, dmtype, key) \ PUBLIC_CPPYY_DATA2(dmname, dmtype); \ argtypes.push_back(#dmtype"*"); \ - methods.push_back(Cppyy_PseudoMethodInfo( \ + methods.push_back(new Cppyy_PseudoMethodInfo( \ "pass_array", argtypes, #dmtype"*")); \ - s_methods["CppyyTestData::pass_array_"#dmname] = s_method_id++; \ + s_methods["CppyyTestData::pass_array_"#dmname] = methods.back(); \ argtypes.clear(); argtypes.push_back("void*"); \ - methods.push_back(Cppyy_PseudoMethodInfo( \ + methods.push_back(new Cppyy_PseudoMethodInfo( \ "pass_void_array_"#key, argtypes, #dmtype"*")); \ - s_methods["CppyyTestData::pass_void_array_"#key] = s_method_id++ + s_methods["CppyyTestData::pass_void_array_"#key] = methods.back() #define PUBLIC_CPPYY_STATIC_DATA(dmname, dmtype) \ data.push_back(Cppyy_PseudoDatambrInfo("s_"#dmname, #dmtype, \ @@ -162,7 +160,6 @@ Cppyy_InitPseudoReflectionInfo() { // class example01 -- static long s_scope_id = 0; - static long s_method_id = 0; { 
// namespace '' s_handles[""] = (cppyy_scope_t)++s_scope_id; @@ -175,33 +172,33 @@ { // class example01 -- s_handles["example01"] = (cppyy_scope_t)++s_scope_id; - std::vector methods; + std::vector methods; // static double staticAddToDouble(double a) std::vector argtypes; argtypes.push_back("double"); - methods.push_back(Cppyy_PseudoMethodInfo("staticAddToDouble", argtypes, "double", kStatic)); - s_methods["static_example01::staticAddToDouble_double"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("staticAddToDouble", argtypes, "double", kStatic)); + s_methods["static_example01::staticAddToDouble_double"] = methods.back(); // static int staticAddOneToInt(int a) // static int staticAddOneToInt(int a, int b) argtypes.clear(); argtypes.push_back("int"); - methods.push_back(Cppyy_PseudoMethodInfo("staticAddOneToInt", argtypes, "int", kStatic)); - s_methods["static_example01::staticAddOneToInt_int"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("staticAddOneToInt", argtypes, "int", kStatic)); + s_methods["static_example01::staticAddOneToInt_int"] = methods.back(); argtypes.push_back("int"); - methods.push_back(Cppyy_PseudoMethodInfo("staticAddOneToInt", argtypes, "int", kStatic)); - s_methods["static_example01::staticAddOneToInt_int_int"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("staticAddOneToInt", argtypes, "int", kStatic)); + s_methods["static_example01::staticAddOneToInt_int_int"] = methods.back(); // static int staticAtoi(const char* str) argtypes.clear(); argtypes.push_back("const char*"); - methods.push_back(Cppyy_PseudoMethodInfo("staticAtoi", argtypes, "int", kStatic)); - s_methods["static_example01::staticAtoi_cchar*"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("staticAtoi", argtypes, "int", kStatic)); + s_methods["static_example01::staticAtoi_cchar*"] = methods.back(); // static char* staticStrcpy(const char* strin) - methods.push_back(Cppyy_PseudoMethodInfo("staticStrcpy", 
argtypes, "char*", kStatic)); - s_methods["static_example01::staticStrcpy_cchar*"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("staticStrcpy", argtypes, "char*", kStatic)); + s_methods["static_example01::staticStrcpy_cchar*"] = methods.back(); // static void staticSetPayload(payload* p, double d) // static payload* staticCyclePayload(payload* p, double d) @@ -209,90 +206,89 @@ argtypes.clear(); argtypes.push_back("payload*"); argtypes.push_back("double"); - methods.push_back(Cppyy_PseudoMethodInfo("staticSetPayload", argtypes, "void", kStatic)); - s_methods["static_example01::staticSetPayload_payload*_double"] = s_method_id++; - methods.push_back(Cppyy_PseudoMethodInfo("staticCyclePayload", argtypes, "payload*", kStatic)); - s_methods["static_example01::staticCyclePayload_payload*_double"] = s_method_id++; - methods.push_back(Cppyy_PseudoMethodInfo("staticCopyCyclePayload", argtypes, "payload", kStatic)); - s_methods["static_example01::staticCopyCyclePayload_payload*_double"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("staticSetPayload", argtypes, "void", kStatic)); + s_methods["static_example01::staticSetPayload_payload*_double"] = methods.back(); + methods.push_back(new Cppyy_PseudoMethodInfo("staticCyclePayload", argtypes, "payload*", kStatic)); + s_methods["static_example01::staticCyclePayload_payload*_double"] = methods.back(); + methods.push_back(new Cppyy_PseudoMethodInfo("staticCopyCyclePayload", argtypes, "payload", kStatic)); + s_methods["static_example01::staticCopyCyclePayload_payload*_double"] = methods.back(); // static int getCount() // static void setCount(int) argtypes.clear(); - methods.push_back(Cppyy_PseudoMethodInfo("getCount", argtypes, "int", kStatic)); - s_methods["static_example01::getCount"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("getCount", argtypes, "int", kStatic)); + s_methods["static_example01::getCount"] = methods.back(); argtypes.push_back("int"); - 
methods.push_back(Cppyy_PseudoMethodInfo("setCount", argtypes, "void", kStatic)); - s_methods["static_example01::setCount_int"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("setCount", argtypes, "void", kStatic)); + s_methods["static_example01::setCount_int"] = methods.back(); // example01() // example01(int a) argtypes.clear(); - methods.push_back(Cppyy_PseudoMethodInfo("example01", argtypes, "constructor", kConstructor)); - s_methods["example01::example01"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("example01", argtypes, "constructor", kConstructor)); + s_methods["example01::example01"] = methods.back(); argtypes.push_back("int"); - methods.push_back(Cppyy_PseudoMethodInfo("example01", argtypes, "constructor", kConstructor)); - s_methods["example01::example01_int"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("example01", argtypes, "constructor", kConstructor)); + s_methods["example01::example01_int"] = methods.back(); // int addDataToInt(int a) argtypes.clear(); argtypes.push_back("int"); - methods.push_back(Cppyy_PseudoMethodInfo("addDataToInt", argtypes, "int")); - s_methods["example01::addDataToInt_int"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("addDataToInt", argtypes, "int")); + s_methods["example01::addDataToInt_int"] = methods.back(); // int addDataToIntConstRef(const int& a) argtypes.clear(); argtypes.push_back("const int&"); - methods.push_back(Cppyy_PseudoMethodInfo("addDataToIntConstRef", argtypes, "int")); - s_methods["example01::addDataToIntConstRef_cint&"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("addDataToIntConstRef", argtypes, "int")); + s_methods["example01::addDataToIntConstRef_cint&"] = methods.back(); // int overloadedAddDataToInt(int a, int b) argtypes.clear(); argtypes.push_back("int"); argtypes.push_back("int"); - methods.push_back(Cppyy_PseudoMethodInfo("overloadedAddDataToInt", argtypes, "int")); - 
s_methods["example01::overloadedAddDataToInt_int_int"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("overloadedAddDataToInt", argtypes, "int")); + s_methods["example01::overloadedAddDataToInt_int_int"] = methods.back(); // int overloadedAddDataToInt(int a) // int overloadedAddDataToInt(int a, int b, int c) argtypes.clear(); argtypes.push_back("int"); - methods.push_back(Cppyy_PseudoMethodInfo("overloadedAddDataToInt", argtypes, "int")); - s_methods["example01::overloadedAddDataToInt_int"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("overloadedAddDataToInt", argtypes, "int")); + s_methods["example01::overloadedAddDataToInt_int"] = methods.back(); argtypes.push_back("int"); argtypes.push_back("int"); - methods.push_back(Cppyy_PseudoMethodInfo("overloadedAddDataToInt", argtypes, "int")); - s_methods["example01::overloadedAddDataToInt_int_int_int"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("overloadedAddDataToInt", argtypes, "int")); + s_methods["example01::overloadedAddDataToInt_int_int_int"] = methods.back(); // double addDataToDouble(double a) argtypes.clear(); argtypes.push_back("double"); - methods.push_back(Cppyy_PseudoMethodInfo("addDataToDouble", argtypes, "double")); - s_methods["example01::addDataToDouble_double"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("addDataToDouble", argtypes, "double")); + s_methods["example01::addDataToDouble_double"] = methods.back(); // int addDataToAtoi(const char* str) // char* addToStringValue(const char* str) argtypes.clear(); argtypes.push_back("const char*"); - methods.push_back(Cppyy_PseudoMethodInfo("addDataToAtoi", argtypes, "int")); - s_methods["example01::addDataToAtoi_cchar*"] = s_method_id++; - methods.push_back(Cppyy_PseudoMethodInfo("addToStringValue", argtypes, "char*")); - s_methods["example01::addToStringValue_cchar*"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("addDataToAtoi", argtypes, "int")); + 
s_methods["example01::addDataToAtoi_cchar*"] = methods.back(); + methods.push_back(new Cppyy_PseudoMethodInfo("addToStringValue", argtypes, "char*")); + s_methods["example01::addToStringValue_cchar*"] = methods.back(); // void setPayload(payload* p) // payload* cyclePayload(payload* p) // payload copyCyclePayload(payload* p) argtypes.clear(); argtypes.push_back("payload*"); - methods.push_back(Cppyy_PseudoMethodInfo("setPayload", argtypes, "void")); - s_methods["example01::setPayload_payload*"] = s_method_id++; - methods.push_back(Cppyy_PseudoMethodInfo("cyclePayload", argtypes, "payload*")); - s_methods["example01::cyclePayload_payload*"] = s_method_id++; - methods.push_back(Cppyy_PseudoMethodInfo("copyCyclePayload", argtypes, "payload")); - s_methods["example01::copyCyclePayload_payload*"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("setPayload", argtypes, "void")); + s_methods["example01::setPayload_payload*"] = methods.back(); + methods.push_back(new Cppyy_PseudoMethodInfo("cyclePayload", argtypes, "payload*")); + s_methods["example01::cyclePayload_payload*"] = methods.back(); + methods.push_back(new Cppyy_PseudoMethodInfo("copyCyclePayload", argtypes, "payload")); + s_methods["example01::copyCyclePayload_payload*"] = methods.back(); - Cppyy_PseudoClassInfo info( - methods, s_method_id - methods.size(), std::vector()); + Cppyy_PseudoClassInfo info(methods, std::vector()); s_scopes[(cppyy_scope_t)s_scope_id] = info; } // -- class example01 @@ -301,27 +297,26 @@ { // class payload -- s_handles["payload"] = (cppyy_scope_t)++s_scope_id; - std::vector methods; + std::vector methods; // payload(double d = 0.) 
std::vector argtypes; argtypes.push_back("double"); - methods.push_back(Cppyy_PseudoMethodInfo("payload", argtypes, "constructor", kConstructor)); - s_methods["payload::payload_double"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("payload", argtypes, "constructor", kConstructor)); + s_methods["payload::payload_double"] = methods.back(); // double getData() argtypes.clear(); - methods.push_back(Cppyy_PseudoMethodInfo("getData", argtypes, "double")); - s_methods["payload::getData"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("getData", argtypes, "double")); + s_methods["payload::getData"] = methods.back(); // void setData(double d) argtypes.clear(); argtypes.push_back("double"); - methods.push_back(Cppyy_PseudoMethodInfo("setData", argtypes, "void")); - s_methods["payload::setData_double"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("setData", argtypes, "void")); + s_methods["payload::setData_double"] = methods.back(); - Cppyy_PseudoClassInfo info( - methods, s_method_id - methods.size(), std::vector()); + Cppyy_PseudoClassInfo info(methods, std::vector()); s_scopes[(cppyy_scope_t)s_scope_id] = info; } // -- class payload @@ -330,21 +325,21 @@ { // class CppyyTestData -- s_handles["CppyyTestData"] = (cppyy_scope_t)++s_scope_id; - std::vector methods; + std::vector methods; // CppyyTestData() std::vector argtypes; - methods.push_back(Cppyy_PseudoMethodInfo("CppyyTestData", argtypes, "constructor", kConstructor)); - s_methods["CppyyTestData::CppyyTestData"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("CppyyTestData", argtypes, "constructor", kConstructor)); + s_methods["CppyyTestData::CppyyTestData"] = methods.back(); - methods.push_back(Cppyy_PseudoMethodInfo("destroy_arrays", argtypes, "void")); - s_methods["CppyyTestData::destroy_arrays"] = s_method_id++; + methods.push_back(new Cppyy_PseudoMethodInfo("destroy_arrays", argtypes, "void")); + s_methods["CppyyTestData::destroy_arrays"] = 
methods.back(); std::vector data; PUBLIC_CPPYY_DATA2(bool, bool); PUBLIC_CPPYY_DATA (char, char); PUBLIC_CPPYY_DATA (schar, signed char); - PUBLIC_CPPYY_DATA (uchar, unsigned char); + PUBLIC_CPPYY_DATA2(uchar, unsigned char); PUBLIC_CPPYY_DATA3(short, short, h); PUBLIC_CPPYY_DATA3(ushort, unsigned short, H); PUBLIC_CPPYY_DATA3(int, int, i); @@ -389,7 +384,7 @@ data.push_back(Cppyy_PseudoDatambrInfo( "kLots", "CppyyTestData::EWhat", (ptrdiff_t)&Pseudo_kLots, true)); - Cppyy_PseudoClassInfo info(methods, s_method_id - methods.size(), data); + Cppyy_PseudoClassInfo info(methods, data); s_scopes[(cppyy_scope_t)s_scope_id] = info; } // -- class CppyyTest_data @@ -432,7 +427,7 @@ /* method/function dispatching -------------------------------------------- */ void cppyy_call_v(cppyy_method_t method, cppyy_object_t self, int nargs, void* args) { - long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["static_example01::staticSetPayload_payload*_double"]) { assert(!self && nargs == 2); dummy::example01::staticSetPayload((dummy::payload*)(*(long*)&((CPPYY_G__value*)args)[0]), @@ -522,7 +517,7 @@ unsigned char cppyy_call_b(cppyy_method_t method, cppyy_object_t self, int nargs, void* args) { unsigned char result = 0; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["CppyyTestData::get_bool"]) { assert(self && nargs == 0); result = (unsigned char)((dummy::CppyyTestData*)self)->get_bool(); @@ -534,7 +529,7 @@ char cppyy_call_c(cppyy_method_t method, cppyy_object_t self, int nargs, void* args) { char result = 0; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["CppyyTestData::get_char"]) { assert(self && nargs == 0); result = ((dummy::CppyyTestData*)self)->get_char(); @@ -549,7 +544,7 @@ short cppyy_call_h(cppyy_method_t method, cppyy_object_t self, int nargs, void* args) { short result 
= 0; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["CppyyTestData::get_short"]) { assert(self && nargs == 0); result = ((dummy::CppyyTestData*)self)->get_short(); @@ -564,7 +559,7 @@ int cppyy_call_i(cppyy_method_t method, cppyy_object_t self, int nargs, void* args) { int result = 0; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["static_example01::staticAddOneToInt_int"]) { assert(!self && nargs == 1); result = dummy::example01::staticAddOneToInt(((CPPYY_G__value*)args)[0].obj.in); @@ -596,7 +591,7 @@ long cppyy_call_l(cppyy_method_t method, cppyy_object_t self, int nargs, void* args) { long result = 0; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["static_example01::staticStrcpy_cchar*"]) { assert(!self && nargs == 1); result = (long)dummy::example01::staticStrcpy( @@ -629,6 +624,12 @@ } else if (idx == s_methods["CppyyTestData::get_bool_array2"]) { assert(self && nargs == 0); result = (long)((dummy::CppyyTestData*)self)->get_bool_array2(); + } else if (idx == s_methods["CppyyTestData::get_uchar_array"]) { + assert(self && nargs == 0); + result = (long)((dummy::CppyyTestData*)self)->get_uchar_array(); + } else if (idx == s_methods["CppyyTestData::get_uchar_array2"]) { + assert(self && nargs == 0); + result = (long)((dummy::CppyyTestData*)self)->get_uchar_array2(); } else if (idx == s_methods["CppyyTestData::get_short_array"]) { assert(self && nargs == 0); result = (long)((dummy::CppyyTestData*)self)->get_short_array(); @@ -737,7 +738,7 @@ long long cppyy_call_ll(cppyy_method_t method, cppyy_object_t self, int nargs, void* args) { long long result = 0; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["CppyyTestData::get_llong"]) { assert(self && nargs == 0); result = 
((dummy::CppyyTestData*)self)->get_llong(); @@ -752,7 +753,7 @@ float cppyy_call_f(cppyy_method_t method, cppyy_object_t self, int nargs, void* args) { float result = 0; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["CppyyTestData::get_float"]) { assert(self && nargs == 0); result = ((dummy::CppyyTestData*)self)->get_float(); @@ -764,7 +765,7 @@ double cppyy_call_d(cppyy_method_t method, cppyy_object_t self, int nargs, void* args) { double result = 0.; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["static_example01::staticAddToDouble_double"]) { assert(!self && nargs == 1); result = dummy::example01::staticAddToDouble(((CPPYY_G__value*)args)[0].obj.d); @@ -794,7 +795,7 @@ void* cppyy_call_r(cppyy_method_t method, cppyy_object_t self, int nargs, void* args) { void* result = nullptr; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (0) {} DISPATCH_CALL_R_GET(bool) DISPATCH_CALL_R_GET(short) @@ -818,7 +819,7 @@ char* cppyy_call_s(cppyy_method_t method, cppyy_object_t self, int nargs, void* args, size_t* /* length */) { char* result = 0; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["static_example01::staticStrcpy_cchar*"]) { assert(!self && nargs == 1); result = dummy::example01::staticStrcpy((const char*)(*(long*)&((CPPYY_G__value*)args)[0])); @@ -830,7 +831,7 @@ cppyy_object_t cppyy_constructor(cppyy_method_t method, cppyy_type_t handle, int nargs, void* args) { void* result = 0; - const long idx = (long)method; + Cppyy_PseudoMethodInfo* idx = (Cppyy_PseudoMethodInfo*)method; if (idx == s_methods["example01::example01"]) { assert(nargs == 0); result = new dummy::example01; @@ -850,11 +851,7 @@ return (cppyy_object_t)result; } -cppyy_funcaddr_t cppyy_function_address_from_index(cppyy_scope_t /* scope 
*/, cppyy_index_t /* idx */) { - return (cppyy_funcaddr_t)0; -} - -cppyy_funcaddr_t cppyy_function_address_from_method(cppyy_method_t /* method */) { +cppyy_funcaddr_t cppyy_function_address(cppyy_method_t /* method */) { return (cppyy_funcaddr_t)0; } @@ -922,12 +919,15 @@ return 0; } +int cppyy_smartptr_info(const char* name, cppyy_type_t* raw, cppyy_method_t* deref) { + return 0; +} + /* method/function reflection information --------------------------------- */ -cppyy_method_t cppyy_get_method(cppyy_scope_t handle, cppyy_index_t method_index) { +cppyy_method_t cppyy_get_method(cppyy_scope_t handle, cppyy_index_t idx) { if (s_scopes.find(handle) != s_scopes.end()) { - long id = s_scopes[handle].m_method_offset + (long)method_index; - return (cppyy_method_t)id; + return (cppyy_method_t)s_scopes[handle].m_methods[idx]; } assert(!"unknown class in cppyy_get_method"); return (cppyy_method_t)0; @@ -937,24 +937,24 @@ return s_scopes[handle].m_methods.size(); } -char* cppyy_method_name(cppyy_scope_t handle, cppyy_index_t method_index) { - return cppstring_to_cstring(s_scopes[handle].m_methods[(int)method_index].m_name); +char* cppyy_method_name(cppyy_method_t method) { + return cppstring_to_cstring(((Cppyy_PseudoMethodInfo*)method)->m_name); } -char* cppyy_method_result_type(cppyy_scope_t handle, cppyy_index_t method_index) { - return cppstring_to_cstring(s_scopes[handle].m_methods[method_index].m_returntype); +char* cppyy_method_result_type(cppyy_method_t method) { + return cppstring_to_cstring(((Cppyy_PseudoMethodInfo*)method)->m_returntype); } -int cppyy_method_num_args(cppyy_scope_t handle, cppyy_index_t method_index) { - return s_scopes[handle].m_methods[method_index].m_argtypes.size(); +int cppyy_method_num_args(cppyy_method_t method) { + return ((Cppyy_PseudoMethodInfo*)method)->m_argtypes.size(); } -int cppyy_method_req_args(cppyy_scope_t handle, cppyy_index_t method_index) { - return cppyy_method_num_args(handle, method_index); +int 
cppyy_method_req_args(cppyy_method_t method) { + return cppyy_method_num_args(method); } -char* cppyy_method_arg_type(cppyy_scope_t handle, cppyy_index_t method_index, int arg_index) { - return cppstring_to_cstring(s_scopes[handle].m_methods[method_index].m_argtypes[arg_index]); +char* cppyy_method_arg_type(cppyy_method_t method, int idx) { + return cppstring_to_cstring(((Cppyy_PseudoMethodInfo*)method)->m_argtypes[idx]); } char* cppyy_method_arg_default(cppyy_method_t, int /* arg_index */) { @@ -969,6 +969,10 @@ return cppstring_to_cstring(""); } +int cppyy_exists_method_template(cppyy_scope_t scope, const char* name) { + return 0; +} + int cppyy_method_is_template(cppyy_scope_t /* handle */, cppyy_index_t /* method_index */) { return 0; } @@ -980,24 +984,24 @@ /* method properties ----------------------------------------------------- */ -int cppyy_is_publicmethod(cppyy_type_t /* handle */, cppyy_index_t /* method_index */) { +int cppyy_is_publicmethod(cppyy_method_t) { return 1; } -int cppyy_is_constructor(cppyy_type_t handle, cppyy_index_t method_index) { - if (s_scopes.find(handle) != s_scopes.end()) - return s_scopes[handle].m_methods[method_index].m_type == kConstructor; +int cppyy_is_constructor(cppyy_method_t method) { + if (method) + return ((Cppyy_PseudoMethodInfo*)method)->m_type == kConstructor; assert(!"unknown class in cppyy_is_constructor"); return 0; } -int cppyy_is_destructor(cppyy_type_t /* handle */, cppyy_index_t /* method_index */) { +int cppyy_is_destructor(cppyy_method_t) { return 0; } -int cppyy_is_staticmethod(cppyy_type_t handle, cppyy_index_t method_index) { - if (s_scopes.find(handle) != s_scopes.end()) - return s_scopes[handle].m_methods[method_index].m_type == kStatic; +int cppyy_is_staticmethod(cppyy_method_t method) { + if (method) + return ((Cppyy_PseudoMethodInfo*)method)->m_type == kStatic; assert(!"unknown class in cppyy_is_staticmethod"); return 0; } From pypy.commits at gmail.com Fri Jun 8 09:52:30 2018 From: pypy.commits at 
gmail.com (cfbolz) Date: Fri, 08 Jun 2018 06:52:30 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: merge py3.5 Message-ID: <5b1a8a1e.1c69fb81.66720.d8ef@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: py3.6 Changeset: r94739:a2790e033e07 Date: 2018-06-08 15:50 +0200 http://bitbucket.org/pypy/pypy/changeset/a2790e033e07/ Log: merge py3.5 diff too long, truncating to 2000 out of 9430 lines diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -33,7 +33,12 @@ 050d84dd78997f021acf0e133934275d63547cc0 release-pypy2.7-v5.4.1 050d84dd78997f021acf0e133934275d63547cc0 release-pypy2.7-v5.4.1 0e2d9a73f5a1818d0245d75daccdbe21b2d5c3ef release-pypy2.7-v5.4.1 +4909c06daf41ce88f87dc01c57959cadad4df4a8 RevDB-pypy2.7-v5.4.1 +4909c06daf41ce88f87dc01c57959cadad4df4a8 RevDB-pypy2.7-v5.4.1 +d7724c0a5700b895a47de44074cdf5fd659a988f RevDB-pypy2.7-v5.4.1 aff251e543859ce4508159dd9f1a82a2f553de00 release-pypy2.7-v5.6.0 +e90317857d27917bf840caf675832292ee070510 RevDB-pypy2.7-v5.6.1 +a24d6c7000c8099c73d3660857f7e3cee5ac045c RevDB-pypy2.7-v5.6.2 fa3249d55d15b9829e1be69cdf45b5a44cec902d release-pypy2.7-v5.7.0 b16a4363e930f6401bceb499b9520955504c6cb0 release-pypy3.5-v5.7.0 1aa2d8e03cdfab54b7121e93fda7e98ea88a30bf release-pypy2.7-v5.7.1 diff --git a/lib-python/3/opcode.py b/lib-python/3/opcode.py --- a/lib-python/3/opcode.py +++ b/lib-python/3/opcode.py @@ -224,5 +224,6 @@ def_op('CALL_METHOD', 202) # #args not including 'self' def_op('BUILD_LIST_FROM_ARG', 203) jrel_op('JUMP_IF_NOT_DEBUG', 204) # jump over assert statements +def_op('LOAD_REVDB_VAR', 205) # reverse debugger (syntax example: $5) del def_op, name_op, jrel_op, jabs_op diff --git a/lib_pypy/_sysconfigdata.py b/lib_pypy/_sysconfigdata.py --- a/lib_pypy/_sysconfigdata.py +++ b/lib_pypy/_sysconfigdata.py @@ -24,6 +24,15 @@ 'VERSION': sys.version[:3] } +if find_executable("gcc"): + build_time_vars.update({ + "CC": "gcc -pthread", + "GNULD": "yes", + "LDSHARED": "gcc -pthread -shared", + }) + if find_executable("g++"): + 
build_time_vars["CXX"] = "g++ -pthread" + if sys.platform[:6] == "darwin": import platform if platform.machine() == 'i386': @@ -36,12 +45,6 @@ arch = platform.machine() build_time_vars['LDSHARED'] += ' -undefined dynamic_lookup' build_time_vars['CC'] += ' -arch %s' % (arch,) + if "CXX" in build_time_vars: + build_time_vars['CXX'] += ' -arch %s' % (arch,) -if find_executable("gcc"): - build_time_vars.update({ - "CC": "gcc -pthread", - "GNULD": "yes", - "LDSHARED": "gcc -pthread -shared", - }) - if find_executable("g++"): - build_time_vars["CXX"] = "g++ -pthread" diff --git a/lib_pypy/grp.py b/lib_pypy/grp.py --- a/lib_pypy/grp.py +++ b/lib_pypy/grp.py @@ -5,6 +5,8 @@ import os from _pwdgrp_cffi import ffi, lib import _structseq +import thread +_lock = thread.allocate_lock() try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f @@ -33,37 +35,39 @@ @builtinify def getgrgid(gid): - try: - res = lib.getgrgid(gid) - except TypeError: - gid = int(gid) - res = lib.getgrgid(gid) - import warnings - warnings.warn("group id must be int", DeprecationWarning) - if not res: - # XXX maybe check error eventually - raise KeyError(gid) - return _group_from_gstruct(res) - + with _lock: + try: + res = lib.getgrgid(gid) + except TypeError: + gid = int(gid) + res = lib.getgrgid(gid) + import warnings + warnings.warn("group id must be int", DeprecationWarning) + if not res: + # XXX maybe check error eventually + raise KeyError(gid) + return _group_from_gstruct(res) @builtinify def getgrnam(name): if not isinstance(name, str): raise TypeError("expected string") - res = lib.getgrnam(os.fsencode(name)) - if not res: - raise KeyError("'getgrnam(): name not found: %s'" % name) - return _group_from_gstruct(res) + with _lock: + res = lib.getgrnam(os.fsencode(name)) + if not res: + raise KeyError("'getgrnam(): name not found: %s'" % name) + return _group_from_gstruct(res) @builtinify def getgrall(): - lib.setgrent() lst = [] - while 1: - p = lib.getgrent() - if not p: 
- break - lst.append(_group_from_gstruct(p)) - lib.endgrent() + with _lock: + lib.setgrent() + while 1: + p = lib.getgrent() + if not p: + break + lst.append(_group_from_gstruct(p)) + lib.endgrent() return lst __all__ = ('struct_group', 'getgrgid', 'getgrnam', 'getgrall') diff --git a/lib_pypy/pwd.py b/lib_pypy/pwd.py --- a/lib_pypy/pwd.py +++ b/lib_pypy/pwd.py @@ -12,6 +12,8 @@ from _pwdgrp_cffi import ffi, lib import _structseq +import thread +_lock = thread.allocate_lock() try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f @@ -54,10 +56,11 @@ Return the password database entry for the given numeric user ID. See pwd.__doc__ for more on password database entries. """ - pw = lib.getpwuid(uid) - if not pw: - raise KeyError("getpwuid(): uid not found: %s" % uid) - return _mkpwent(pw) + with _lock: + pw = lib.getpwuid(uid) + if not pw: + raise KeyError("getpwuid(): uid not found: %s" % uid) + return _mkpwent(pw) @builtinify def getpwnam(name): @@ -70,10 +73,11 @@ if not isinstance(name, basestring): raise TypeError("expected string") name = str(name) - pw = lib.getpwnam(name) - if not pw: - raise KeyError("getpwname(): name not found: %s" % name) - return _mkpwent(pw) + with _lock: + pw = lib.getpwnam(name) + if not pw: + raise KeyError("getpwname(): name not found: %s" % name) + return _mkpwent(pw) @builtinify def getpwall(): @@ -83,13 +87,14 @@ See pwd.__doc__ for more on password database entries. 
""" users = [] - lib.setpwent() - while True: - pw = lib.getpwent() - if not pw: - break - users.append(_mkpwent(pw)) - lib.endpwent() + with _lock: + lib.setpwent() + while True: + pw = lib.getpwent() + if not pw: + break + users.append(_mkpwent(pw)) + lib.endpwent() return users __all__ = ('struct_passwd', 'getpwuid', 'getpwnam', 'getpwall') diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -61,6 +61,11 @@ "termios", "_minimal_curses", ]) +reverse_debugger_disable_modules = set([ + "_continuation", "_vmprof", "_multiprocessing", + "micronumpy", + ]) + # XXX this should move somewhere else, maybe to platform ("is this posixish" # check or something) if sys.platform == "win32": @@ -297,6 +302,9 @@ modules = working_modules.copy() if config.translation.sandbox: modules = default_modules + if config.translation.reverse_debugger: + for mod in reverse_debugger_disable_modules: + setattr(config.objspace.usemodules, mod, False) # ignore names from 'essential_modules', notably 'exceptions', which # may not be present in config.objspace.usemodules at all modules = [name for name in modules if name not in essential_modules] diff --git a/pypy/doc/install.rst b/pypy/doc/install.rst --- a/pypy/doc/install.rst +++ b/pypy/doc/install.rst @@ -20,7 +20,7 @@ OS and architecture. You may be able to use either use the `most recent release`_ or one of our `development nightly build`_. These builds depend on dynamically linked libraries that may not be available on your -OS. See the section about `Linux binaries` for more info and alternatives that +OS. See the section about `Linux binaries`_ for more info and alternatives that may work on your system. Please note that the nightly builds are not diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -18,3 +18,17 @@ .. 
branch: crypt_h Include crypt.h for crypt() on Linux + +.. branch: gc-more-logging + +Log additional gc-minor and gc-collect-step info in the PYPYLOG + +.. branch: reverse-debugger + +The reverse-debugger branch has been merged. For more information, see +https://bitbucket.org/pypy/revdb + + +.. branch: pyparser-improvements-3 + +Small refactorings in the Python parser. diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -13,6 +13,13 @@ Use implementation-specific site directories in sysconfig like in Python2 + .. branch: alex_gaynor/remove-an-unneeded-call-into-openssl-th-1526429141011 Remove an unneeded call into OpenSSL, from cpython https://github.com/python/cpython/pull/6887 + + +.. branch: py3.5-reverse-debugger + +The reverse-debugger branch has been merged. For more information, see +https://bitbucket.org/pypy/revdb diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -88,11 +88,24 @@ run_protected() handles details like forwarding exceptions to sys.excepthook(), catching SystemExit, etc. """ + # don't use try:except: here, otherwise the exception remains + # visible in user code. Make sure revdb_stop is a callable, so + # that we can call it immediately after finally: below. Doing + # so minimizes the number of "blind" lines that we need to go + # back from, with "bstep", after we do "continue" in revdb. 
+ if '__pypy__' in sys.builtin_module_names: + from __pypy__ import revdb_stop + else: + revdb_stop = None + if revdb_stop is None: + revdb_stop = lambda: None + try: # run it try: f(*fargs, **fkwds) finally: + revdb_stop() sys.settrace(None) sys.setprofile(None) except SystemExit as e: diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py --- a/pypy/interpreter/astcompiler/assemble.py +++ b/pypy/interpreter/astcompiler/assemble.py @@ -695,6 +695,7 @@ # TODO ops.BUILD_LIST_FROM_ARG: 1, + ops.LOAD_REVDB_VAR: 1, ops.LOAD_CLASSDEREF: 1, } diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py --- a/pypy/interpreter/astcompiler/ast.py +++ b/pypy/interpreter/astcompiler/ast.py @@ -1801,6 +1801,8 @@ return Num.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Str): return Str.from_object(space, w_node) + if space.isinstance_w(w_node, get(space).w_RevDBMetaVar): + return RevDBMetaVar.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_FormattedValue): return FormattedValue.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_JoinedStr): @@ -2711,6 +2713,41 @@ State.ast_type('Str', 'expr', ['s']) +class RevDBMetaVar(expr): + + def __init__(self, metavar, lineno, col_offset): + self.metavar = metavar + expr.__init__(self, lineno, col_offset) + + def walkabout(self, visitor): + visitor.visit_RevDBMetaVar(self) + + def mutate_over(self, visitor): + return visitor.visit_RevDBMetaVar(self) + + def to_object(self, space): + w_node = space.call_function(get(space).w_RevDBMetaVar) + w_metavar = space.newint(self.metavar) # int + space.setattr(w_node, space.newtext('metavar'), w_metavar) + w_lineno = space.newint(self.lineno) # int + space.setattr(w_node, space.newtext('lineno'), w_lineno) + w_col_offset = space.newint(self.col_offset) # int + space.setattr(w_node, space.newtext('col_offset'), w_col_offset) + return w_node + + @staticmethod + def 
from_object(space, w_node): + w_metavar = get_field(space, w_node, 'metavar', False) + w_lineno = get_field(space, w_node, 'lineno', False) + w_col_offset = get_field(space, w_node, 'col_offset', False) + _metavar = space.int_w(w_metavar) + _lineno = space.int_w(w_lineno) + _col_offset = space.int_w(w_col_offset) + return RevDBMetaVar(_metavar, _lineno, _col_offset) + +State.ast_type('RevDBMetaVar', 'expr', ['metavar']) + + class FormattedValue(expr): def __init__(self, value, conversion, format_spec, lineno, col_offset): @@ -4205,6 +4242,8 @@ return self.default_visitor(node) def visit_Str(self, node): return self.default_visitor(node) + def visit_RevDBMetaVar(self, node): + return self.default_visitor(node) def visit_FormattedValue(self, node): return self.default_visitor(node) def visit_JoinedStr(self, node): @@ -4444,6 +4483,9 @@ def visit_Str(self, node): pass + def visit_RevDBMetaVar(self, node): + pass + def visit_FormattedValue(self, node): node.value.walkabout(self) if node.format_spec: diff --git a/pypy/interpreter/astcompiler/astbuilder.py b/pypy/interpreter/astcompiler/astbuilder.py --- a/pypy/interpreter/astcompiler/astbuilder.py +++ b/pypy/interpreter/astcompiler/astbuilder.py @@ -1303,6 +1303,11 @@ else: # a dictionary display return self.handle_dictdisplay(maker, atom_node) + elif first_child_type == tokens.REVDBMETAVAR: + string = atom_node.get_child(0).get_value() + return ast.RevDBMetaVar(int(string[1:]), + atom_node.get_lineno(), + atom_node.get_column()) else: raise AssertionError("unknown atom") diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py --- a/pypy/interpreter/astcompiler/codegen.py +++ b/pypy/interpreter/astcompiler/codegen.py @@ -1677,6 +1677,20 @@ fmt.format_spec.walkabout(self) self.emit_op_arg(ops.FORMAT_VALUE, arg) + def _revdb_metavar(self, node): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import dbstate + if not 
dbstate.standard_code: + self.emit_op_arg(ops.LOAD_REVDB_VAR, node.metavar) + return True + return False + + def visit_RevDBMetaVar(self, node): + if self.space.reverse_debugging and self._revdb_metavar(node): + return + self.error("Unknown character ('$NUM' is only valid in the " + "reverse-debugger)", node) + class TopLevelCodeGenerator(PythonCodeGenerator): diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py --- a/pypy/interpreter/astcompiler/test/test_astbuilder.py +++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py @@ -1168,7 +1168,7 @@ assert isinstance(s, ast.Bytes) assert space.eq_w(s.s, space.newbytes("hi implicitly extra")) raises(SyntaxError, self.get_first_expr, "b'hello' 'world'") - sentence = u"Die Männer ärgen sich!" + sentence = u"Die Männer ärgern sich!" source = u"# coding: utf-7\nstuff = '%s'" % (sentence,) info = pyparse.CompileInfo("", "exec") tree = self.parser.parse_source(source.encode("utf-7"), info) diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ -31,7 +31,7 @@ generator._resolve_block_targets(blocks) return generator, blocks -class TestCompiler: +class BaseTestCompiler: """These tests compile snippets of code and check them by running them with our own interpreter. 
These are thus not completely *unit* tests, but given that our interpreter is @@ -97,6 +97,9 @@ def error_test(self, source, exc_type): py.test.raises(exc_type, self.simple_test, source, None, None) + +class TestCompiler(BaseTestCompiler): + def test_issue_713(self): func = "def f(_=2): return (_ if _ else _) if False else _" yield self.st, func, "f()", 2 @@ -1247,6 +1250,24 @@ yield self.st, src, 'z', 0xd8 +class TestCompilerRevDB(BaseTestCompiler): + spaceconfig = {"translation.reverse_debugger": True} + + def test_revdb_metavar(self): + from pypy.interpreter.reverse_debugging import dbstate, setup_revdb + self.space.config.translation.reverse_debugger = True + self.space.reverse_debugging = True + try: + setup_revdb(self.space) + dbstate.standard_code = False + dbstate.metavars = [self.space.wrap(6)] + self.simple_test("x = 7*$0", "x", 42) + dbstate.standard_code = True + self.error_test("x = 7*$0", SyntaxError) + finally: + self.space.reverse_debugging = False + + class AppTestCompiler: def setup_class(cls): diff --git a/pypy/interpreter/astcompiler/tools/Python.asdl b/pypy/interpreter/astcompiler/tools/Python.asdl --- a/pypy/interpreter/astcompiler/tools/Python.asdl +++ b/pypy/interpreter/astcompiler/tools/Python.asdl @@ -77,6 +77,7 @@ | Call(expr func, expr* args, keyword* keywords) | Num(object n) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? + | RevDBMetaVar(int metavar) | FormattedValue(expr value, int? conversion, expr? 
format_spec) | JoinedStr(expr* values) | Bytes(bytes s) diff --git a/pypy/interpreter/astcompiler/validate.py b/pypy/interpreter/astcompiler/validate.py --- a/pypy/interpreter/astcompiler/validate.py +++ b/pypy/interpreter/astcompiler/validate.py @@ -461,6 +461,9 @@ node.slice.walkabout(self) self._validate_expr(node.value) + def visit_RevDBMetaVar(self, node): + pass + # Subscripts def visit_Slice(self, node): if node.lower: diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -405,6 +405,8 @@ """Base class for the interpreter-level implementations of object spaces. http://pypy.readthedocs.org/en/latest/objspace.html""" + reverse_debugging = False + @not_rpython def __init__(self, config=None): "Basic initialization of objects." @@ -416,6 +418,7 @@ from pypy.config.pypyoption import get_pypy_config config = get_pypy_config(translating=False) self.config = config + self.reverse_debugging = config.translation.reverse_debugger self.builtin_modules = {} self.reloading_modules = {} @@ -433,6 +436,9 @@ def startup(self): # To be called before using the space + if self.reverse_debugging: + self._revdb_startup() + self.threadlocals.enter_thread(self) # Initialize already imported builtin modules @@ -823,7 +829,8 @@ w_u1 = self.interned_strings.get(u) if w_u1 is None: w_u1 = w_u - self.interned_strings.set(u, w_u1) + if self._side_effects_ok(): + self.interned_strings.set(u, w_u1) return w_u1 def new_interned_str(self, s): @@ -835,9 +842,39 @@ w_s1 = self.interned_strings.get(u) if w_s1 is None: w_s1 = self.newunicode(u) - self.interned_strings.set(u, w_s1) + if self._side_effects_ok(): + self.interned_strings.set(u, w_s1) return w_s1 + def _revdb_startup(self): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import setup_revdb + setup_revdb(self) + + def _revdb_standard_code(self): + # moved in its own function for 
the import statement + from pypy.interpreter.reverse_debugging import dbstate + return dbstate.standard_code + + def _side_effects_ok(self): + # For the reverse debugger: we run compiled watchpoint + # expressions in a fast way that will crash if they have + # side-effects. The obvious Python code with side-effects is + # documented "don't do that"; but some non-obvious side + # effects are also common, like interning strings (from + # unmarshalling the code object containing the watchpoint + # expression) to the two attribute caches in mapdict.py and + # typeobject.py. For now, we have to identify such places + # that are not acceptable for "reasonable" read-only + # watchpoint expressions, and write: + # + # if not space._side_effects_ok(): + # don't cache. + # + if self.reverse_debugging: + return self._revdb_standard_code() + return True + def get_interned_str(self, s): """Assumes an identifier (utf-8 encoded str). Returns None if the identifier is not interned, or not a valid utf-8 string at all. diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -71,6 +71,8 @@ return frame def enter(self, frame): + if self.space.reverse_debugging: + self._revdb_enter(frame) frame.f_backref = self.topframeref self.topframeref = jit.virtual_ref(frame) @@ -91,6 +93,8 @@ # be accessed also later frame_vref() jit.virtual_ref_finish(frame_vref, frame) + if self.space.reverse_debugging: + self._revdb_leave(got_exception) # ________________________________________________________________ @@ -160,6 +164,8 @@ Like bytecode_trace() but doesn't invoke any other events besides the trace function. 
""" + if self.space.reverse_debugging: + self._revdb_potential_stop_point(frame) if (frame.get_w_f_trace() is None or self.is_tracing or self.gettrace() is None): return @@ -373,6 +379,21 @@ if self.space.check_signal_action is not None: self.space.check_signal_action.perform(self, None) + def _revdb_enter(self, frame): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import enter_call + enter_call(self.topframeref(), frame) + + def _revdb_leave(self, got_exception): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import leave_call + leave_call(self.topframeref(), got_exception) + + def _revdb_potential_stop_point(self, frame): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import potential_stop_point + potential_stop_point(frame) + def _freeze_(self): raise Exception("ExecutionContext instances should not be seen during" " translation. Now is a good time to inspect the" diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py --- a/pypy/interpreter/pyframe.py +++ b/pypy/interpreter/pyframe.py @@ -783,9 +783,11 @@ def fget_f_builtins(self, space): return self.get_builtin().getdict(space) + def get_f_back(self): + return ExecutionContext.getnextframe_nohidden(self) + def fget_f_back(self, space): - f_back = ExecutionContext.getnextframe_nohidden(self) - return f_back + return self.get_f_back() def fget_f_lasti(self, space): return self.space.newint(self.last_instr) diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -442,6 +442,8 @@ self.FORMAT_VALUE(oparg, next_instr) elif opcode == opcodedesc.BUILD_STRING.index: self.BUILD_STRING(oparg, next_instr) + elif opcode == opcodedesc.LOAD_REVDB_VAR.index: + self.LOAD_REVDB_VAR(oparg, next_instr) else: self.MISSING_OPCODE(oparg, next_instr) @@ -1141,9 +1143,16 @@ # final 
result and returns. In that case, we can just continue # with the next bytecode. + def _revdb_jump_backward(self, jumpto): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import jump_backward + jump_backward(self, jumpto) + def jump_absolute(self, jumpto, ec): # this function is overridden by pypy.module.pypyjit.interp_jit check_nonneg(jumpto) + if self.space.reverse_debugging: + self._revdb_jump_backward(jumpto) return jumpto def JUMP_FORWARD(self, jumpby, next_instr): @@ -1451,21 +1460,12 @@ @jit.unroll_safe def BUILD_SET_UNPACK(self, itemcount, next_instr): space = self.space - w_sum = space.newset() + w_set = space.newset() for i in range(itemcount, 0, -1): w_item = self.peekvalue(i-1) - # cannot use w_sum.update, w_item might not be a set - iterator = space.iter(w_item) - while True: - try: - w_value = space.next(iterator) - except OperationError: - break - w_sum.add(w_value) - while itemcount != 0: - self.popvalue() - itemcount -= 1 - self.pushvalue(w_sum) + space.call_method(w_set, "update", w_item) + self.popvalues(itemcount) + self.pushvalue(w_set) @jit.unroll_safe def list_unpack_helper(frame, itemcount): @@ -1474,9 +1474,7 @@ for i in range(itemcount, 0, -1): w_item = frame.peekvalue(i-1) w_sum.extend(w_item) - while itemcount != 0: - frame.popvalue() - itemcount -= 1 + frame.popvalues(itemcount) return w_sum @jit.unroll_safe @@ -1516,9 +1514,7 @@ space.call_method(w_dict, 'update', w_item) if with_call and space.len_w(w_dict) < expected_length: self._build_map_unpack_error(itemcount) - while itemcount > 0: - self.popvalue() - itemcount -= 1 + self.popvalues(itemcount) self.pushvalue(w_dict) @jit.dont_look_inside @@ -1686,6 +1682,19 @@ w_res = space.newunicode(u''.join(lst)) self.pushvalue(w_res) + def _revdb_load_var(self, oparg): + # moved in its own function for the import statement + from pypy.interpreter.reverse_debugging import load_metavar + w_var = load_metavar(oparg) + self.pushvalue(w_var) + + 
def LOAD_REVDB_VAR(self, oparg, next_instr): + if self.space.reverse_debugging: + self._revdb_load_var(oparg) + else: + self.MISSING_OPCODE(oparg, next_instr) + + ### ____________________________________________________________ ### class ExitFrame(Exception): diff --git a/pypy/interpreter/pyparser/data/Grammar2.7 b/pypy/interpreter/pyparser/data/Grammar2.7 --- a/pypy/interpreter/pyparser/data/Grammar2.7 +++ b/pypy/interpreter/pyparser/data/Grammar2.7 @@ -104,7 +104,7 @@ '[' [listmaker] ']' | '{' [dictorsetmaker] '}' | '`' testlist1 '`' | - NAME | NUMBER | STRING+) + NAME | NUMBER | STRING+ | '$NUM') listmaker: test ( list_for | (',' test)* [','] ) testlist_comp: test ( comp_for | (',' test)* [','] ) lambdef: 'lambda' [varargslist] ':' test diff --git a/pypy/interpreter/pyparser/data/Grammar3.2 b/pypy/interpreter/pyparser/data/Grammar3.2 --- a/pypy/interpreter/pyparser/data/Grammar3.2 +++ b/pypy/interpreter/pyparser/data/Grammar3.2 @@ -103,7 +103,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | - NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') + NAME | NUMBER | STRING+ | '$NUM' | '...' | 'None' | 'True' | 'False') testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME subscriptlist: subscript (',' subscript)* [','] diff --git a/pypy/interpreter/pyparser/data/Grammar3.3 b/pypy/interpreter/pyparser/data/Grammar3.3 --- a/pypy/interpreter/pyparser/data/Grammar3.3 +++ b/pypy/interpreter/pyparser/data/Grammar3.3 @@ -103,7 +103,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | - NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') + NAME | NUMBER | STRING+ | '$NUM' | '...' | 'None' | 'True' | 'False') testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' 
NAME subscriptlist: subscript (',' subscript)* [','] diff --git a/pypy/interpreter/pyparser/data/Grammar3.5 b/pypy/interpreter/pyparser/data/Grammar3.5 --- a/pypy/interpreter/pyparser/data/Grammar3.5 +++ b/pypy/interpreter/pyparser/data/Grammar3.5 @@ -108,7 +108,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | - NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') + NAME | NUMBER | STRING+ | '$NUM' | '...' | 'None' | 'True' | 'False') testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME subscriptlist: subscript (',' subscript)* [','] diff --git a/pypy/interpreter/pyparser/data/Grammar3.6 b/pypy/interpreter/pyparser/data/Grammar3.6 --- a/pypy/interpreter/pyparser/data/Grammar3.6 +++ b/pypy/interpreter/pyparser/data/Grammar3.6 @@ -107,7 +107,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | - NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') + NAME | NUMBER | STRING+ | '$NUM' | '...' | 'None' | 'True' | 'False') testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' 
NAME subscriptlist: subscript (',' subscript)* [','] diff --git a/pypy/interpreter/pyparser/dfa_generated.py b/pypy/interpreter/pyparser/dfa_generated.py --- a/pypy/interpreter/pyparser/dfa_generated.py +++ b/pypy/interpreter/pyparser/dfa_generated.py @@ -6,42 +6,43 @@ from pypy.interpreter.pyparser import automata accepts = [True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, - True, False, False, False, True, False, False, - False, True, False, False, True, False, False, - True, False, False, True, False, False, True, - False, False, True, False, True, False, True, - False, True, False, False, False, False, True, True, False, False, False, False, True, False, - True, False, True, False, True, False, True, True, - False, True, False, True, False, False, True, - True, True, True, True] + False, False, True, False, False, True, False, + False, True, False, True, False, True, False, + False, True, False, False, True, False, True, + False, True, False, True, False, False, False, + False, True, True, False, False, False, False, + True, False, True, False, True, False, True, + False, True, True, False, True, False, True, + False, False, True, True, True, True, True] states = [ # 0 {'\t': 0, '\n': 15, '\x0c': 0, - '\r': 16, ' ': 0, '!': 11, '"': 18, - '#': 20, '%': 14, '&': 14, "'": 17, - '(': 15, ')': 15, '*': 8, '+': 14, - ',': 15, '-': 12, '.': 7, '/': 13, - '0': 5, '1': 6, '2': 6, '3': 6, - '4': 6, '5': 6, '6': 6, '7': 6, - '8': 6, '9': 6, ':': 15, ';': 15, - '<': 10, '=': 14, '>': 9, '@': 14, - 'A': 1, 'B': 2, 'C': 1, 'D': 1, - 'E': 1, 'F': 2, 'G': 1, 'H': 1, - 'I': 1, 'J': 1, 'K': 1, 'L': 1, - 'M': 1, 'N': 1, 'O': 1, 'P': 1, - 'Q': 1, 'R': 3, 'S': 1, 'T': 1, - 'U': 4, 'V': 1, 'W': 1, 'X': 1, - 'Y': 1, 'Z': 1, '[': 15, '\\': 19, - ']': 15, '^': 14, '_': 1, '`': 15, - 'a': 1, 'b': 2, 'c': 1, 'd': 1, - 'e': 1, 'f': 2, 'g': 1, 'h': 1, - 'i': 1, 'j': 1, 'k': 1, 'l': 1, - 'm': 1, 'n': 1, 'o': 1, 'p': 1, - 'q': 1, 'r': 3, 's': 
1, 't': 1, - 'u': 4, 'v': 1, 'w': 1, 'x': 1, - 'y': 1, 'z': 1, '{': 15, '|': 14, - '}': 15, '~': 15, '\x80': 1}, + '\r': 16, ' ': 0, '!': 11, '"': 19, + '#': 21, '$': 17, '%': 14, '&': 14, + "'": 18, '(': 15, ')': 15, '*': 8, + '+': 14, ',': 15, '-': 12, '.': 7, + '/': 13, '0': 5, '1': 6, '2': 6, + '3': 6, '4': 6, '5': 6, '6': 6, + '7': 6, '8': 6, '9': 6, ':': 15, + ';': 15, '<': 10, '=': 14, '>': 9, + '@': 14, 'A': 1, 'B': 2, 'C': 1, + 'D': 1, 'E': 1, 'F': 2, 'G': 1, + 'H': 1, 'I': 1, 'J': 1, 'K': 1, + 'L': 1, 'M': 1, 'N': 1, 'O': 1, + 'P': 1, 'Q': 1, 'R': 3, 'S': 1, + 'T': 1, 'U': 4, 'V': 1, 'W': 1, + 'X': 1, 'Y': 1, 'Z': 1, '[': 15, + '\\': 20, ']': 15, '^': 14, '_': 1, + '`': 15, 'a': 1, 'b': 2, 'c': 1, + 'd': 1, 'e': 1, 'f': 2, 'g': 1, + 'h': 1, 'i': 1, 'j': 1, 'k': 1, + 'l': 1, 'm': 1, 'n': 1, 'o': 1, + 'p': 1, 'q': 1, 'r': 3, 's': 1, + 't': 1, 'u': 4, 'v': 1, 'w': 1, + 'x': 1, 'y': 1, 'z': 1, '{': 15, + '|': 14, '}': 15, '~': 15, + '\x80': 1}, # 1 {'0': 1, '1': 1, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, @@ -60,7 +61,7 @@ 't': 1, 'u': 1, 'v': 1, 'w': 1, 'x': 1, 'y': 1, 'z': 1, '\x80': 1}, # 2 - {'"': 18, "'": 17, '0': 1, '1': 1, + {'"': 19, "'": 18, '0': 1, '1': 1, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, '8': 1, '9': 1, 'A': 1, 'B': 1, 'C': 1, 'D': 1, @@ -78,7 +79,7 @@ 'v': 1, 'w': 1, 'x': 1, 'y': 1, 'z': 1, '\x80': 1}, # 3 - {'"': 18, "'": 17, '0': 1, '1': 1, + {'"': 19, "'": 18, '0': 1, '1': 1, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, '8': 1, '9': 1, 'A': 1, 'B': 4, 'C': 1, 'D': 1, @@ -96,7 +97,7 @@ 'v': 1, 'w': 1, 'x': 1, 'y': 1, 'z': 1, '\x80': 1}, # 4 - {'"': 18, "'": 17, '0': 1, '1': 1, + {'"': 19, "'": 18, '0': 1, '1': 1, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, '8': 1, '9': 1, 'A': 1, 'B': 1, 'C': 1, 'D': 1, @@ -114,21 +115,21 @@ 'v': 1, 'w': 1, 'x': 1, 'y': 1, 'z': 1, '\x80': 1}, # 5 - {'.': 27, '0': 24, '1': 26, '2': 26, - '3': 26, '4': 26, '5': 26, '6': 26, - '7': 26, '8': 26, '9': 26, 'B': 23, - 'E': 28, 'J': 15, 'O': 
22, 'X': 21, - '_': 25, 'b': 23, 'e': 28, 'j': 15, - 'o': 22, 'x': 21}, + {'.': 28, '0': 25, '1': 27, '2': 27, + '3': 27, '4': 27, '5': 27, '6': 27, + '7': 27, '8': 27, '9': 27, 'B': 24, + 'E': 29, 'J': 15, 'O': 23, 'X': 22, + '_': 26, 'b': 24, 'e': 29, 'j': 15, + 'o': 23, 'x': 22}, # 6 - {'.': 27, '0': 6, '1': 6, '2': 6, + {'.': 28, '0': 6, '1': 6, '2': 6, '3': 6, '4': 6, '5': 6, '6': 6, - '7': 6, '8': 6, '9': 6, 'E': 28, - 'J': 15, '_': 29, 'e': 28, 'j': 15}, + '7': 6, '8': 6, '9': 6, 'E': 29, + 'J': 15, '_': 30, 'e': 29, 'j': 15}, # 7 - {'.': 31, '0': 30, '1': 30, '2': 30, - '3': 30, '4': 30, '5': 30, '6': 30, - '7': 30, '8': 30, '9': 30}, + {'.': 32, '0': 31, '1': 31, '2': 31, + '3': 31, '4': 31, '5': 31, '6': 31, + '7': 31, '8': 31, '9': 31}, # 8 {'*': 14, '=': 15}, # 9 @@ -148,239 +149,247 @@ # 16 {'\n': 15}, # 17 - {automata.DEFAULT: 35, '\n': 32, - '\r': 32, "'": 33, '\\': 34}, + {'0': 33, '1': 33, '2': 33, '3': 33, + '4': 33, '5': 33, '6': 33, '7': 33, + '8': 33, '9': 33}, # 18 - {automata.DEFAULT: 38, '\n': 32, - '\r': 32, '"': 36, '\\': 37}, + {automata.DEFAULT: 37, '\n': 34, + '\r': 34, "'": 35, '\\': 36}, # 19 + {automata.DEFAULT: 40, '\n': 34, + '\r': 34, '"': 38, '\\': 39}, + # 20 {'\n': 15, '\r': 16}, - # 20 - {automata.DEFAULT: 20, '\n': 32, '\r': 32}, # 21 - {'0': 39, '1': 39, '2': 39, '3': 39, - '4': 39, '5': 39, '6': 39, '7': 39, - '8': 39, '9': 39, 'A': 39, 'B': 39, - 'C': 39, 'D': 39, 'E': 39, 'F': 39, - '_': 40, 'a': 39, 'b': 39, 'c': 39, - 'd': 39, 'e': 39, 'f': 39}, + {automata.DEFAULT: 21, '\n': 34, '\r': 34}, # 22 {'0': 41, '1': 41, '2': 41, '3': 41, '4': 41, '5': 41, '6': 41, '7': 41, - '_': 42}, + '8': 41, '9': 41, 'A': 41, 'B': 41, + 'C': 41, 'D': 41, 'E': 41, 'F': 41, + '_': 42, 'a': 41, 'b': 41, 'c': 41, + 'd': 41, 'e': 41, 'f': 41}, # 23 - {'0': 43, '1': 43, '_': 44}, + {'0': 43, '1': 43, '2': 43, '3': 43, + '4': 43, '5': 43, '6': 43, '7': 43, + '_': 44}, # 24 - {'.': 27, '0': 24, '1': 26, '2': 26, - '3': 26, '4': 26, '5': 26, '6': 
26, - '7': 26, '8': 26, '9': 26, 'E': 28, - 'J': 15, '_': 25, 'e': 28, 'j': 15}, + {'0': 45, '1': 45, '_': 46}, # 25 - {'0': 45, '1': 46, '2': 46, '3': 46, - '4': 46, '5': 46, '6': 46, '7': 46, - '8': 46, '9': 46}, + {'.': 28, '0': 25, '1': 27, '2': 27, + '3': 27, '4': 27, '5': 27, '6': 27, + '7': 27, '8': 27, '9': 27, 'E': 29, + 'J': 15, '_': 26, 'e': 29, 'j': 15}, # 26 - {'.': 27, '0': 26, '1': 26, '2': 26, - '3': 26, '4': 26, '5': 26, '6': 26, - '7': 26, '8': 26, '9': 26, 'E': 28, - 'J': 15, '_': 47, 'e': 28, 'j': 15}, + {'0': 47, '1': 48, '2': 48, '3': 48, + '4': 48, '5': 48, '6': 48, '7': 48, + '8': 48, '9': 48}, # 27 - {'0': 27, '1': 27, '2': 27, '3': 27, - '4': 27, '5': 27, '6': 27, '7': 27, - '8': 27, '9': 27, 'E': 48, 'J': 15, - 'e': 48, 'j': 15}, + {'.': 28, '0': 27, '1': 27, '2': 27, + '3': 27, '4': 27, '5': 27, '6': 27, + '7': 27, '8': 27, '9': 27, 'E': 29, + 'J': 15, '_': 49, 'e': 29, 'j': 15}, # 28 - {'+': 49, '-': 49, '0': 50, '1': 50, - '2': 50, '3': 50, '4': 50, '5': 50, - '6': 50, '7': 50, '8': 50, '9': 50}, + {'0': 28, '1': 28, '2': 28, '3': 28, + '4': 28, '5': 28, '6': 28, '7': 28, + '8': 28, '9': 28, 'E': 50, 'J': 15, + 'e': 50, 'j': 15}, # 29 - {'0': 51, '1': 51, '2': 51, '3': 51, - '4': 51, '5': 51, '6': 51, '7': 51, - '8': 51, '9': 51}, + {'+': 51, '-': 51, '0': 52, '1': 52, + '2': 52, '3': 52, '4': 52, '5': 52, + '6': 52, '7': 52, '8': 52, '9': 52}, # 30 - {'0': 30, '1': 30, '2': 30, '3': 30, - '4': 30, '5': 30, '6': 30, '7': 30, - '8': 30, '9': 30, 'E': 48, 'J': 15, - '_': 52, 'e': 48, 'j': 15}, + {'0': 53, '1': 53, '2': 53, '3': 53, + '4': 53, '5': 53, '6': 53, '7': 53, + '8': 53, '9': 53}, # 31 + {'0': 31, '1': 31, '2': 31, '3': 31, + '4': 31, '5': 31, '6': 31, '7': 31, + '8': 31, '9': 31, 'E': 50, 'J': 15, + '_': 54, 'e': 50, 'j': 15}, + # 32 {'.': 15}, - # 32 + # 33 + {'0': 33, '1': 33, '2': 33, '3': 33, + '4': 33, '5': 33, '6': 33, '7': 33, + '8': 33, '9': 33}, + # 34 {}, - # 33 + # 35 {"'": 15}, - # 34 - {automata.DEFAULT: 53, '\n': 
15, '\r': 16}, - # 35 - {automata.DEFAULT: 35, '\n': 32, - '\r': 32, "'": 15, '\\': 34}, # 36 + {automata.DEFAULT: 55, '\n': 15, '\r': 16}, + # 37 + {automata.DEFAULT: 37, '\n': 34, + '\r': 34, "'": 15, '\\': 36}, + # 38 {'"': 15}, - # 37 - {automata.DEFAULT: 54, '\n': 15, '\r': 16}, - # 38 - {automata.DEFAULT: 38, '\n': 32, - '\r': 32, '"': 15, '\\': 37}, # 39 - {'0': 39, '1': 39, '2': 39, '3': 39, - '4': 39, '5': 39, '6': 39, '7': 39, - '8': 39, '9': 39, 'A': 39, 'B': 39, - 'C': 39, 'D': 39, 'E': 39, 'F': 39, - '_': 55, 'a': 39, 'b': 39, 'c': 39, - 'd': 39, 'e': 39, 'f': 39}, + {automata.DEFAULT: 56, '\n': 15, '\r': 16}, # 40 - {'0': 56, '1': 56, '2': 56, '3': 56, - '4': 56, '5': 56, '6': 56, '7': 56, - '8': 56, '9': 56, 'A': 56, 'B': 56, - 'C': 56, 'D': 56, 'E': 56, 'F': 56, - 'a': 56, 'b': 56, 'c': 56, 'd': 56, - 'e': 56, 'f': 56}, + {automata.DEFAULT: 40, '\n': 34, + '\r': 34, '"': 15, '\\': 39}, # 41 {'0': 41, '1': 41, '2': 41, '3': 41, '4': 41, '5': 41, '6': 41, '7': 41, - '_': 57}, + '8': 41, '9': 41, 'A': 41, 'B': 41, + 'C': 41, 'D': 41, 'E': 41, 'F': 41, + '_': 57, 'a': 41, 'b': 41, 'c': 41, + 'd': 41, 'e': 41, 'f': 41}, # 42 {'0': 58, '1': 58, '2': 58, '3': 58, - '4': 58, '5': 58, '6': 58, '7': 58}, + '4': 58, '5': 58, '6': 58, '7': 58, + '8': 58, '9': 58, 'A': 58, 'B': 58, + 'C': 58, 'D': 58, 'E': 58, 'F': 58, + 'a': 58, 'b': 58, 'c': 58, 'd': 58, + 'e': 58, 'f': 58}, # 43 - {'0': 43, '1': 43, '_': 59}, + {'0': 43, '1': 43, '2': 43, '3': 43, + '4': 43, '5': 43, '6': 43, '7': 43, + '_': 59}, # 44 - {'0': 60, '1': 60}, + {'0': 60, '1': 60, '2': 60, '3': 60, + '4': 60, '5': 60, '6': 60, '7': 60}, # 45 - {'.': 27, '0': 45, '1': 46, '2': 46, - '3': 46, '4': 46, '5': 46, '6': 46, - '7': 46, '8': 46, '9': 46, 'E': 28, - 'J': 15, '_': 25, 'e': 28, 'j': 15}, + {'0': 45, '1': 45, '_': 61}, # 46 - {'.': 27, '0': 46, '1': 46, '2': 46, - '3': 46, '4': 46, '5': 46, '6': 46, - '7': 46, '8': 46, '9': 46, 'E': 28, - 'J': 15, '_': 47, 'e': 28, 'j': 15}, + {'0': 62, '1': 
62}, # 47 - {'0': 46, '1': 46, '2': 46, '3': 46, - '4': 46, '5': 46, '6': 46, '7': 46, - '8': 46, '9': 46}, + {'.': 28, '0': 47, '1': 48, '2': 48, + '3': 48, '4': 48, '5': 48, '6': 48, + '7': 48, '8': 48, '9': 48, 'E': 29, + 'J': 15, '_': 26, 'e': 29, 'j': 15}, # 48 - {'+': 61, '-': 61, '0': 62, '1': 62, - '2': 62, '3': 62, '4': 62, '5': 62, - '6': 62, '7': 62, '8': 62, '9': 62}, + {'.': 28, '0': 48, '1': 48, '2': 48, + '3': 48, '4': 48, '5': 48, '6': 48, + '7': 48, '8': 48, '9': 48, 'E': 29, + 'J': 15, '_': 49, 'e': 29, 'j': 15}, # 49 - {'0': 50, '1': 50, '2': 50, '3': 50, - '4': 50, '5': 50, '6': 50, '7': 50, - '8': 50, '9': 50}, + {'0': 48, '1': 48, '2': 48, '3': 48, + '4': 48, '5': 48, '6': 48, '7': 48, + '8': 48, '9': 48}, # 50 - {'0': 50, '1': 50, '2': 50, '3': 50, - '4': 50, '5': 50, '6': 50, '7': 50, - '8': 50, '9': 50, 'J': 15, '_': 63, + {'+': 63, '-': 63, '0': 64, '1': 64, + '2': 64, '3': 64, '4': 64, '5': 64, + '6': 64, '7': 64, '8': 64, '9': 64}, + # 51 + {'0': 52, '1': 52, '2': 52, '3': 52, + '4': 52, '5': 52, '6': 52, '7': 52, + '8': 52, '9': 52}, + # 52 + {'0': 52, '1': 52, '2': 52, '3': 52, + '4': 52, '5': 52, '6': 52, '7': 52, + '8': 52, '9': 52, 'J': 15, '_': 65, 'j': 15}, - # 51 - {'.': 27, '0': 51, '1': 51, '2': 51, - '3': 51, '4': 51, '5': 51, '6': 51, - '7': 51, '8': 51, '9': 51, 'E': 28, - 'J': 15, '_': 29, 'e': 28, 'j': 15}, - # 52 + # 53 + {'.': 28, '0': 53, '1': 53, '2': 53, + '3': 53, '4': 53, '5': 53, '6': 53, + '7': 53, '8': 53, '9': 53, 'E': 29, + 'J': 15, '_': 30, 'e': 29, 'j': 15}, + # 54 + {'0': 66, '1': 66, '2': 66, '3': 66, + '4': 66, '5': 66, '6': 66, '7': 66, + '8': 66, '9': 66}, + # 55 + {automata.DEFAULT: 55, '\n': 34, + '\r': 34, "'": 15, '\\': 36}, + # 56 + {automata.DEFAULT: 56, '\n': 34, + '\r': 34, '"': 15, '\\': 39}, + # 57 + {'0': 67, '1': 67, '2': 67, '3': 67, + '4': 67, '5': 67, '6': 67, '7': 67, + '8': 67, '9': 67, 'A': 67, 'B': 67, + 'C': 67, 'D': 67, 'E': 67, 'F': 67, + 'a': 67, 'b': 67, 'c': 67, 'd': 67, + 'e': 
67, 'f': 67}, + # 58 + {'0': 58, '1': 58, '2': 58, '3': 58, + '4': 58, '5': 58, '6': 58, '7': 58, + '8': 58, '9': 58, 'A': 58, 'B': 58, + 'C': 58, 'D': 58, 'E': 58, 'F': 58, + '_': 68, 'a': 58, 'b': 58, 'c': 58, + 'd': 58, 'e': 58, 'f': 58}, + # 59 + {'0': 69, '1': 69, '2': 69, '3': 69, + '4': 69, '5': 69, '6': 69, '7': 69}, + # 60 + {'0': 60, '1': 60, '2': 60, '3': 60, + '4': 60, '5': 60, '6': 60, '7': 60, + '_': 70}, + # 61 + {'0': 71, '1': 71}, + # 62 + {'0': 62, '1': 62, '_': 72}, + # 63 {'0': 64, '1': 64, '2': 64, '3': 64, '4': 64, '5': 64, '6': 64, '7': 64, '8': 64, '9': 64}, - # 53 - {automata.DEFAULT: 53, '\n': 32, - '\r': 32, "'": 15, '\\': 34}, - # 54 - {automata.DEFAULT: 54, '\n': 32, - '\r': 32, '"': 15, '\\': 37}, - # 55 - {'0': 65, '1': 65, '2': 65, '3': 65, - '4': 65, '5': 65, '6': 65, '7': 65, - '8': 65, '9': 65, 'A': 65, 'B': 65, - 'C': 65, 'D': 65, 'E': 65, 'F': 65, - 'a': 65, 'b': 65, 'c': 65, 'd': 65, - 'e': 65, 'f': 65}, - # 56 - {'0': 56, '1': 56, '2': 56, '3': 56, - '4': 56, '5': 56, '6': 56, '7': 56, - '8': 56, '9': 56, 'A': 56, 'B': 56, - 'C': 56, 'D': 56, 'E': 56, 'F': 56, - '_': 66, 'a': 56, 'b': 56, 'c': 56, - 'd': 56, 'e': 56, 'f': 56}, - # 57 - {'0': 67, '1': 67, '2': 67, '3': 67, - '4': 67, '5': 67, '6': 67, '7': 67}, - # 58 - {'0': 58, '1': 58, '2': 58, '3': 58, - '4': 58, '5': 58, '6': 58, '7': 58, - '_': 68}, - # 59 - {'0': 69, '1': 69}, - # 60 - {'0': 60, '1': 60, '_': 70}, - # 61 - {'0': 62, '1': 62, '2': 62, '3': 62, - '4': 62, '5': 62, '6': 62, '7': 62, - '8': 62, '9': 62}, - # 62 - {'0': 62, '1': 62, '2': 62, '3': 62, - '4': 62, '5': 62, '6': 62, '7': 62, - '8': 62, '9': 62, 'J': 15, '_': 71, - 'j': 15}, - # 63 - {'0': 72, '1': 72, '2': 72, '3': 72, - '4': 72, '5': 72, '6': 72, '7': 72, - '8': 72, '9': 72}, # 64 {'0': 64, '1': 64, '2': 64, '3': 64, '4': 64, '5': 64, '6': 64, '7': 64, - '8': 64, '9': 64, 'E': 48, 'J': 15, - '_': 52, 'e': 48, 'j': 15}, + '8': 64, '9': 64, 'J': 15, '_': 73, + 'j': 15}, # 65 - {'0': 65, '1': 65, 
'2': 65, '3': 65, - '4': 65, '5': 65, '6': 65, '7': 65, - '8': 65, '9': 65, 'A': 65, 'B': 65, - 'C': 65, 'D': 65, 'E': 65, 'F': 65, - '_': 55, 'a': 65, 'b': 65, 'c': 65, - 'd': 65, 'e': 65, 'f': 65}, + {'0': 74, '1': 74, '2': 74, '3': 74, + '4': 74, '5': 74, '6': 74, '7': 74, + '8': 74, '9': 74}, # 66 - {'0': 73, '1': 73, '2': 73, '3': 73, - '4': 73, '5': 73, '6': 73, '7': 73, - '8': 73, '9': 73, 'A': 73, 'B': 73, - 'C': 73, 'D': 73, 'E': 73, 'F': 73, - 'a': 73, 'b': 73, 'c': 73, 'd': 73, - 'e': 73, 'f': 73}, + {'0': 66, '1': 66, '2': 66, '3': 66, + '4': 66, '5': 66, '6': 66, '7': 66, + '8': 66, '9': 66, 'E': 50, 'J': 15, + '_': 54, 'e': 50, 'j': 15}, # 67 {'0': 67, '1': 67, '2': 67, '3': 67, '4': 67, '5': 67, '6': 67, '7': 67, - '_': 57}, + '8': 67, '9': 67, 'A': 67, 'B': 67, + 'C': 67, 'D': 67, 'E': 67, 'F': 67, + '_': 57, 'a': 67, 'b': 67, 'c': 67, + 'd': 67, 'e': 67, 'f': 67}, # 68 - {'0': 74, '1': 74, '2': 74, '3': 74, - '4': 74, '5': 74, '6': 74, '7': 74}, + {'0': 75, '1': 75, '2': 75, '3': 75, + '4': 75, '5': 75, '6': 75, '7': 75, + '8': 75, '9': 75, 'A': 75, 'B': 75, + 'C': 75, 'D': 75, 'E': 75, 'F': 75, + 'a': 75, 'b': 75, 'c': 75, 'd': 75, + 'e': 75, 'f': 75}, # 69 - {'0': 69, '1': 69, '_': 59}, + {'0': 69, '1': 69, '2': 69, '3': 69, + '4': 69, '5': 69, '6': 69, '7': 69, + '_': 59}, # 70 - {'0': 75, '1': 75}, + {'0': 76, '1': 76, '2': 76, '3': 76, + '4': 76, '5': 76, '6': 76, '7': 76}, # 71 - {'0': 76, '1': 76, '2': 76, '3': 76, - '4': 76, '5': 76, '6': 76, '7': 76, - '8': 76, '9': 76}, + {'0': 71, '1': 71, '_': 61}, # 72 - {'0': 72, '1': 72, '2': 72, '3': 72, - '4': 72, '5': 72, '6': 72, '7': 72, - '8': 72, '9': 72, 'J': 15, '_': 63, - 'j': 15}, + {'0': 77, '1': 77}, # 73 - {'0': 73, '1': 73, '2': 73, '3': 73, - '4': 73, '5': 73, '6': 73, '7': 73, - '8': 73, '9': 73, 'A': 73, 'B': 73, - 'C': 73, 'D': 73, 'E': 73, 'F': 73, - '_': 66, 'a': 73, 'b': 73, 'c': 73, - 'd': 73, 'e': 73, 'f': 73}, + {'0': 78, '1': 78, '2': 78, '3': 78, + '4': 78, '5': 78, '6': 
78, '7': 78, + '8': 78, '9': 78}, # 74 {'0': 74, '1': 74, '2': 74, '3': 74, '4': 74, '5': 74, '6': 74, '7': 74, - '_': 68}, + '8': 74, '9': 74, 'J': 15, '_': 65, + 'j': 15}, # 75 - {'0': 75, '1': 75, '_': 70}, + {'0': 75, '1': 75, '2': 75, '3': 75, + '4': 75, '5': 75, '6': 75, '7': 75, + '8': 75, '9': 75, 'A': 75, 'B': 75, + 'C': 75, 'D': 75, 'E': 75, 'F': 75, + '_': 68, 'a': 75, 'b': 75, 'c': 75, + 'd': 75, 'e': 75, 'f': 75}, # 76 {'0': 76, '1': 76, '2': 76, '3': 76, '4': 76, '5': 76, '6': 76, '7': 76, - '8': 76, '9': 76, 'J': 15, '_': 71, + '_': 70}, + # 77 + {'0': 77, '1': 77, '_': 72}, + # 78 + {'0': 78, '1': 78, '2': 78, '3': 78, + '4': 78, '5': 78, '6': 78, '7': 78, + '8': 78, '9': 78, 'J': 15, '_': 73, 'j': 15}, ] pseudoDFA = automata.DFA(states, accepts) diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -45,7 +45,7 @@ self.tok = self.tokens[index] def skip(self, n): - if self.tok[0] == n: + if self.tok.token_type == n: self.next() return True else: @@ -53,7 +53,7 @@ def skip_name(self, name): from pypy.interpreter.pyparser import pygram - if self.tok[0] == pygram.tokens.NAME and self.tok[1] == name: + if self.tok.token_type == pygram.tokens.NAME and self.tok.value == name: self.next() return True else: @@ -61,8 +61,8 @@ def next_feature_name(self): from pypy.interpreter.pyparser import pygram - if self.tok[0] == pygram.tokens.NAME: - name = self.tok[1] + if self.tok.token_type == pygram.tokens.NAME: + name = self.tok.value self.next() if self.skip_name("as"): self.skip(pygram.tokens.NAME) @@ -99,7 +99,7 @@ # somewhere inside the last __future__ import statement # (at the start would be fine too, but it's easier to grab a # random position inside) - last_position = (it.tok[2], it.tok[3]) + last_position = (it.tok.lineno, it.tok.column) result |= future_flags.get_compiler_feature(it.next_feature_name()) while 
it.skip(pygram.tokens.COMMA): result |= future_flags.get_compiler_feature(it.next_feature_name()) diff --git a/pypy/interpreter/pyparser/gendfa.py b/pypy/interpreter/pyparser/gendfa.py old mode 100755 new mode 100644 --- a/pypy/interpreter/pyparser/gendfa.py +++ b/pypy/interpreter/pyparser/gendfa.py @@ -166,7 +166,10 @@ makeEOL(), chainStr(states, "..."), groupStr(states, "@:;.,`")) - funny = group(states, operator, bracket, special) + revdb_metavar = chain(states, + groupStr(states, "$"), + atleastonce(states, makeDigits())) + funny = group(states, operator, bracket, special, revdb_metavar) # ____________________________________________________________ def makeStrPrefix (): return group(states, diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py --- a/pypy/interpreter/pyparser/parser.py +++ b/pypy/interpreter/pyparser/parser.py @@ -28,11 +28,24 @@ new.symbol_ids = self.symbol_ids new.symbols_names = self.symbol_names new.keyword_ids = self.keyword_ids + new.token_to_error_string = self.token_to_error_string new.dfas = self.dfas new.labels = self.labels new.token_ids = self.token_ids return new + + def classify(self, token): + """Find the label for a token.""" + if token.token_type == self.KEYWORD_TOKEN: + label_index = self.keyword_ids.get(token.value, -1) + if label_index != -1: + return label_index + label_index = self.token_ids.get(token.token_type, -1) + if label_index == -1: + raise ParseError("invalid token", token) + return label_index + def _freeze_(self): # Remove some attributes not used in parsing. 
try: @@ -65,6 +78,33 @@ b[pos] |= bit return str(b) + +class Token(object): + def __init__(self, token_type, value, lineno, column, line): + self.token_type = token_type + self.value = value + self.lineno = lineno + # 0-based offset + self.column = column + self.line = line + + def __repr__(self): + return "Token(%s, %s)" % (self.token_type, self.value) + + def __eq__(self, other): + # for tests + return ( + self.token_type == other.token_type and + self.value == other.value and + self.lineno == other.lineno and + self.column == other.column and + self.line == other.line + ) + + def __ne__(self, other): + return not self == other + + class Node(object): __slots__ = ("type", ) @@ -105,6 +145,11 @@ self.lineno = lineno self.column = column + @staticmethod + def fromtoken(token): + return Terminal( + token.token_type, token.value, token.lineno, token.column) + def __repr__(self): return "Terminal(type=%s, value=%r)" % (self.type, self.value) @@ -193,20 +238,14 @@ class ParseError(Exception): - def __init__(self, msg, token_type, value, lineno, column, line, - expected=-1, expected_str=None): + def __init__(self, msg, token, expected=-1, expected_str=None): self.msg = msg - self.token_type = token_type - self.value = value - self.lineno = lineno - # this is a 0-based index - self.column = column - self.line = line + self.token = token self.expected = expected self.expected_str = expected_str def __str__(self): - return "ParserError(%s, %r)" % (self.token_type, self.value) + return "ParserError(%s)" % (self.token, ) class StackEntry(object): @@ -249,8 +288,8 @@ self.root = None self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0) - def add_token(self, token_type, value, lineno, column, line): - label_index = self.classify(token_type, value, lineno, column, line) + def add_token(self, token): + label_index = self.grammar.classify(token) sym_id = 0 # for the annotator while True: dfa = self.stack.dfa @@ -261,7 +300,7 @@ sym_id = self.grammar.labels[i] if 
label_index == i: # We matched a non-terminal. - self.shift(next_state, token_type, value, lineno, column) + self.shift(next_state, token) state = states[next_state] # While the only possible action is to accept, pop nodes off # the stack. @@ -278,8 +317,7 @@ sub_node_dfa = self.grammar.dfas[sym_id - 256] # Check if this token can start a child node. if sub_node_dfa.could_match_token(label_index): - self.push(sub_node_dfa, next_state, sym_id, lineno, - column) + self.push(sub_node_dfa, next_state, sym_id) break else: # We failed to find any arcs to another state, so unless this @@ -287,8 +325,7 @@ if is_accepting: self.pop() if self.stack is None: - raise ParseError("too much input", token_type, value, - lineno, column, line) + raise ParseError("too much input", token) else: # If only one possible input would satisfy, attach it to the # error. @@ -299,28 +336,16 @@ else: expected = -1 expected_str = None - raise ParseError("bad input", token_type, value, lineno, - column, line, expected, expected_str) + raise ParseError("bad input", token, expected, expected_str) - def classify(self, token_type, value, lineno, column, line): - """Find the label for a token.""" - if token_type == self.grammar.KEYWORD_TOKEN: - label_index = self.grammar.keyword_ids.get(value, -1) - if label_index != -1: - return label_index - label_index = self.grammar.token_ids.get(token_type, -1) - if label_index == -1: - raise ParseError("invalid token", token_type, value, lineno, column, - line) - return label_index - def shift(self, next_state, token_type, value, lineno, column): + def shift(self, next_state, token): """Shift a non-terminal and prepare for the next state.""" - new_node = Terminal(token_type, value, lineno, column) + new_node = Terminal.fromtoken(token) self.stack.node_append_child(new_node) self.stack.state = next_state - def push(self, next_dfa, next_state, node_type, lineno, column): + def push(self, next_dfa, next_state, node_type): """Push a terminal and adjust the current 
state.""" self.stack.state = next_state self.stack = self.stack.push(next_dfa, 0) diff --git a/pypy/interpreter/pyparser/pygram.py b/pypy/interpreter/pyparser/pygram.py --- a/pypy/interpreter/pyparser/pygram.py +++ b/pypy/interpreter/pyparser/pygram.py @@ -20,6 +20,13 @@ python_grammar = _get_python_grammar() +python_grammar_revdb = python_grammar.shared_copy() +copied_token_ids = python_grammar.token_ids.copy() +python_grammar_revdb.token_ids = copied_token_ids + +metavar_token_id = pytoken.python_tokens['REVDBMETAVAR'] +del python_grammar.token_ids[metavar_token_id] + class _Tokens(object): pass for tok_name, idx in pytoken.python_tokens.iteritems(): @@ -36,3 +43,11 @@ syms._rev_lookup = rev_lookup # for debugging del _get_python_grammar, _Tokens, tok_name, sym_name, idx + +def choose_grammar(print_function, revdb): + assert print_function + if revdb: + return python_grammar_revdb + else: + return python_grammar + diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py --- a/pypy/interpreter/pyparser/pyparse.py +++ b/pypy/interpreter/pyparser/pyparse.py @@ -163,48 +163,57 @@ flags &= ~consts.PyCF_DONT_IMPLY_DEDENT self.prepare(_targets[compile_info.mode]) - tp = 0 try: - last_value_seen = None - next_value_seen = None + last_token_seen = None + next_token_seen = None try: # Note: we no longer pass the CO_FUTURE_* to the tokenizer, # which is expected to work independently of them. It's # certainly the case for all futures in Python <= 2.7. 
tokens = pytokenizer.generate_tokens(source_lines, flags) + except error.TokenError as e: + e.filename = compile_info.filename + raise + except error.TokenIndentationError as e: + e.filename = compile_info.filename + raise - newflags, last_future_import = ( - future.add_future_flags(self.future_flags, tokens)) - compile_info.last_future_import = last_future_import - compile_info.flags |= newflags - self.grammar = pygram.python_grammar + newflags, last_future_import = ( + future.add_future_flags(self.future_flags, tokens)) + compile_info.last_future_import = last_future_import + compile_info.flags |= newflags + + self.grammar = pygram.choose_grammar( + print_function=True, + revdb=self.space.config.translation.reverse_debugger) + try: tokens_stream = iter(tokens) - for tp, value, lineno, column, line in tokens_stream: - next_value_seen = value - if self.add_token(tp, value, lineno, column, line): + for token in tokens_stream: + next_token_seen = token + if self.add_token(token): break - last_value_seen = value - last_value_seen = None - next_value_seen = None + last_token_seen = token + last_token_seen = None + next_token_seen = None if compile_info.mode == 'single': - for tp, value, lineno, column, line in tokens_stream: - if tp == pygram.tokens.ENDMARKER: + for token in tokens_stream: + if token.token_type == pygram.tokens.ENDMARKER: break - if tp == pygram.tokens.NEWLINE: + if token.token_type == pygram.tokens.NEWLINE: continue - if tp == pygram.tokens.COMMENT: - for tp, _, _, _, _ in tokens_stream: - if tp == pygram.tokens.NEWLINE: + if token.token_type == pygram.tokens.COMMENT: + for token in tokens_stream: + if token.token_type == pygram.tokens.NEWLINE: break else: new_err = error.SyntaxError msg = ("multiple statements found while " "compiling a single statement") - raise new_err(msg, lineno, column, - line, compile_info.filename) + raise new_err(msg, token.lineno, token.column, + token.line, compile_info.filename) except error.TokenError as e: e.filename = 
compile_info.filename @@ -216,17 +225,18 @@ # Catch parse errors, pretty them up and reraise them as a # SyntaxError. new_err = error.IndentationError - if tp == pygram.tokens.INDENT: + if token.token_type == pygram.tokens.INDENT: msg = "unexpected indent" elif e.expected == pygram.tokens.INDENT: msg = "expected an indented block" else: new_err = error.SyntaxError - if (last_value_seen in ('print', 'exec') and - bool(next_value_seen) and - next_value_seen != '('): + if (last_token_seen is not None and + last_token_seen.value in ('print', 'exec') and + next_token_seen is not None and + next_token_seen.value != '('): msg = "Missing parentheses in call to '%s'" % ( - last_value_seen,) + last_token_seen,) else: msg = "invalid syntax" if e.expected_str is not None: @@ -234,7 +244,7 @@ # parser.ParseError(...).column is 0-based, but the offsets in the # exceptions in the error module are 1-based, hence the '+ 1' - raise new_err(msg, e.lineno, e.column + 1, e.line, + raise new_err(msg, e.token.lineno, e.token.column + 1, e.token.line, compile_info.filename) else: tree = self.root diff --git a/pypy/interpreter/pyparser/pytoken.py b/pypy/interpreter/pyparser/pytoken.py --- a/pypy/interpreter/pyparser/pytoken.py +++ b/pypy/interpreter/pyparser/pytoken.py @@ -72,5 +72,6 @@ # extra PyPy-specific tokens _add_tok("COMMENT") _add_tok("NL") +_add_tok("REVDBMETAVAR", "$NUM") del _add_tok diff --git a/pypy/interpreter/pyparser/pytokenize.py b/pypy/interpreter/pyparser/pytokenize.py --- a/pypy/interpreter/pyparser/pytokenize.py +++ b/pypy/interpreter/pyparser/pytokenize.py @@ -1,9 +1,6 @@ # ______________________________________________________________________ """Module pytokenize -THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED -TO BE ANNOTABLE (Mainly made lists homogeneous) - This is a modified version of Ka-Ping Yee's tokenize module found in the Python standard library. 
@@ -12,7 +9,6 @@ expressions have been replaced with hand built DFA's using the basil.util.automata module. -$Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $ """ # ______________________________________________________________________ @@ -87,22 +83,3 @@ tabsize = 8 alttabsize = 1 - -# PYPY MODIFICATION: removed TokenError class as it's not needed here - -# PYPY MODIFICATION: removed StopTokenizing class as it's not needed here - -# PYPY MODIFICATION: removed printtoken() as it's not needed here - -# PYPY MODIFICATION: removed tokenize() as it's not needed here - -# PYPY MODIFICATION: removed tokenize_loop() as it's not needed here - -# PYPY MODIFICATION: removed generate_tokens() as it was copied / modified -# in pythonlexer.py - -# PYPY MODIFICATION: removed main() as it's not needed here - -# ______________________________________________________________________ -# End of pytokenize.py - diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py --- a/pypy/interpreter/pyparser/pytokenizer.py +++ b/pypy/interpreter/pyparser/pytokenizer.py @@ -1,4 +1,5 @@ from pypy.interpreter.pyparser import automata +from pypy.interpreter.pyparser.parser import Token from pypy.interpreter.pyparser.pygram import tokens from pypy.interpreter.pyparser.pytoken import python_opmap from pypy.interpreter.pyparser.error import TokenError, TokenIndentationError, TabError @@ -144,7 +145,7 @@ endmatch = endDFA.recognize(line) if endmatch >= 0: pos = end = endmatch - tok = (tokens.STRING, contstr + line[:end], strstart[0], + tok = Token(tokens.STRING, contstr + line[:end], strstart[0], strstart[1], line) token_list.append(tok) last_comment = '' @@ -152,7 +153,7 @@ contline = None elif (needcont and not line.endswith('\\\n') and not line.endswith('\\\r\n')): - tok = (tokens.ERRORTOKEN, contstr + line, strstart[0], + tok = Token(tokens.ERRORTOKEN, contstr + line, strstart[0], strstart[1], line) token_list.append(tok) last_comment = '' @@ -200,13 
+201,13 @@ raise TabError(lnum, pos, line) indents.append(column) altindents.append(altcolumn) - token_list.append((tokens.INDENT, line[:pos], lnum, 0, line)) + token_list.append(Token(tokens.INDENT, line[:pos], lnum, 0, line)) last_comment = '' else: while column < indents[-1]: indents.pop() altindents.pop() - token_list.append((tokens.DEDENT, '', lnum, pos, line)) + token_list.append(Token(tokens.DEDENT, '', lnum, pos, line)) last_comment = '' if column != indents[-1]: err = "unindent does not match any outer indentation level" @@ -246,13 +247,13 @@ if (initial in numchars or \ (initial == '.' and token != '.' and token != '...')): # ordinary number - token_list.append((tokens.NUMBER, token, lnum, start, line)) + token_list.append(Token(tokens.NUMBER, token, lnum, start, line)) last_comment = '' elif initial in '\r\n': if not parenstack: if async_def: async_def_nl = True - tok = (tokens.NEWLINE, last_comment, lnum, start, line) + tok = Token(tokens.NEWLINE, last_comment, lnum, start, line) token_list.append(tok) last_comment = '' elif initial == '#': @@ -267,7 +268,7 @@ if endmatch >= 0: # all on one line pos = endmatch token = line[start:pos] - tok = (tokens.STRING, token, lnum, start, line) + tok = Token(tokens.STRING, token, lnum, start, line) token_list.append(tok) last_comment = '' else: @@ -286,7 +287,7 @@ contline = line break else: # ordinary string - tok = (tokens.STRING, token, lnum, start, line) + tok = Token(tokens.STRING, token, lnum, start, line) token_list.append(tok) last_comment = '' elif (initial in namechars or # ordinary name @@ -303,11 +304,11 @@ if async_def: # inside 'async def' function if token == 'async': - token_list.append((tokens.ASYNC, token, lnum, start, line)) + token_list.append(Token(tokens.ASYNC, token, lnum, start, line)) elif token == 'await': - token_list.append((tokens.AWAIT, token, lnum, start, line)) + token_list.append(Token(tokens.AWAIT, token, lnum, start, line)) else: - token_list.append((tokens.NAME, token, lnum, 
start, line)) + token_list.append(Token(tokens.NAME, token, lnum, start, line)) elif token == 'async': # async token, look ahead #ahead token if pos < max: @@ -319,16 +320,20 @@ if ahead_token == 'def': async_def = True async_def_indent = indents[-1] - token_list.append((tokens.ASYNC, token, lnum, start, line)) + token_list.append(Token(tokens.ASYNC, token, lnum, start, line)) else: - token_list.append((tokens.NAME, token, lnum, start, line)) + token_list.append(Token(tokens.NAME, token, lnum, start, line)) else: - token_list.append((tokens.NAME, token, lnum, start, line)) + token_list.append(Token(tokens.NAME, token, lnum, start, line)) else: - token_list.append((tokens.NAME, token, lnum, start, line)) + token_list.append(Token(tokens.NAME, token, lnum, start, line)) last_comment = '' elif initial == '\\': # continued stmt continued = 1 + elif initial == '$': + token_list.append(Token(tokens.REVDBMETAVAR, token, + lnum, start, line)) + last_comment = '' else: if initial in '([{': parenstack.append((initial, lnum, start, line)) @@ -351,7 +356,7 @@ punct = python_opmap[token] else: punct = tokens.OP - token_list.append((punct, token, lnum, start, line)) + token_list.append(Token(punct, token, lnum, start, line)) last_comment = '' else: start = whiteSpaceDFA.recognize(line, pos) @@ -360,22 +365,22 @@ if start Author: Armin Rigo Branch: Changeset: r94740:937d78fe0a37 Date: 2018-06-09 06:38 +0200 http://bitbucket.org/pypy/pypy/changeset/937d78fe0a37/ Log: Issue #2840 Fix multithreading issues in calls to setenv() diff --git a/rpython/rlib/rposix_environ.py b/rpython/rlib/rposix_environ.py --- a/rpython/rlib/rposix_environ.py +++ b/rpython/rlib/rposix_environ.py @@ -11,6 +11,13 @@ str0 = annmodel.s_Str0 +def llexternal(name, args, result, **kwds): + # Issue #2840 + # All functions defined here should be releasegil=False, both + # because it doesn't make much sense to release the GIL and + # because the OS environment functions are usually not thread-safe + return 
rffi.llexternal(name, args, result, releasegil=False, **kwds) + # ____________________________________________________________ # # Annotation support to control access to 'os.environ' in the RPython @@ -66,7 +73,7 @@ prefix = '' if sys.platform.startswith('darwin'): CCHARPPP = rffi.CArrayPtr(rffi.CCHARPP) - _os_NSGetEnviron = rffi.llexternal( + _os_NSGetEnviron = llexternal( '_NSGetEnviron', [], CCHARPPP, compilation_info=ExternalCompilationInfo(includes=['crt_externs.h']) ) @@ -119,14 +126,13 @@ def r_putenv(name, value): just_a_placeholder -os_getenv = rffi.llexternal('getenv', [rffi.CCHARP], rffi.CCHARP, - releasegil=False) -os_putenv = rffi.llexternal(prefix + 'putenv', [rffi.CCHARP], rffi.INT, +os_getenv = llexternal('getenv', [rffi.CCHARP], rffi.CCHARP) +os_putenv = llexternal(prefix + 'putenv', [rffi.CCHARP], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO) if _WIN32: - _wgetenv = rffi.llexternal('_wgetenv', [rffi.CWCHARP], rffi.CWCHARP, - compilation_info=eci, releasegil=False) - _wputenv = rffi.llexternal('_wputenv', [rffi.CWCHARP], rffi.INT, + _wgetenv = llexternal('_wgetenv', [rffi.CWCHARP], rffi.CWCHARP, + compilation_info=eci) + _wputenv = llexternal('_wputenv', [rffi.CWCHARP], rffi.INT, compilation_info=eci, save_err=rffi.RFFI_SAVE_LASTERROR) @@ -204,7 +210,7 @@ REAL_UNSETENV = False if hasattr(__import__(os.name), 'unsetenv'): - os_unsetenv = rffi.llexternal('unsetenv', [rffi.CCHARP], rffi.INT, + os_unsetenv = llexternal('unsetenv', [rffi.CCHARP], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO) def unsetenv_llimpl(name): From pypy.commits at gmail.com Sat Jun 9 01:45:35 2018 From: pypy.commits at gmail.com (wlav) Date: Fri, 08 Jun 2018 22:45:35 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: method template improvements Message-ID: <5b1b697f.1c69fb81.79ad.ada2@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94741:068d5604f6e9 Date: 2018-06-07 22:35 -0700 http://bitbucket.org/pypy/pypy/changeset/068d5604f6e9/ Log: method template 
improvements diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -677,7 +677,7 @@ W_CPPOverload.__init__(self, space, declaring_scope, functions, flags) self.name = name self.overloads = {} - self.master = None + self.master = self @unwrap_spec(args_w='args_w') def descr_get(self, w_cppinstance, args_w): @@ -685,15 +685,36 @@ return self # unbound cppol = W_CPPTemplateOverload(self.space, self.name, self.scope, self.functions, self.flags) cppol.w_this = w_cppinstance - cppol.master = self + cppol.master = self.master return cppol # bound @unwrap_spec(args_w='args_w') + def call(self, args_w): + # direct call means attempt to deduce types ourselves + # first, try to match with existing methods + for cppol in self.master.overloads.values(): + try: + cppol.descr_get(self.w_this, []).call(args_w) + except Exception as e: + pass # completely ignore for now; have to see whether errors become confusing + + # if all failed, then try to deduce type + types_w = [self.space.type(obj_w) for obj_w in args_w] + method = self.getitem(types_w) + return method.call(args_w) + + @unwrap_spec(args_w='args_w') def getitem(self, args_w): space = self.space + + if space.isinstance_w(args_w[0], space.w_tuple): + w_args = args_w[0] + else: + w_args = space.newtuple(args_w) + tmpl_args = '' - for i in range(len(args_w)): - w_obj = args_w[i] + for i in range(space.len_w(w_args)): + w_obj = space.getitem(w_args, space.newint(i)) if space.isinstance_w(w_obj, space.w_text): s = space.text_w(w_obj) # string describing type elif space.isinstance_w(w_obj, space.w_type): @@ -712,22 +733,23 @@ fullname = self.name+'<'+tmpl_args+'>' # find/instantiate new callable function - master = self.master - if not master: - master = self try: - return master.overloads[fullname].descr_get(self.w_this, []) + return self.master.overloads[fullname].descr_get(self.w_this, []) except KeyError: pass 
cppmeth = capi.c_get_method_template(space, self.scope, fullname) + if not cppmeth: + raise oefmt(self.space.w_AttributeError, + "scope '%s' has no function %s", self.scope.name, fullname) + funcs = [] ftype = self.scope._make_cppfunction(fullname, cppmeth, funcs) if ftype & FUNCTION_IS_STATIC: cppol = W_CPPStaticOverload(space, self.scope, funcs[:], self.flags) else: cppol = W_CPPOverload(space, self.scope, funcs[:], self.flags) - master.overloads[fullname] = cppol + self.master.overloads[fullname] = cppol return cppol.descr_get(self.w_this, []) def __repr__(self): From pypy.commits at gmail.com Sat Jun 9 01:45:38 2018 From: pypy.commits at gmail.com (wlav) Date: Fri, 08 Jun 2018 22:45:38 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: further support for templated methods and for sfinae Message-ID: <5b1b6982.1c69fb81.26a75.b216@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94742:c68cd6b1c308 Date: 2018-06-08 22:26 -0700 http://bitbucket.org/pypy/pypy/changeset/c68cd6b1c308/ Log: further support for templated methods and for sfinae diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -69,7 +69,8 @@ space = self.space cif_descr = self.cif_descr size = cif_descr.exchange_size - raw_string = rffi.cast(rffi.CCHARP, 0) # only ever have one in the CAPI + raw_string1 = rffi.cast(rffi.CCHARP, 0) + raw_string2 = rffi.cast(rffi.CCHARP, 0) # have max two in any CAPI buffer = lltype.malloc(rffi.CCHARP.TO, size, flavor='raw') try: for i in range(len(args)): @@ -91,11 +92,15 @@ else: # only other use is string assert obj.tc == 's' n = len(obj._string) - assert raw_string == rffi.cast(rffi.CCHARP, 0) - # XXX could use rffi.get_nonmovingbuffer_final_null() - raw_string = rffi.str2charp(obj._string) data = rffi.cast(rffi.CCHARPP, data) - data[0] = raw_string + if raw_string1 == rffi.cast(rffi.CCHARP, 0): + 
# XXX could use rffi.get_nonmovingbuffer_final_null() + raw_string1 = rffi.str2charp(obj._string) + data[0] = raw_string1 + else: + assert raw_string2 == rffi.cast(rffi.CCHARP, 0) + raw_string2 = rffi.str2charp(obj._string) + data[0] = raw_string2 jit_libffi.jit_ffi_call(cif_descr, rffi.cast(rffi.VOIDP, funcaddr), @@ -106,8 +111,10 @@ # immediate unwrapping, the round-trip is removed w_res = self.ctitem.copy_and_convert_to_object(resultdata) finally: - if raw_string != rffi.cast(rffi.CCHARP, 0): - rffi.free_charp(raw_string) + if raw_string1 != rffi.cast(rffi.CCHARP, 0): + rffi.free_charp(raw_string1) + if raw_string2 != rffi.cast(rffi.CCHARP, 0): + rffi.free_charp(raw_string2) lltype.free(buffer, flavor='raw') return w_res @@ -218,6 +225,7 @@ 'get_method' : ([c_scope, c_index], c_method), 'method_name' : ([c_method], c_ccharp), + 'method_full_name' : ([c_method], c_ccharp), 'method_mangled_name' : ([c_method], c_ccharp), 'method_result_type' : ([c_method], c_ccharp), 'method_num_args' : ([c_method], c_int), @@ -528,6 +536,8 @@ def c_method_name(space, cppmeth): return charp2str_free(space, call_capi(space, 'method_name', [_ArgH(cppmeth)])) +def c_method_full_name(space, cppmeth): + return charp2str_free(space, call_capi(space, 'method_full_name', [_ArgH(cppmeth)])) def c_method_mangled_name(space, cppmeth): return charp2str_free(space, call_capi(space, 'method_mangled_name', [_ArgH(cppmeth)])) def c_method_result_type(space, cppmeth): @@ -558,8 +568,8 @@ args = [_ArgH(cppscope.handle), _ArgL(index)] return space.bool_w(call_capi(space, 'method_is_template', args)) -def c_get_method_template(space, cppscope, name): - args = [_ArgH(cppscope.handle), _ArgS(name)] +def c_get_method_template(space, cppscope, name, proto): + args = [_ArgH(cppscope.handle), _ArgS(name), _ArgS(proto)] return rffi.cast(C_METHOD, space.uint_w(call_capi(space, 'get_method_template', args))) def c_get_global_operator(space, nss, lc, rc, op): if nss is not None: diff --git 
a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -492,8 +492,8 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPInstance if isinstance(w_obj, W_CPPInstance): - from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_R_VALUE - if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_RVALUE + if w_obj.flags & INSTANCE_FLAGS_IS_RVALUE: # reject moves as all are explicit raise ValueError("lvalue expected") if capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): @@ -522,11 +522,18 @@ class InstanceMoveConverter(InstanceRefConverter): def _unwrap_object(self, space, w_obj): # moving is same as by-ref, but have to check that move is allowed - from pypy.module._cppyy.interp_cppyy import W_CPPInstance, INSTANCE_FLAGS_IS_R_VALUE - if isinstance(w_obj, W_CPPInstance): - if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: - w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE - return InstanceRefConverter._unwrap_object(self, space, w_obj) + from pypy.module._cppyy.interp_cppyy import W_CPPInstance, INSTANCE_FLAGS_IS_RVALUE + obj = space.interp_w(W_CPPInstance, w_obj) + if obj: + if obj.flags & INSTANCE_FLAGS_IS_RVALUE: + obj.flags &= ~INSTANCE_FLAGS_IS_RVALUE + try: + return InstanceRefConverter._unwrap_object(self, space, w_obj) + except Exception: + # TODO: if the method fails on some other converter, then the next + # overload can not be an rvalue anymore + obj.flags |= INSTANCE_FLAGS_IS_RVALUE + raise raise oefmt(space.w_ValueError, "object is not an rvalue") diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -135,6 +135,8 @@ RPY_EXTERN char* cppyy_method_name(cppyy_method_t); RPY_EXTERN + char* cppyy_method_full_name(cppyy_method_t); + RPY_EXTERN char* 
cppyy_method_mangled_name(cppyy_method_t); RPY_EXTERN char* cppyy_method_result_type(cppyy_method_t); @@ -158,7 +160,7 @@ RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN - cppyy_method_t cppyy_get_method_template(cppyy_scope_t scope, const char* name); + cppyy_method_t cppyy_get_method_template(cppyy_scope_t scope, const char* name, const char* proto); RPY_EXTERN cppyy_index_t cppyy_get_global_operator( diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -18,7 +18,7 @@ INSTANCE_FLAGS_PYTHON_OWNS = 0x0001 INSTANCE_FLAGS_IS_REF = 0x0002 -INSTANCE_FLAGS_IS_R_VALUE = 0x0004 +INSTANCE_FLAGS_IS_RVALUE = 0x0004 OVERLOAD_FLAGS_USE_FFI = 0x0001 @@ -679,6 +679,46 @@ self.overloads = {} self.master = self + def construct_template_args(self, w_args): + space = self.space + tmpl_args = '' + for i in range(space.len_w(w_args)): + w_obj = space.getitem(w_args, space.newint(i)) + if space.isinstance_w(w_obj, space.w_text): + s = space.text_w(w_obj) # string describing type + elif space.isinstance_w(w_obj, space.w_type): + try: + # cppyy bound types + name = space.getattr(w_obj, space.newtext('__cppname__')) + except OperationError: + # generic python types + name = space.getattr(w_obj, space.newtext('__name__')) + s = space.text_w(name) + else: + # builtin types etc. 
+ s = space.text_w(space.str(w_obj)) + # map python types -> C++ types + if s == 'str': s = 'std::string' + if i != 0: tmpl_args += ', ' + tmpl_args += s + return tmpl_args + + def find_method_template(self, name, proto = ''): + # find/instantiate new callable function + space = self.space + cppmeth = capi.c_get_method_template(space, self.scope, name, proto) + if not cppmeth: + raise oefmt(self.space.w_AttributeError, + "scope '%s' has no function %s", self.scope.name, name) + + funcs = [] + ftype = self.scope._make_cppfunction(name, cppmeth, funcs) + if ftype & FUNCTION_IS_STATIC: + cppol = W_CPPStaticOverload(space, self.scope, funcs[:], self.flags) + else: + cppol = W_CPPOverload(space, self.scope, funcs[:], self.flags) + return cppol + @unwrap_spec(args_w='args_w') def descr_get(self, w_cppinstance, args_w): if self.space.is_w(w_cppinstance, self.space.w_None): @@ -695,13 +735,21 @@ for cppol in self.master.overloads.values(): try: cppol.descr_get(self.w_this, []).call(args_w) - except Exception as e: + except Exception: pass # completely ignore for now; have to see whether errors become confusing - # if all failed, then try to deduce type - types_w = [self.space.type(obj_w) for obj_w in args_w] - method = self.getitem(types_w) - return method.call(args_w) + # if all failed, then try to deduce from argument types + w_types = self.space.newtuple([self.space.type(obj_w) for obj_w in args_w]) + proto = self.construct_template_args(w_types) + method = self.find_method_template(self.name, proto) + + # only cache result if the name retains the full template + if len(method.functions) == 1: + fullname = capi.c_method_full_name(self.space, method.functions[0].cppmethod) + if 0 <= fullname.rfind('>'): + self.master.overloads[fullname] = method + + return method.descr_get(self.w_this, []).call(args_w) @unwrap_spec(args_w='args_w') def getitem(self, args_w): @@ -712,45 +760,17 @@ else: w_args = space.newtuple(args_w) - tmpl_args = '' - for i in 
range(space.len_w(w_args)): - w_obj = space.getitem(w_args, space.newint(i)) - if space.isinstance_w(w_obj, space.w_text): - s = space.text_w(w_obj) # string describing type - elif space.isinstance_w(w_obj, space.w_type): - try: - # cppyy bound types - name = space.getattr(w_obj, space.newtext('__cppname__')) - except OperationError: - # generic python types - name = space.getattr(w_obj, space.newtext('__name__')) - s = space.text_w(name) - else: - # builtin types etc. - s = space.text_w(space.str(w_obj)) - if i != 0: tmpl_args += ', ' - tmpl_args += s + tmpl_args = self.construct_template_args(w_args) fullname = self.name+'<'+tmpl_args+'>' + try: + method = self.master.overloads[fullname] + except KeyError: + method = self.find_method_template(fullname) - # find/instantiate new callable function - try: - return self.master.overloads[fullname].descr_get(self.w_this, []) - except KeyError: - pass + # cache result (name is always full templated name) + self.master.overloads[fullname] = method - cppmeth = capi.c_get_method_template(space, self.scope, fullname) - if not cppmeth: - raise oefmt(self.space.w_AttributeError, - "scope '%s' has no function %s", self.scope.name, fullname) - - funcs = [] - ftype = self.scope._make_cppfunction(fullname, cppmeth, funcs) - if ftype & FUNCTION_IS_STATIC: - cppol = W_CPPStaticOverload(space, self.scope, funcs[:], self.flags) - else: - cppol = W_CPPOverload(space, self.scope, funcs[:], self.flags) - self.master.overloads[fullname] = cppol - return cppol.descr_get(self.w_this, []) + return method.descr_get(self.w_this, []) def __repr__(self): return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] @@ -1502,7 +1522,7 @@ """Casts the given instance into an C++-style rvalue.""" obj = space.interp_w(W_CPPInstance, w_obj) if obj: - obj.flags |= INSTANCE_FLAGS_IS_R_VALUE + obj.flags |= INSTANCE_FLAGS_IS_RVALUE return w_obj diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- 
a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -8,9 +8,9 @@ # the interp-level does not support metaclasses, they are created at app-level. # These are the metaclass base classes: class CPPScope(type): - def __getattr__(self, name): + def __getattr__(self, name, type_only=False): try: - return get_scoped_pycppitem(self, name) # will cache on self + return get_scoped_pycppitem(self, name, type_only) # will cache on self except Exception as e: raise AttributeError("%s object has no attribute '%s' (details: %s)" % (self, name, str(e))) @@ -52,7 +52,11 @@ fullname = ''.join( [self._name, '<', ','.join(map(self._arg_to_str, args))]) fullname += '>' - return getattr(self._scope, fullname) + try: + return getattr(self._scope, fullname, True) + except AttributeError: + pass + raise TypeError("%s does not exist" % fullname) def __getitem__(self, *args): if args and type(args[0]) == tuple: @@ -214,7 +218,7 @@ return CPPTemplate(template_name, scope) -def get_scoped_pycppitem(scope, name): +def get_scoped_pycppitem(scope, name, type_only=False): import _cppyy # resolve typedefs/aliases: these may cross namespaces, in which case @@ -237,6 +241,9 @@ else: pycppitem = make_cppclass(scope, name, cppitem) + if type_only: + return pycppitem + # templates if not cppitem: cppitem = _cppyy._is_template(final_scoped_name) diff --git a/pypy/module/_cppyy/src/dummy_backend.cxx b/pypy/module/_cppyy/src/dummy_backend.cxx --- a/pypy/module/_cppyy/src/dummy_backend.cxx +++ b/pypy/module/_cppyy/src/dummy_backend.cxx @@ -941,6 +941,10 @@ return cppstring_to_cstring(((Cppyy_PseudoMethodInfo*)method)->m_name); } +char* cppyy_method_full_name(cppyy_method_t method) { + return cppstring_to_cstring(((Cppyy_PseudoMethodInfo*)method)->m_name); +} + char* cppyy_method_result_type(cppyy_method_t method) { return cppstring_to_cstring(((Cppyy_PseudoMethodInfo*)method)->m_returntype); } diff --git a/pypy/module/_cppyy/test/advancedcpp.cxx 
b/pypy/module/_cppyy/test/advancedcpp.cxx --- a/pypy/module/_cppyy/test/advancedcpp.cxx +++ b/pypy/module/_cppyy/test/advancedcpp.cxx @@ -77,6 +77,10 @@ double my_global_array[500]; static double sd = 1234.; double* my_global_ptr = &sd; +some_int_holder my_global_int_holders[5] = { + some_int_holder(13), some_int_holder(42), some_int_holder(88), + some_int_holder(-1), some_int_holder(17) }; + // for life-line and identity testing int some_class_with_data::some_data::s_num_data = 0; diff --git a/pypy/module/_cppyy/test/advancedcpp.h b/pypy/module/_cppyy/test/advancedcpp.h --- a/pypy/module/_cppyy/test/advancedcpp.h +++ b/pypy/module/_cppyy/test/advancedcpp.h @@ -266,8 +266,8 @@ class some_comparable { }; -bool operator==(const some_comparable& c1, const some_comparable& c2 ); -bool operator!=( const some_comparable& c1, const some_comparable& c2 ); +bool operator==(const some_comparable& c1, const some_comparable& c2); +bool operator!=(const some_comparable& c1, const some_comparable& c2); //=========================================================================== @@ -276,6 +276,17 @@ extern double* my_global_ptr; static const char my_global_string[] = "aap " " noot " " mies"; +class some_int_holder { +public: + some_int_holder(int val) : m_val(val) {} + +public: + int m_val; + char gap[7]; +}; +extern some_int_holder my_global_int_holders[5]; + + //=========================================================================== class some_class_with_data { // for life-line and identity testing public: diff --git a/pypy/module/_cppyy/test/advancedcpp.xml b/pypy/module/_cppyy/test/advancedcpp.xml --- a/pypy/module/_cppyy/test/advancedcpp.xml +++ b/pypy/module/_cppyy/test/advancedcpp.xml @@ -40,6 +40,7 @@ + diff --git a/pypy/module/_cppyy/test/test_pythonization.py b/pypy/module/_cppyy/test/test_pythonization.py --- a/pypy/module/_cppyy/test/test_pythonization.py +++ b/pypy/module/_cppyy/test/test_pythonization.py @@ -75,6 +75,9 @@ import _cppyy as cppyy + # TODO: 
disabled for now until decided on proper naming/iface + return + cppyy.gbl.pyzables.GimeDerived._creates = True result = cppyy.gbl.pyzables.GimeDerived() diff --git a/pypy/module/_cppyy/test/test_templates.py b/pypy/module/_cppyy/test/test_templates.py --- a/pypy/module/_cppyy/test/test_templates.py +++ b/pypy/module/_cppyy/test/test_templates.py @@ -124,7 +124,7 @@ import _cppyy Obj1 = _cppyy.gbl.AttrTesting.Obj1 Obj2 = _cppyy.gbl.AttrTesting.Obj2 - select_template_arg = _cppyy.gbl.AttrTesting.has_var1 + select_template_arg = _cppyy.gbl.AttrTesting.select_template_arg #assert select_template_arg[0, Obj1, Obj2].argument == Obj1 assert select_template_arg[1, Obj1, Obj2].argument == Obj2 diff --git a/pypy/module/_cppyy/test/test_zjit.py b/pypy/module/_cppyy/test/test_zjit.py --- a/pypy/module/_cppyy/test/test_zjit.py +++ b/pypy/module/_cppyy/test/test_zjit.py @@ -61,6 +61,10 @@ typename = "str" def __init__(self, val): self.val = val +class FakeTuple(FakeBase): + typename = "tuple" + def __init__(self, val): + self.val = val class FakeType(FakeBase): typename = "type" def __init__(self, name): @@ -172,6 +176,13 @@ def newtext(self, obj): return FakeString(obj) + @specialize.argtype(1) + def newtuple(self, obj): + return FakeTuple(obj) + + def getitem(self, coll, i): + return coll.val[i.val] + def float_w(self, w_obj, allow_conversion=True): assert isinstance(w_obj, FakeFloat) return w_obj.val From pypy.commits at gmail.com Sat Jun 9 02:08:42 2018 From: pypy.commits at gmail.com (mattip) Date: Fri, 08 Jun 2018 23:08:42 -0700 (PDT) Subject: [pypy-commit] pypy default: add missing defines and typedefs for numpy, pandas on MSVC Message-ID: <5b1b6eea.1c69fb81.86189.6e45@mx.google.com> Author: Matti Picus Branch: Changeset: r94743:d246497b019e Date: 2018-06-09 05:10 -0700 http://bitbucket.org/pypy/pypy/changeset/d246497b019e/ Log: add missing defines and typedefs for numpy, pandas on MSVC diff --git a/pypy/module/cpyext/include/pyconfig.h 
b/pypy/module/cpyext/include/pyconfig.h --- a/pypy/module/cpyext/include/pyconfig.h +++ b/pypy/module/cpyext/include/pyconfig.h @@ -28,16 +28,27 @@ #endif #ifndef Py_BUILD_CORE /* not building the core - must be an ext */ -# if defined(_MSC_VER) && !defined(_CFFI_) - /* So MSVC users need not specify the .lib file in - * their Makefile (other compilers are generally - * taken care of by distutils.) */ -# ifdef _DEBUG -# error("debug first with cpython") -# pragma comment(lib,"python27.lib") -# else -# pragma comment(lib,"python27.lib") -# endif /* _DEBUG */ +# if defined(_MSC_VER) && !defined(_CFFI_) + /* So MSVC users need not specify the .lib file in + * their Makefile (other compilers are generally + * taken care of by distutils.) + */ +# ifdef _DEBUG +# error("debug first with cpython") +# pragma comment(lib,"python27.lib") +# else +# pragma comment(lib,"python27.lib") +# endif /* _DEBUG */ +# define HAVE_COPYSIGN 1 +# define copysign _copysign +# ifdef MS_WIN64 + typedef __int64 ssize_t; +# else + typedef _W64 int ssize_t; +# endif +#define HAVE_SSIZE_T 1 + + # endif #endif /* _MSC_VER */ From pypy.commits at gmail.com Sat Jun 9 02:08:44 2018 From: pypy.commits at gmail.com (mattip) Date: Fri, 08 Jun 2018 23:08:44 -0700 (PDT) Subject: [pypy-commit] pypy default: try to describe setting up a current MSVC build env for py3, until it changes Message-ID: <5b1b6eec.1c69fb81.4b81a.c029@mx.google.com> Author: Matti Picus Branch: Changeset: r94744:109e417f3057 Date: 2018-06-09 06:00 -0700 http://bitbucket.org/pypy/pypy/changeset/109e417f3057/ Log: try to describe setting up a current MSVC build env for py3, until it changes diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -29,29 +29,28 @@ ``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` or in ``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. 
-A current version of ``setuptools`` will be able to find it there. For -Windows 10, you must right-click the download, and under ``Properties`` -> -``Compatibility`` mark it as ``Run run this program in comatibility mode for`` -``Previous version...``. Also, you must download and install the ``.Net Framework 3.5``, +A current version of ``setuptools`` will be able to find it there. +Also, you must download and install the ``.Net Framework 3.5``, otherwise ``mt.exe`` will silently fail. Installation will begin automatically by running the mt.exe command by hand from a DOS window (that is how the author discovered the problem). .. _Microsoft Visual C++ Compiler for Python 2.7: https://www.microsoft.com/EN-US/DOWNLOAD/DETAILS.ASPX?ID=44266 -Installing "Build Tools for Visual Studio 2017" (for Python 3) +Installing "Build Tools for Visual Studio 2015" (for Python 3) -------------------------------------------------------------- -As documented in the CPython Wiki_, CPython now recommends Visual C++ version -14.0. A compact version of the compiler suite can be obtained from Microsoft_ -downloads, search the page for "Build Tools for Visual Studio 2017". +As documented in the CPython Wiki_, CPython recommends Visual C++ version +14.0 for python version 3.5. A compact version of the compiler suite can be +obtained from Microsoft_ downloads, search the page for "Microsoft Build Tools 2015". -You will also need to install the the `Windows SDK`_ in order to use the -`mt.exe` mainfest compiler. +You will need to reboot the computer for the installation to successfully install and +run the `mt.exe` mainfest compiler. The installation will set the +`VS140COMNTOOLS` environment variable, this is key to distutils/setuptools +finding the compiler .. _Wiki: https://wiki.python.org/moin/WindowsCompilers -.. _Microsoft: https://www.visualstudio.com/downloads -.. _`Windows SDK`: https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk +.. 
_Microsoft: https://www.visualstudio.com/vs/older-downloads/ Translating PyPy with Visual Studio ----------------------------------- @@ -99,6 +98,9 @@ Setting Up Visual Studio 9.0 for building SSL in Python3 -------------------------------------------------------- +**Note: this is old information, left for historical reference. We recommend +using Visual Studio 2015, which now seems to properly set this all up.** + On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after translation. However ``distutils`` does not support the Micorosft-provided Visual C compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The @@ -146,14 +148,14 @@ Installing external packages ---------------------------- -We uses a `repository` parallel to pypy to hold binary compiled versions of the +We uses a subrepository_ inside pypy to hold binary compiled versions of the build dependencies for windows. As part of the `rpython` setup stage, environment variables will be set to use these dependencies. The repository has a README file on how to replicate, and a branch for each supported platform. You may run the `get_externals.py` utility to checkout the proper branch for your platform and PyPy version. -.. _repository: https://bitbucket.org/pypy/external +.. 
_subrepository: https://bitbucket.org/pypy/external Using the mingw compiler ------------------------ From pypy.commits at gmail.com Sat Jun 9 02:11:41 2018 From: pypy.commits at gmail.com (mattip) Date: Fri, 08 Jun 2018 23:11:41 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: merge default into branch Message-ID: <5b1b6f9d.1c69fb81.132e9.bf54@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r94745:2e791fab4895 Date: 2018-06-09 06:10 -0700 http://bitbucket.org/pypy/pypy/changeset/2e791fab4895/ Log: merge default into branch diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -29,29 +29,28 @@ ``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` or in ``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. -A current version of ``setuptools`` will be able to find it there. For -Windows 10, you must right-click the download, and under ``Properties`` -> -``Compatibility`` mark it as ``Run run this program in comatibility mode for`` -``Previous version...``. Also, you must download and install the ``.Net Framework 3.5``, +A current version of ``setuptools`` will be able to find it there. +Also, you must download and install the ``.Net Framework 3.5``, otherwise ``mt.exe`` will silently fail. Installation will begin automatically by running the mt.exe command by hand from a DOS window (that is how the author discovered the problem). .. _Microsoft Visual C++ Compiler for Python 2.7: https://www.microsoft.com/EN-US/DOWNLOAD/DETAILS.ASPX?ID=44266 -Installing "Build Tools for Visual Studio 2017" (for Python 3) +Installing "Build Tools for Visual Studio 2015" (for Python 3) -------------------------------------------------------------- -As documented in the CPython Wiki_, CPython now recommends Visual C++ version -14.0. A compact version of the compiler suite can be obtained from Microsoft_ -downloads, search the page for "Build Tools for Visual Studio 2017". 
+As documented in the CPython Wiki_, CPython recommends Visual C++ version +14.0 for python version 3.5. A compact version of the compiler suite can be +obtained from Microsoft_ downloads, search the page for "Microsoft Build Tools 2015". -You will also need to install the the `Windows SDK`_ in order to use the -`mt.exe` mainfest compiler. +You will need to reboot the computer for the installation to successfully install and +run the `mt.exe` mainfest compiler. The installation will set the +`VS140COMNTOOLS` environment variable, this is key to distutils/setuptools +finding the compiler .. _Wiki: https://wiki.python.org/moin/WindowsCompilers -.. _Microsoft: https://www.visualstudio.com/downloads -.. _`Windows SDK`: https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk +.. _Microsoft: https://www.visualstudio.com/vs/older-downloads/ Translating PyPy with Visual Studio ----------------------------------- @@ -99,6 +98,9 @@ Setting Up Visual Studio 9.0 for building SSL in Python3 -------------------------------------------------------- +**Note: this is old information, left for historical reference. We recommend +using Visual Studio 2015, which now seems to properly set this all up.** + On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after translation. However ``distutils`` does not support the Micorosft-provided Visual C compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The @@ -146,14 +148,14 @@ Installing external packages ---------------------------- -We uses a `repository` parallel to pypy to hold binary compiled versions of the +We uses a subrepository_ inside pypy to hold binary compiled versions of the build dependencies for windows. As part of the `rpython` setup stage, environment variables will be set to use these dependencies. The repository has a README file on how to replicate, and a branch for each supported platform. 
You may run the `get_externals.py` utility to checkout the proper branch for your platform and PyPy version. -.. _repository: https://bitbucket.org/pypy/external +.. _subrepository: https://bitbucket.org/pypy/external Using the mingw compiler ------------------------ diff --git a/pypy/module/cpyext/include/pyconfig.h b/pypy/module/cpyext/include/pyconfig.h --- a/pypy/module/cpyext/include/pyconfig.h +++ b/pypy/module/cpyext/include/pyconfig.h @@ -31,16 +31,27 @@ #endif #ifndef Py_BUILD_CORE /* not building the core - must be an ext */ -# if defined(_MSC_VER) && !defined(_CFFI_) - /* So MSVC users need not specify the .lib file in - * their Makefile (other compilers are generally - * taken care of by distutils.) */ -# ifdef _DEBUG -# error("debug first with cpython") +# if defined(_MSC_VER) && !defined(_CFFI_) + /* So MSVC users need not specify the .lib file in + * their Makefile (other compilers are generally + * taken care of by distutils.) + */ +# ifdef _DEBUG +# error("debug first with cpython") # pragma comment(lib,"python35.lib") -# else +# else # pragma comment(lib,"python35.lib") -# endif /* _DEBUG */ +# endif /* _DEBUG */ +# define HAVE_COPYSIGN 1 +# define copysign _copysign +# ifdef MS_WIN64 + typedef __int64 ssize_t; +# else + typedef _W64 int ssize_t; +# endif +#define HAVE_SSIZE_T 1 + + # endif #endif /* _MSC_VER */ diff --git a/rpython/rlib/rposix_environ.py b/rpython/rlib/rposix_environ.py --- a/rpython/rlib/rposix_environ.py +++ b/rpython/rlib/rposix_environ.py @@ -11,6 +11,13 @@ str0 = annmodel.s_Str0 +def llexternal(name, args, result, **kwds): + # Issue #2840 + # All functions defined here should be releasegil=False, both + # because it doesn't make much sense to release the GIL and + # because the OS environment functions are usually not thread-safe + return rffi.llexternal(name, args, result, releasegil=False, **kwds) + # ____________________________________________________________ # # Annotation support to control access to 'os.environ' in 
the RPython @@ -66,7 +73,7 @@ prefix = '' if sys.platform.startswith('darwin'): CCHARPPP = rffi.CArrayPtr(rffi.CCHARPP) - _os_NSGetEnviron = rffi.llexternal( + _os_NSGetEnviron = llexternal( '_NSGetEnviron', [], CCHARPPP, compilation_info=ExternalCompilationInfo(includes=['crt_externs.h']) ) @@ -119,14 +126,13 @@ def r_putenv(name, value): just_a_placeholder -os_getenv = rffi.llexternal('getenv', [rffi.CCHARP], rffi.CCHARP, - releasegil=False) -os_putenv = rffi.llexternal(prefix + 'putenv', [rffi.CCHARP], rffi.INT, +os_getenv = llexternal('getenv', [rffi.CCHARP], rffi.CCHARP) +os_putenv = llexternal(prefix + 'putenv', [rffi.CCHARP], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO) if _WIN32: - _wgetenv = rffi.llexternal('_wgetenv', [rffi.CWCHARP], rffi.CWCHARP, - compilation_info=eci, releasegil=False) - _wputenv = rffi.llexternal('_wputenv', [rffi.CWCHARP], rffi.INT, + _wgetenv = llexternal('_wgetenv', [rffi.CWCHARP], rffi.CWCHARP, + compilation_info=eci) + _wputenv = llexternal('_wputenv', [rffi.CWCHARP], rffi.INT, compilation_info=eci, save_err=rffi.RFFI_SAVE_LASTERROR) @@ -206,7 +212,7 @@ REAL_UNSETENV = False if hasattr(__import__(os.name), 'unsetenv'): - os_unsetenv = rffi.llexternal('unsetenv', [rffi.CCHARP], rffi.INT, + os_unsetenv = llexternal('unsetenv', [rffi.CCHARP], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO) def unsetenv_llimpl(name): From pypy.commits at gmail.com Sat Jun 9 20:01:39 2018 From: pypy.commits at gmail.com (wlav) Date: Sat, 09 Jun 2018 17:01:39 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: further template function use cases Message-ID: <5b1c6a63.1c69fb81.386b2.2cf4@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94746:6e179cce7824 Date: 2018-06-09 16:42 -0700 http://bitbucket.org/pypy/pypy/changeset/6e179cce7824/ Log: further template function use cases diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ 
b/pypy/module/_cppyy/interp_cppyy.py @@ -162,10 +162,11 @@ # - overloads: user-facing collections of overloaded functions # - wrappers: internal holders of the individual C++ methods # -# W_CPPOverload: instance methods (base class) -# W_CPPConstructorOverload: constructors -# W_CPPStaticOverload: free and static functions -# W_CPPTemplateOverload: templated methods/functions +# W_CPPOverload: instance methods (base class) +# W_CPPConstructorOverload: constructors +# W_CPPStaticOverload: free and static functions +# W_CPPTemplateOverload: templated methods +# W_CPPTemplateStaticOveload: templated free and static functions # # CPPMethod: a single function or method (base class) # CPPSetItem: specialization for Python's __setitem__ @@ -666,18 +667,10 @@ ) -class W_CPPTemplateOverload(W_CPPOverload): - """App-level dispatcher to allow both lookup/instantiation of templated methods and - dispatch among overloads between templated and non-templated overloads.""" +class TemplateOverloadMixin(object): + """Mixin to instantiate templated methods/functions.""" - _attrs_ = ['name', 'overloads', 'master'] - _immutable_fields_ = ['name'] - - def __init__(self, space, name, declaring_scope, functions, flags = OVERLOAD_FLAGS_USE_FFI): - W_CPPOverload.__init__(self, space, declaring_scope, functions, flags) - self.name = name - self.overloads = {} - self.master = self + _mixin_ = True def construct_template_args(self, w_args): space = self.space @@ -719,19 +712,8 @@ cppol = W_CPPOverload(space, self.scope, funcs[:], self.flags) return cppol - @unwrap_spec(args_w='args_w') - def descr_get(self, w_cppinstance, args_w): - if self.space.is_w(w_cppinstance, self.space.w_None): - return self # unbound - cppol = W_CPPTemplateOverload(self.space, self.name, self.scope, self.functions, self.flags) - cppol.w_this = w_cppinstance - cppol.master = self.master - return cppol # bound - - @unwrap_spec(args_w='args_w') - def call(self, args_w): - # direct call means attempt to deduce types 
ourselves - # first, try to match with existing methods + def instantiation_from_args(self, args_w): + # try to match with run-time instantiations for cppol in self.master.overloads.values(): try: cppol.descr_get(self.w_this, []).call(args_w) @@ -772,6 +754,42 @@ return method.descr_get(self.w_this, []) + +class W_CPPTemplateOverload(W_CPPOverload, TemplateOverloadMixin): + """App-level dispatcher to allow both lookup/instantiation of templated methods and + dispatch among overloads between templated and non-templated method.""" + + _attrs_ = ['name', 'overloads', 'master'] + _immutable_fields_ = ['name'] + + def __init__(self, space, name, declaring_scope, functions, flags = OVERLOAD_FLAGS_USE_FFI): + W_CPPOverload.__init__(self, space, declaring_scope, functions, flags) + self.name = name + self.overloads = {} + self.master = self + + @unwrap_spec(args_w='args_w') + def descr_get(self, w_cppinstance, args_w): + # like W_CPPOverload, but returns W_CPPTemplateOverload + if self.space.is_w(w_cppinstance, self.space.w_None): + return self # unbound, so no new instance needed + cppol = W_CPPTemplateOverload(self.space, self.name, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + return cppol # bound + + @unwrap_spec(args_w='args_w') + def call(self, args_w): + # direct call: either pick non-templated overload or attempt to deduce + # the template instantiation from the argument types + + # try existing overloads or compile-time instantiations + try: + return W_CPPOverload.call(self, args_w) + except Exception: + pass + + return self.instantiation_from_args(args_w) + def __repr__(self): return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] @@ -784,6 +802,57 @@ __doc__ = GetSetProperty(W_CPPTemplateOverload.fget_doc) ) +class W_CPPTemplateStaticOverload(W_CPPStaticOverload, TemplateOverloadMixin): + """App-level dispatcher to allow both lookup/instantiation of templated methods and + dispatch among overloads between 
templated and non-templated method.""" + + _attrs_ = ['name', 'overloads', 'master'] + _immutable_fields_ = ['name'] + + def __init__(self, space, name, declaring_scope, functions, flags = OVERLOAD_FLAGS_USE_FFI): + W_CPPStaticOverload.__init__(self, space, declaring_scope, functions, flags) + self.name = name + self.overloads = {} + self.master = self + + @unwrap_spec(args_w='args_w') + def descr_get(self, w_cppinstance, args_w): + # like W_CPPStaticOverload, but returns W_CPPTemplateStaticOverload + if isinstance(w_cppinstance, W_CPPInstance): + cppinstance = self.space.interp_w(W_CPPInstance, w_cppinstance) + if cppinstance.clsdecl.handle != self.scope.handle: + cppol = W_CPPTemplateStaticOverload(self.space, self.name, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + cppol.master = self.master + return cppol # bound + return self # unbound + + @unwrap_spec(args_w='args_w') + def call(self, args_w): + # direct call: either pick non-templated overload or attempt to deduce + # the template instantiation from the argument types + + # try existing overloads or compile-time instantiations + try: + return W_CPPStaticOverload.call(self, args_w) + except Exception: + pass + + # try new instantiation + return self.instantiation_from_args(args_w) + + def __repr__(self): + return "W_CPPTemplateStaticOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPTemplateStaticOverload.typedef = TypeDef( + 'CPPTemplateStaticOverload', + __get__ = interp2app(W_CPPTemplateStaticOverload.descr_get), + __getitem__ = interp2app(W_CPPTemplateStaticOverload.getitem), + __call__ = interp2app(W_CPPTemplateStaticOverload.call), + __useffi__ = GetSetProperty(W_CPPTemplateStaticOverload.fget_useffi, W_CPPTemplateStaticOverload.fset_useffi), + __doc__ = GetSetProperty(W_CPPTemplateStaticOverload.fget_doc) +) + #----- # Classes for data members: @@ -1006,10 +1075,10 @@ if capi.c_method_is_template(self.space, self, idx): templated = True if templated: - return 
W_CPPTemplateOverload(self.space, meth_name, self, cppfunctions[:]) + return W_CPPTemplateStaticOverload(self.space, meth_name, self, cppfunctions[:]) return W_CPPStaticOverload(self.space, self, cppfunctions[:]) elif capi.c_exists_method_template(self.space, self, meth_name): - return W_CPPTemplateOverload(self.space, meth_name, self, []) + return W_CPPTemplateStaticOverload(self.space, meth_name, self, []) raise self.missing_attribute_error(meth_name) def find_datamember(self, dm_name): @@ -1092,9 +1161,14 @@ if ftype & FUNCTION_IS_CONSTRUCTOR: overload = W_CPPConstructorOverload(self.space, self, methods[:]) elif ftype & FUNCTION_IS_STATIC: - overload = W_CPPStaticOverload(self.space, self, methods[:]) + if ftype & FUNCTION_IS_TEMPLATE: + cppname = capi.c_method_name(self.space, methods[0].cppmethod) + overload = W_CPPTemplateStaticOverload(self.space, cppname, self, methods[:]) + else: + overload = W_CPPStaticOverload(self.space, self, methods[:]) elif ftype & FUNCTION_IS_TEMPLATE: - overload = W_CPPTemplateOverload(self.space, pyname, self, methods[:]) + cppname = capi.c_method_name(self.space, methods[0].cppmethod) + overload = W_CPPTemplateOverload(self.space, cppname, self, methods[:]) else: overload = W_CPPOverload(self.space, self, methods[:]) self.overloads[pyname] = overload diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -8,9 +8,9 @@ # the interp-level does not support metaclasses, they are created at app-level. 
# These are the metaclass base classes: class CPPScope(type): - def __getattr__(self, name, type_only=False): + def __getattr__(self, name): try: - return get_scoped_pycppitem(self, name, type_only) # will cache on self + return get_scoped_pycppitem(self, name) # will cache on self except Exception as e: raise AttributeError("%s object has no attribute '%s' (details: %s)" % (self, name, str(e))) @@ -53,10 +53,13 @@ [self._name, '<', ','.join(map(self._arg_to_str, args))]) fullname += '>' try: - return getattr(self._scope, fullname, True) - except AttributeError: + return self._scope.__dict__[fullname] + except KeyError: pass - raise TypeError("%s does not exist" % fullname) + result = get_scoped_pycppitem(self._scope, fullname, True) + if not result: + raise TypeError("%s does not exist" % fullname) + return result def __getitem__(self, *args): if args and type(args[0]) == tuple: From pypy.commits at gmail.com Sat Jun 9 23:36:23 2018 From: pypy.commits at gmail.com (wlav) Date: Sat, 09 Jun 2018 20:36:23 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: more template tesst Message-ID: <5b1c9cb7.1c69fb81.386b2.407c@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94747:c1d70477c996 Date: 2018-06-09 17:44 -0700 http://bitbucket.org/pypy/pypy/changeset/c1d70477c996/ Log: more template tesst diff --git a/pypy/module/_cppyy/test/test_templates.py b/pypy/module/_cppyy/test/test_templates.py --- a/pypy/module/_cppyy/test/test_templates.py +++ b/pypy/module/_cppyy/test/test_templates.py @@ -89,6 +89,13 @@ #_cppyy.gbl.SomeNS.tuplify(s, 1, 4., "aap") #assert s.str() == '(1, 4, aap) + _cppyy.gbl.gInterpreter.Declare(""" + template + int test04_variadic_func() { return sizeof...(myTypes); } + """) + + assert _cppyy.gbl.test04_variadic_func['int', 'double', 'void*']() == 3 + def test05_variadic_overload(self): """Call an overloaded variadic function""" From pypy.commits at gmail.com Sat Jun 9 23:36:25 2018 From: pypy.commits at gmail.com (wlav) 
Date: Sat, 09 Jun 2018 20:36:25 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: translator fixes Message-ID: <5b1c9cb9.1c69fb81.a34b3.7004@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94748:fc284f4dd3f0 Date: 2018-06-09 20:17 -0700 http://bitbucket.org/pypy/pypy/changeset/fc284f4dd3f0/ Log: translator fixes diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -712,7 +712,7 @@ cppol = W_CPPOverload(space, self.scope, funcs[:], self.flags) return cppol - def instantiation_from_args(self, args_w): + def instantiation_from_args(self, name, args_w): # try to match with run-time instantiations for cppol in self.master.overloads.values(): try: @@ -723,7 +723,7 @@ # if all failed, then try to deduce from argument types w_types = self.space.newtuple([self.space.type(obj_w) for obj_w in args_w]) proto = self.construct_template_args(w_types) - method = self.find_method_template(self.name, proto) + method = self.find_method_template(name, proto) # only cache result if the name retains the full template if len(method.functions) == 1: @@ -733,8 +733,7 @@ return method.descr_get(self.w_this, []).call(args_w) - @unwrap_spec(args_w='args_w') - def getitem(self, args_w): + def getitem_impl(self, name, args_w): space = self.space if space.isinstance_w(args_w[0], space.w_tuple): @@ -743,7 +742,7 @@ w_args = space.newtuple(args_w) tmpl_args = self.construct_template_args(w_args) - fullname = self.name+'<'+tmpl_args+'>' + fullname = name+'<'+tmpl_args+'>' try: method = self.master.overloads[fullname] except KeyError: @@ -788,7 +787,11 @@ except Exception: pass - return self.instantiation_from_args(args_w) + return self.instantiation_from_args(self.name, args_w) + + @unwrap_spec(args_w='args_w') + def getitem(self, args_w): + return self.getitem_impl(self.name, args_w) def __repr__(self): return "W_CPPTemplateOverload(%s)" % 
[f.prototype() for f in self.functions] @@ -839,7 +842,11 @@ pass # try new instantiation - return self.instantiation_from_args(args_w) + return self.instantiation_from_args(self.name, args_w) + + @unwrap_spec(args_w='args_w') + def getitem(self, args_w): + return self.getitem_impl(self.name, args_w) def __repr__(self): return "W_CPPTemplateStaticOverload(%s)" % [f.prototype() for f in self.functions] From pypy.commits at gmail.com Sun Jun 10 01:04:24 2018 From: pypy.commits at gmail.com (wlav) Date: Sat, 09 Jun 2018 22:04:24 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: merge default into branch Message-ID: <5b1cb158.1c69fb81.553bf.2349@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94749:d72f3ca3c3fd Date: 2018-06-09 21:45 -0700 http://bitbucket.org/pypy/pypy/changeset/d72f3ca3c3fd/ Log: merge default into branch diff too long, truncating to 2000 out of 12609 lines diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -33,7 +33,12 @@ 050d84dd78997f021acf0e133934275d63547cc0 release-pypy2.7-v5.4.1 050d84dd78997f021acf0e133934275d63547cc0 release-pypy2.7-v5.4.1 0e2d9a73f5a1818d0245d75daccdbe21b2d5c3ef release-pypy2.7-v5.4.1 +4909c06daf41ce88f87dc01c57959cadad4df4a8 RevDB-pypy2.7-v5.4.1 +4909c06daf41ce88f87dc01c57959cadad4df4a8 RevDB-pypy2.7-v5.4.1 +d7724c0a5700b895a47de44074cdf5fd659a988f RevDB-pypy2.7-v5.4.1 aff251e543859ce4508159dd9f1a82a2f553de00 release-pypy2.7-v5.6.0 +e90317857d27917bf840caf675832292ee070510 RevDB-pypy2.7-v5.6.1 +a24d6c7000c8099c73d3660857f7e3cee5ac045c RevDB-pypy2.7-v5.6.2 fa3249d55d15b9829e1be69cdf45b5a44cec902d release-pypy2.7-v5.7.0 b16a4363e930f6401bceb499b9520955504c6cb0 release-pypy3.5-v5.7.0 1aa2d8e03cdfab54b7121e93fda7e98ea88a30bf release-pypy2.7-v5.7.1 diff --git a/lib-python/2.7/opcode.py b/lib-python/2.7/opcode.py --- a/lib-python/2.7/opcode.py +++ b/lib-python/2.7/opcode.py @@ -194,5 +194,6 @@ def_op('CALL_METHOD', 202) # #args not including 'self' def_op('BUILD_LIST_FROM_ARG', 203) 
jrel_op('JUMP_IF_NOT_DEBUG', 204) # jump over assert statements +def_op('LOAD_REVDB_VAR', 205) # reverse debugger (syntax example: $5) del def_op, name_op, jrel_op, jabs_op diff --git a/lib_pypy/grp.py b/lib_pypy/grp.py --- a/lib_pypy/grp.py +++ b/lib_pypy/grp.py @@ -4,6 +4,8 @@ from _pwdgrp_cffi import ffi, lib import _structseq +import thread +_lock = thread.allocate_lock() try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f @@ -33,32 +35,35 @@ @builtinify def getgrgid(gid): - res = lib.getgrgid(gid) - if not res: - # XXX maybe check error eventually - raise KeyError(gid) - return _group_from_gstruct(res) + with _lock: + res = lib.getgrgid(gid) + if not res: + # XXX maybe check error eventually + raise KeyError(gid) + return _group_from_gstruct(res) @builtinify def getgrnam(name): if not isinstance(name, basestring): raise TypeError("expected string") name = str(name) - res = lib.getgrnam(name) - if not res: - raise KeyError("'getgrnam(): name not found: %s'" % name) - return _group_from_gstruct(res) + with _lock: + res = lib.getgrnam(name) + if not res: + raise KeyError("'getgrnam(): name not found: %s'" % name) + return _group_from_gstruct(res) @builtinify def getgrall(): - lib.setgrent() lst = [] - while 1: - p = lib.getgrent() - if not p: - break - lst.append(_group_from_gstruct(p)) - lib.endgrent() + with _lock: + lib.setgrent() + while 1: + p = lib.getgrent() + if not p: + break + lst.append(_group_from_gstruct(p)) + lib.endgrent() return lst __all__ = ('struct_group', 'getgrgid', 'getgrnam', 'getgrall') diff --git a/lib_pypy/pwd.py b/lib_pypy/pwd.py --- a/lib_pypy/pwd.py +++ b/lib_pypy/pwd.py @@ -12,6 +12,8 @@ from _pwdgrp_cffi import ffi, lib import _structseq +import thread +_lock = thread.allocate_lock() try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f @@ -55,10 +57,11 @@ Return the password database entry for the given numeric user ID. See pwd.__doc__ for more on password database entries. 
""" - pw = lib.getpwuid(uid) - if not pw: - raise KeyError("getpwuid(): uid not found: %s" % uid) - return _mkpwent(pw) + with _lock: + pw = lib.getpwuid(uid) + if not pw: + raise KeyError("getpwuid(): uid not found: %s" % uid) + return _mkpwent(pw) @builtinify def getpwnam(name): @@ -71,10 +74,11 @@ if not isinstance(name, basestring): raise TypeError("expected string") name = str(name) - pw = lib.getpwnam(name) - if not pw: - raise KeyError("getpwname(): name not found: %s" % name) - return _mkpwent(pw) + with _lock: + pw = lib.getpwnam(name) + if not pw: + raise KeyError("getpwname(): name not found: %s" % name) + return _mkpwent(pw) @builtinify def getpwall(): @@ -84,13 +88,14 @@ See pwd.__doc__ for more on password database entries. """ users = [] - lib.setpwent() - while True: - pw = lib.getpwent() - if not pw: - break - users.append(_mkpwent(pw)) - lib.endpwent() + with _lock: + lib.setpwent() + while True: + pw = lib.getpwent() + if not pw: + break + users.append(_mkpwent(pw)) + lib.endpwent() return users __all__ = ('struct_passwd', 'getpwuid', 'getpwnam', 'getpwall') diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -57,6 +57,11 @@ "termios", "_minimal_curses", ]) +reverse_debugger_disable_modules = set([ + "_continuation", "_vmprof", "_multiprocessing", + "micronumpy", + ]) + # XXX this should move somewhere else, maybe to platform ("is this posixish" # check or something) if sys.platform == "win32": @@ -292,6 +297,9 @@ modules = working_modules.copy() if config.translation.sandbox: modules = default_modules + if config.translation.reverse_debugger: + for mod in reverse_debugger_disable_modules: + setattr(config.objspace.usemodules, mod, False) # ignore names from 'essential_modules', notably 'exceptions', which # may not be present in config.objspace.usemodules at all modules = [name for name in modules if name not in essential_modules] diff --git 
a/pypy/doc/architecture.rst b/pypy/doc/architecture.rst --- a/pypy/doc/architecture.rst +++ b/pypy/doc/architecture.rst @@ -73,3 +73,63 @@ This division between bytecode evaluator and object space gives a lot of flexibility. One can plug in different :doc:`object spaces ` to get different or enriched behaviours of the Python objects. + +Layers +------ + +RPython +~~~~~~~ +:ref:`RPython ` is the language in which we write interpreters. +Not the entire PyPy project is written in RPython, only the parts that are +compiled in the translation process. The interesting point is that RPython +has no parser, it's compiled from the live python objects, which makes it +possible to do all kinds of metaprogramming during import time. In short, +Python is a meta programming language for RPython. + +The RPython standard library is to be found in the ``rlib`` subdirectory. + +Consult `Getting Started with RPython`_ for further reading + +Translation +~~~~~~~~~~~ +The translation toolchain - this is the part that takes care of translating +RPython to flow graphs and then to C. There is more in the +:doc:`architecture ` document written about it. + +It lives in the ``rpython`` directory: ``flowspace``, ``annotator`` +and ``rtyper``. + +PyPy Interpreter +~~~~~~~~~~~~~~~~ +This is in the ``pypy`` directory. ``pypy/interpreter`` is a standard +interpreter for Python written in RPython. The fact that it is +RPython is not apparent at first. Built-in modules are written in +``pypy/module/*``. Some modules that CPython implements in C are +simply written in pure Python; they are in the top-level ``lib_pypy`` +directory. The standard library of Python (with a few changes to +accomodate PyPy) is in ``lib-python``. + +JIT Compiler +~~~~~~~~~~~~ +:ref:`Just-in-Time Compiler (JIT) `: we have a tracing JIT that traces the +interpreter written in RPython, rather than the user program that it +interprets. As a result it applies to any interpreter, i.e. any +language. 
But getting it to work correctly is not trivial: it +requires a small number of precise "hints" and possibly some small +refactorings of the interpreter. The JIT itself also has several +almost-independent parts: the tracer itself in ``rpython/jit/metainterp``, the +optimizer in ``rpython/jit/metainterp/optimizer`` that optimizes a list of +residual operations, and the backend in ``rpython/jit/backend/`` +that turns it into machine code. Writing a new backend is a +traditional way to get into the project. + +Garbage Collectors +~~~~~~~~~~~~~~~~~~ +Garbage Collectors (GC): as you may notice if you are used to CPython's +C code, there are no ``Py_INCREF/Py_DECREF`` equivalents in RPython code. +:ref:`rpython:garbage-collection` is inserted +during translation. Moreover, this is not reference counting; it is a real +GC written as more RPython code. The best one we have so far is in +``rpython/memory/gc/incminimark.py``. + +.. _`Getting started with RPython`: http://rpython.readthedocs.org/en/latest/getting-started.html diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -267,14 +267,14 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar commands at install time; the exact list is in - :source:`pypy/tool/release/package.py `. Users + :source:`pypy/tool/release/package.py`. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. * PyPy 2.6 and later: anyone would get ``ImportError: no module named _gdbm_cffi``. Installers need to run ``pypy _gdbm_build.py`` in the ``lib_pypy`` directory during the installation process (plus others; - see the exact list in :source:`pypy/tool/release/package.py `). + see the exact list in :source:`pypy/tool/release/package.py`). 
Users seeing a broken installation of PyPy can fix it after-the-fact, by running ``pypy /path/to/lib_pypy/_gdbm_build.py``. This command produces a file diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst --- a/pypy/doc/coding-guide.rst +++ b/pypy/doc/coding-guide.rst @@ -539,7 +539,7 @@ hg help branch -.. _official wiki: http://mercurial.selenic.com/wiki/Branch +.. _official wiki: https://www.mercurial-scm.org/wiki/ .. _using-development-tracker: @@ -547,15 +547,7 @@ Using the development bug/feature tracker ----------------------------------------- -We have a `development tracker`_, based on Richard Jones' -`roundup`_ application. You can file bugs, -feature requests or see what's going on -for the next milestone, both from an E-Mail and from a -web interface. - -.. _development tracker: https://bugs.pypy.org/ -.. _roundup: http://roundup.sourceforge.net/ - +We use bitbucket for :source:`issues` tracking and :source:`pull-requests`. .. _testing: diff --git a/pypy/doc/commandline_ref.rst b/pypy/doc/commandline_ref.rst --- a/pypy/doc/commandline_ref.rst +++ b/pypy/doc/commandline_ref.rst @@ -8,3 +8,4 @@ :maxdepth: 1 man/pypy.1.rst + man/pypy3.1.rst diff --git a/pypy/doc/conf.py b/pypy/doc/conf.py --- a/pypy/doc/conf.py +++ b/pypy/doc/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short X.Y version. -version = '5.8' +version = '6.0' # The full version, including alpha/beta/rc tags. -release = '5.8.0' +release = '6.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/pypy/doc/config/objspace.usemodules._cppyy.txt b/pypy/doc/config/objspace.usemodules._cppyy.txt new file mode 100644 --- /dev/null +++ b/pypy/doc/config/objspace.usemodules._cppyy.txt @@ -0,0 +1,1 @@ +The internal backend for cppyy diff --git a/pypy/doc/config/objspace.usemodules._rawffi.txt b/pypy/doc/config/objspace.usemodules._rawffi.txt --- a/pypy/doc/config/objspace.usemodules._rawffi.txt +++ b/pypy/doc/config/objspace.usemodules._rawffi.txt @@ -1,3 +1,3 @@ -An experimental module providing very low-level interface to +A module providing very low-level interface to C-level libraries, for use when implementing ctypes, not -intended for a direct use at all. \ No newline at end of file +intended for a direct use at all. diff --git a/pypy/doc/config/objspace.usemodules.cpyext.txt b/pypy/doc/config/objspace.usemodules.cpyext.txt --- a/pypy/doc/config/objspace.usemodules.cpyext.txt +++ b/pypy/doc/config/objspace.usemodules.cpyext.txt @@ -1,1 +1,1 @@ -Use (experimental) cpyext module, that tries to load and run CPython extension modules +Use cpyext module to load and run CPython extension modules diff --git a/pypy/doc/contributing.rst b/pypy/doc/contributing.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/contributing.rst @@ -0,0 +1,472 @@ +Contributing Guidelines +=========================== + +.. contents:: + +PyPy is a very large project that has a reputation of being hard to dive into. +Some of this fame is warranted, some of it is purely accidental. There are three +important lessons that everyone willing to contribute should learn: + +* PyPy has layers. There are many pieces of architecture that are very well + separated from each other. More about this below, but often the manifestation + of this is that things are at a different layer than you would expect them + to be. For example if you are looking for the JIT implementation, you will + not find it in the implementation of the Python programming language. 
+ +* Because of the above, we are very serious about Test Driven Development. + It's not only what we believe in, but also that PyPy's architecture is + working very well with TDD in mind and not so well without it. Often + development means progressing in an unrelated corner, one unittest + at a time; and then flipping a giant switch, bringing it all together. + (It generally works out of the box. If it doesn't, then we didn't + write enough unit tests.) It's worth repeating - PyPy's + approach is great if you do TDD, and not so great otherwise. + +* PyPy uses an entirely different set of tools - most of them included + in the PyPy repository. There is no Makefile, nor autoconf. More below. + +The first thing to remember is that PyPy project is very different than most +projects out there. It's also different from a classic compiler project, +so academic courses about compilers often don't apply or lead in the wrong +direction. However, if you want to understand how designing & building a runtime +works in the real world then this is a great project! + +Getting involved +^^^^^^^^^^^^^^^^ + +PyPy employs a relatively standard open-source development process. You are +encouraged as a first step to join our `pypy-dev mailing list`_ and IRC channel, +details of which can be found in our :ref:`contact ` section. The folks +there are very friendly, and can point you in the right direction. + +We give out commit rights usually fairly liberally, so if you want to do something +with PyPy, you can become a committer. We also run frequent coding sprints which +are separately announced and often happen around Python conferences such as +EuroPython or PyCon. Upcoming events are usually announced on `the blog`_. + +Further Reading: :ref:`Contact ` + +.. _the blog: http://morepypy.blogspot.com +.. 
_pypy-dev mailing list: http://mail.python.org/mailman/listinfo/pypy-dev + + +Your first contribution +^^^^^^^^^^^^^^^^^^^^^^^ + +The first and most important rule how **not** to contribute to PyPy is +"just hacking a feature". This won't work, and you'll find your PR will typically +require a lot of re-work. There are a few reasons why not: + +* build times are large +* PyPy has very thick layer separation +* context of the cPython runtime is often required + +Instead, reach out on the dev mailing list or the IRC channel, and we're more +than happy to help! :) + +Some ideas for first contributions are: + +* Documentation - this will give you an understanding of the pypy architecture +* Test failures - find a failing test in the `nightly builds`_, and fix it +* Missing language features - these are listed in our `issue tracker`_ + +.. _nightly builds: http://buildbot.pypy.org/nightly/ +.. _issue tracker: https://bitbucket.org/pypy/pypy/issues + +Source Control +-------------- + +PyPy development is based a typical fork/pull request based workflow, centered +around Mercurial (hg), hosted on Bitbucket. If you have not used this workflow +before, a good introduction can be found here: + + https://www.atlassian.com/git/tutorials/comparing-workflows/forking-workflow + +The cycle for a new PyPy contributor goes typically like this: + +Fork & Clone +------------ + +* Make an account on bitbucket_. + +* Go to https://bitbucket.org/pypy/pypy/ and click "fork" (left + icons). You get a fork of the repository, e.g. in + `https://bitbucket.org/yourname/pypy/`. + +* Clone your new repo (i.e. the fork) to your local machine with the command + ``hg clone ssh://hg at bitbucket.org/yourname/pypy``. It is a very slow + operation but only ever needs to be done once. See also + http://pypy.org/download.html#building-from-source . 
+ If you already cloned + ``https://bitbucket.org/pypy/pypy`` before, even if some time ago, + then you can reuse the same clone by editing the file ``.hg/hgrc`` in + your clone to contain the line ``default = + ssh://hg at bitbucket.org/yourname/pypy``, and then do ``hg pull && hg + up``. If you already have such a clone but don't want to change it, + you can clone that copy with ``hg clone /path/to/other/copy``, and + then edit ``.hg/hgrc`` as above and do ``hg pull && hg up``. + +* Now you have a complete copy of the PyPy repo. Make a branch + with a command like ``hg branch name_of_your_branch``. + +Edit +---- + +* Edit things. Use ``hg diff`` to see what you changed. Use ``hg add`` + to make Mercurial aware of new files you added, e.g. new test files. + Use ``hg status`` to see if there are such files. Write and run tests! + (See the rest of this page.) + +* Commit regularly with ``hg commit``. A one-line commit message is + fine. We love to have tons of commits; make one as soon as you have + some progress, even if it is only some new test that doesn't pass yet, + or fixing things even if not all tests pass. Step by step, you are + building the history of your changes, which is the point of a version + control system. (There are commands like ``hg log`` and ``hg up`` + that you should read about later, to learn how to navigate this + history.) + +* The commits stay on your machine until you do ``hg push`` to "push" + them back to the repo named in the file ``.hg/hgrc``. Repos are + basically just collections of commits (a commit is also called a + changeset): there is one repo per url, plus one for each local copy on + each local machine. The commands ``hg push`` and ``hg pull`` copy + commits around, with the goal that all repos in question end up with + the exact same set of commits. By opposition, ``hg up`` only updates + the "working copy" by reading the local repository, i.e. 
it makes the + files that you see correspond to the latest (or any other) commit + locally present. + +* You should push often; there is no real reason not to. Remember that + even if they are pushed, with the setup above, the commits are (1) + only in ``bitbucket.org/yourname/pypy``, and (2) in the branch you + named. Yes, they are publicly visible, but don't worry about someone + walking around the thousands of repos on bitbucket saying "hah, look + at the bad coding style of that guy". Try to get into the mindset + that your work is not secret and it's fine that way. We might not + accept it as is for PyPy, asking you instead to improve some things, + but we are not going to judge you. + +Pull Request +------------ + +* The final step is to open a pull request, so that we know that you'd + like to merge that branch back to the original ``pypy/pypy`` repo. + This can also be done several times if you have interesting + intermediate states, but if you get there, then we're likely to + proceed to the next stage, which is... + +* Get a regular account for pushing directly to + ``bitbucket.org/pypy/pypy`` (just ask and you'll get it, basically). + Once you have it you can rewrite your file ``.hg/hgrc`` to contain + ``default = ssh://hg at bitbucket.org/pypy/pypy``. Your changes will + then be pushed directly to the official repo, but (if you follow these + rules) they are still on a branch, and we can still review the + branches you want to merge. + +* If you get closer to the regular day-to-day development, you'll notice + that we generally push small changes as one or a few commits directly + to the branch ``default``. Also, we often collaborate even if we are + on other branches, which do not really "belong" to anyone. At this + point you'll need ``hg merge`` and learn how to resolve conflicts that + sometimes occur when two people try to push different commits in + parallel on the same branch. But it is likely an issue for later ``:-)`` + +.. 
_bitbucket: https://bitbucket.org/ + + +Architecture +^^^^^^^^^^^^ + +PyPy has layers. Just like ogres or onions. Those layers help us keep the +respective parts separated enough to be worked on independently and make the +complexity manageable. This is, again, just a sanity requirement for such +a complex project. For example writing a new optimization for the JIT usually +does **not** involve touching a Python interpreter at all or the JIT assembler +backend or the garbage collector. Instead it requires writing small tests in +``rpython/jit/metainterp/optimizeopt/test/test_*`` and fixing files there. +After that, you can just compile PyPy and things should just work. + +Further Reading: :doc:`architecture ` + +Where to start? +--------------- + +PyPy is made from parts that are relatively independent of each other. +You should start looking at the part that attracts you most (all paths are +relative to the PyPy top level directory). You may look at our +:doc:`directory reference ` or start off at one of the following +points: + +* :source:`pypy/interpreter` contains the bytecode interpreter: bytecode dispatcher + in :source:`pypy/interpreter/pyopcode.py`, frame and code objects in + :source:`pypy/interpreter/eval.py` and :source:`pypy/interpreter/pyframe.py`, + function objects and argument passing in :source:`pypy/interpreter/function.py` + and :source:`pypy/interpreter/argument.py`, the object space interface + definition in :source:`pypy/interpreter/baseobjspace.py`, modules in + :source:`pypy/interpreter/module.py` and :source:`pypy/interpreter/mixedmodule.py`. + Core types supporting the bytecode interpreter are defined in + :source:`pypy/interpreter/typedef.py`. + +* :source:`pypy/interpreter/pyparser` contains a recursive descent parser, + and grammar files that allow it to parse the syntax of various Python + versions. Once the grammar has been processed, the parser can be + translated by the above machinery into efficient code. 
+ +* :source:`pypy/interpreter/astcompiler` contains the compiler. This + contains a modified version of the compiler package from CPython + that fixes some bugs and is translatable. + +* :source:`pypy/objspace/std` contains the + :ref:`Standard object space `. The main file + is :source:`pypy/objspace/std/objspace.py`. For each type, the file + ``xxxobject.py`` contains the implementation for objects of type ``xxx``, + as a first approximation. (Some types have multiple implementations.) + +Building +^^^^^^^^ + +For building PyPy, we recommend installing a pre-built PyPy first (see +:doc:`install`). It is possible to build PyPy with CPython, but it will take a +lot longer to run -- depending on your architecture, between two and three +times as long. + +Further Reading: :doc:`Build ` + +Coding Guide +------------ + +As well as the usual pep8 and formatting standards, there are a number of +naming conventions and coding styles that are important to understand before +browsing the source. + +Further Reading: :doc:`Coding Guide ` + +Testing +^^^^^^^ + +Test driven development +----------------------- + +Instead, we practice a lot of test driven development. This is partly because +of very high quality requirements for compilers and partly because there is +simply no other way to get around such complex project, that will keep you sane. +There are probably people out there who are smart enough not to need it, we're +not one of those. You may consider familiarizing yourself with `pytest`_, +since this is a tool we use for tests. +This leads to the next issue: + +.. _pytest: http://pytest.org/ + +py.test and the py lib +---------------------- + +The `py.test testing tool`_ drives all our testing needs. + +We use the `py library`_ for filesystem path manipulations, terminal +writing, logging and some other support functionality. + +You don't necessarily need to install these two libraries because +we also ship them inlined in the PyPy source tree. + +.. 
_py library: http://pylib.readthedocs.org/ + +Running PyPy's unit tests +------------------------- + +PyPy development always was and is still thoroughly test-driven. +We use the flexible `py.test testing tool`_ which you can `install independently +`_ and use for other projects. + +The PyPy source tree comes with an inlined version of ``py.test`` +which you can invoke by typing:: + + python pytest.py -h + +This is usually equivalent to using an installed version:: + + py.test -h + +If you encounter problems with the installed version +make sure you have the correct version installed which +you can find out with the ``--version`` switch. + +You will need the `build requirements`_ to run tests successfully, since many of +them compile little pieces of PyPy and then run the tests inside that minimal +interpreter. The `cpyext` tests also require `pycparser`, and many tests build +cases with `hypothesis`. + +Now on to running some tests. PyPy has many different test directories +and you can use shell completion to point at directories or files:: + + py.test pypy/interpreter/test/test_pyframe.py + + # or for running tests of a whole subdirectory + py.test pypy/interpreter/ + +See `py.test usage and invocations`_ for some more generic info +on how you can run tests. + +Beware trying to run "all" pypy tests by pointing to the root +directory or even the top level subdirectory ``pypy``. It takes +hours and uses huge amounts of RAM and is not recommended. + +To run CPython regression tests you can point to the ``lib-python`` +directory:: + + py.test lib-python/2.7/test/test_datetime.py + +This will usually take a long time because this will run +the PyPy Python interpreter on top of CPython. On the plus +side, it's usually still faster than doing a full translation +and running the regression test with the translated PyPy Python +interpreter. + +.. _py.test testing tool: http://pytest.org +.. _py.test usage and invocations: http://pytest.org/latest/usage.html#usage +.. 
_`build requirements`: build.html#install-build-time-dependencies + +Testing After Translation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +While the usual invocation of `pytest` translates a piece of RPython code and +runs it, we have a test extension to run tests without translation, directly +on the host python. This is very convenient for modules such as `cpyext`, to +compare and contrast test results between CPython and PyPy. Untranslated tests +are invoked by using the `-A` or `--runappdirect` option to `pytest`:: + + python2 pytest.py -A pypy/module/cpyext/test + +where `python2` can be either `python2` or `pypy2`. On the `py3` branch, the +collection phase must be run with `python2` so untranslated tests are run +with:: + + cpython2 pytest.py -A pypy/module/cpyext/test --python=path/to/pypy3 + + +Tooling & Utilities +^^^^^^^^^^^^^^^^^^^ + +If you are interested in the inner workings of the PyPy Python interpreter, +there are some features of the untranslated Python interpreter that allow you +to introspect its internals. + + +Interpreter-level console +------------------------- + +To start interpreting Python with PyPy, install a C compiler that is +supported by distutils and use Python 2.7 or greater to run PyPy:: + + cd pypy + python bin/pyinteractive.py + +After a few seconds (remember: this is running on top of CPython), you should +be at the PyPy prompt, which is the same as the Python prompt, but with an +extra ">". + +If you press <Ctrl-C> + on the console you enter the interpreter-level console, a +usual CPython console. You can then access internal objects of PyPy +(e.g. the :ref:`object space `) and any variables you have created on the PyPy +prompt with the prefix ``w_``:: + + >>>> a = 123 + >>>> + *** Entering interpreter-level console *** + >>> w_a + W_IntObject(123) + +The mechanism works in both directions. 
If you define a variable with the ``w_`` prefix on the interpreter-level, you will see it on the app-level:: + + >>> w_l = space.newlist([space.wrap(1), space.wrap("abc")]) + >>> + *** Leaving interpreter-level console *** + + KeyboardInterrupt + >>>> l + [1, 'abc'] + +Note that the prompt of the interpreter-level console is only '>>>' since +it runs on CPython level. If you want to return to PyPy, press <Ctrl-D> (under +Linux) or <Ctrl-Z>, <RET> (under Windows). + +Also note that not all modules are available by default in this mode (for +example: ``_continuation`` needed by ``greenlet``) , you may need to use one of +``--withmod-...`` command line options. + +You may be interested in reading more about the distinction between +:ref:`interpreter-level and app-level `. + +pyinteractive.py options +------------------------ + +To list the PyPy interpreter command line options, type:: + + cd pypy + python bin/pyinteractive.py --help + +pyinteractive.py supports most of the options that CPython supports too (in addition to a +large amount of options that can be used to customize pyinteractive.py). +As an example of using PyPy from the command line, you could type:: + + python pyinteractive.py --withmod-time -c "from test import pystone; pystone.main(10)" + +Alternatively, as with regular Python, you can simply give a +script name on the command line:: + + python pyinteractive.py --withmod-time ../../lib-python/2.7/test/pystone.py 10 + +The ``--withmod-xxx`` option enables the built-in module ``xxx``. By +default almost none of them are, because initializing them takes time. +If you want anyway to enable all built-in modules, you can use +``--allworkingmodules``. + +See our :doc:`configuration sections ` for details about what all the commandline +options do. + + +.. _trace example: + +Tracing bytecode and operations on objects +------------------------------------------ + +You can use a simple tracing mode to monitor the interpretation of +bytecodes. 
To enable it, set ``__pytrace__ = 1`` on the interactive +PyPy console:: + + >>>> __pytrace__ = 1 + Tracing enabled + >>>> x = 5 + : LOAD_CONST 0 (5) + : STORE_NAME 0 (x) + : LOAD_CONST 1 (None) + : RETURN_VALUE 0 + >>>> x + : LOAD_NAME 0 (x) + : PRINT_EXPR 0 + 5 + : LOAD_CONST 0 (None) + : RETURN_VALUE 0 + >>>> + + +Demos +^^^^^ + +The `example-interpreter`_ repository contains an example interpreter +written using the RPython translation toolchain. + +.. _example-interpreter: https://bitbucket.org/pypy/example-interpreter + + +graphviz & pygame for flow graph viewing (highly recommended) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +graphviz and pygame are both necessary if you want to look at generated flow +graphs: + + graphviz: http://www.graphviz.org/Download.php + + pygame: http://www.pygame.org/download.shtml + diff --git a/pypy/doc/discussion/ctypes-implementation.rst b/pypy/doc/discussion/ctypes-implementation.rst --- a/pypy/doc/discussion/ctypes-implementation.rst +++ b/pypy/doc/discussion/ctypes-implementation.rst @@ -141,28 +141,3 @@ .. _pyglet: http://pyglet.org/ - -ctypes configure ------------------ - -We also released ``ctypes-configure``, which is an experimental package -trying to approach the portability issues of ctypes-based code. - -idea -~~~~ - -One of ctypes problems is that ctypes programs are usually not very -platform-independent. We created ctypes_configure, which invokes c -compiler (via distutils) for various platform-dependent details like -exact sizes of types (for example size_t), ``#defines``, exact outline of -structures etc. It replaces in this regard code generator (h2py). - -installation -~~~~~~~~~~~~ - -``easy_install ctypes_configure`` - -usage -~~~~~ - -:source:`ctypes_configure/doc/sample.py` explains in details how to use it. 
diff --git a/pypy/doc/embedding.rst b/pypy/doc/embedding.rst --- a/pypy/doc/embedding.rst +++ b/pypy/doc/embedding.rst @@ -1,5 +1,5 @@ -Embedding PyPy -============== +Embedding PyPy (DEPRECATED) +=========================== PyPy has a very minimal and a very strange embedding interface, based on the usage of `cffi`_ and the philosophy that Python is a better language than diff --git a/pypy/doc/eventhistory.rst b/pypy/doc/eventhistory.rst --- a/pypy/doc/eventhistory.rst +++ b/pypy/doc/eventhistory.rst @@ -40,11 +40,9 @@ Main focus of the sprint will be on the goals of the upcoming June 0.9 release. -Read more in `the sprint announcement`__, see who is planning to attend -on the `people page`_. +Read more about `the sprint`__ -__ https://bitbucket.org/pypy/extradoc/raw/tip/sprintinfo/ddorf2006/announce.html -.. _people page: https://bitbucket.org/pypy/extradoc/raw/tip/sprintinfo/ddorf2006/people.txt +__ https://bitbucket.org/pypy/extradoc/src/extradoc/sprintinfo/ddorf2006/ PyPy sprint at Akihabara (Tokyo, Japan) diff --git a/pypy/doc/extradoc.rst b/pypy/doc/extradoc.rst --- a/pypy/doc/extradoc.rst +++ b/pypy/doc/extradoc.rst @@ -75,12 +75,12 @@ .. _A Way Forward in Parallelising Dynamic Languages: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/icooolps2014/position-paper.pdf .. _Runtime Feedback in a Meta-Tracing JIT for Efficient Dynamic Languages: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/icooolps2011/jit-hints.pdf .. _Allocation Removal by Partial Evaluation in a Tracing JIT: https://bitbucket.org/pypy/extradoc/raw/extradoc/talk/pepm2011/bolz-allocation-removal.pdf -.. _Towards a Jitting VM for Prolog Execution: http://www.stups.uni-duesseldorf.de/mediawiki/images/a/a7/Pub-BoLeSch2010.pdf +.. _Towards a Jitting VM for Prolog Execution: http://stups.hhu.de/mediawiki/images/a/a7/Pub-BoLeSch2010.pdf .. 
_High performance implementation of Python for CLI/.NET with JIT compiler generation for dynamic languages: http://buildbot.pypy.org/misc/antocuni-thesis.pdf .. _How to *not* write Virtual Machines for Dynamic Languages: https://bitbucket.org/pypy/extradoc/raw/tip/talk/dyla2007/dyla.pdf .. _`Tracing the Meta-Level: PyPy's Tracing JIT Compiler`: https://bitbucket.org/pypy/extradoc/raw/tip/talk/icooolps2009/bolz-tracing-jit.pdf .. _`Faster than C#: Efficient Implementation of Dynamic Languages on .NET`: https://bitbucket.org/pypy/extradoc/raw/tip/talk/icooolps2009-dotnet/cli-jit.pdf -.. _Automatic JIT Compiler Generation with Runtime Partial Evaluation: http://stups.hhu.de/mediawiki/images/b/b9/Master_bolz.pdf +.. _Automatic JIT Compiler Generation with Runtime Partial Evaluation: https://www.researchgate.net/profile/Davide_Ancona/publication/252023163_Automatic_generation_of_JIT_compilers_for_dynamic_languages_in_NET/links/53f2098e0cf2bc0c40e70023/Automatic-generation-of-JIT-compilers-for-dynamic-languages-in-NET.pdf .. _`RPython: A Step towards Reconciling Dynamically and Statically Typed OO Languages`: http://www.disi.unige.it/person/AnconaD/papers/DynamicLanguages_abstracts.html#AACM-DLS07 .. _EU Reports: index-report.html .. _Hardware Transactional Memory Support for Lightweight Dynamic Language Evolution: http://sabi.net/nriley/pubs/dls6-riley.pdf @@ -368,6 +368,6 @@ .. _LLVM: http://llvm.org/ .. _IronPython: http://ironpython.codeplex.com/ .. _Dynamic Native Optimization of Native Interpreters: http://people.csail.mit.edu/gregs/dynamorio.html -.. _JikesRVM: http://jikesrvm.org/ +.. _JikesRVM: http://www.jikesrvm.org/ .. _Tunes: http://tunes.org .. _old Tunes Wiki: http://buildbot.pypy.org/misc/cliki.tunes.org/ diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -67,7 +67,7 @@ you may need to run the command with `sudo` for a global installation. The other commands of ``setup.py`` are available too, like ``build``. 
-.. _PyPI: https://pypi.python.org/pypi +.. _PyPI: https://pypi.org .. _`use virtualenv (as documented here)`: install.html#installing-using-virtualenv @@ -360,7 +360,7 @@ (produced during a sprint). On the `PyPy bitbucket page`_ there is also a Scheme and an Io implementation; both of these are unfinished at the moment. -.. _Topaz: http://topazruby.com/ +.. _Topaz: http://docs.topazruby.com/en/latest/ .. _Hippy: http://morepypy.blogspot.ch/2012/07/hello-everyone.html .. _JavaScript interpreter: https://bitbucket.org/pypy/lang-js/ .. _Prolog interpreter: https://bitbucket.org/cfbolz/pyrolog/ diff --git a/pypy/doc/getting-started-dev.rst b/pypy/doc/getting-started-dev.rst deleted file mode 100644 --- a/pypy/doc/getting-started-dev.rst +++ /dev/null @@ -1,345 +0,0 @@ -Getting Started Developing With PyPy -==================================== - -.. contents:: - - -Using Mercurial ---------------- - -PyPy development is based on Mercurial (hg). If you are not used to -version control, the cycle for a new PyPy contributor goes typically -like this: - -* Make an account on bitbucket_. - -* Go to https://bitbucket.org/pypy/pypy/ and click "fork" (left - icons). You get a fork of the repository, e.g. in - https://bitbucket.org/yourname/pypy/. - -* Clone this new repo (i.e. the fork) to your local machine with the command - ``hg clone ssh://hg at bitbucket.org/yourname/pypy``. It is a very slow - operation but only ever needs to be done once. See also - http://pypy.org/download.html#building-from-source . - If you already cloned - ``https://bitbucket.org/pypy/pypy`` before, even if some time ago, - then you can reuse the same clone by editing the file ``.hg/hgrc`` in - your clone to contain the line ``default = - ssh://hg at bitbucket.org/yourname/pypy``, and then do ``hg pull && hg - up``. 
If you already have such a clone but don't want to change it, - you can clone that copy with ``hg clone /path/to/other/copy``, and - then edit ``.hg/hgrc`` as above and do ``hg pull && hg up``. - -* Now you have a complete copy of the PyPy repo. Make a branch - with a command like ``hg branch name_of_your_branch``. - -* Edit things. Use ``hg diff`` to see what you changed. Use ``hg add`` - to make Mercurial aware of new files you added, e.g. new test files. - Use ``hg status`` to see if there are such files. Write and run tests! - (See the rest of this page.) - -* Commit regularly with ``hg commit``. A one-line commit message is - fine. We love to have tons of commits; make one as soon as you have - some progress, even if it is only some new test that doesn't pass yet, - or fixing things even if not all tests pass. Step by step, you are - building the history of your changes, which is the point of a version - control system. (There are commands like ``hg log`` and ``hg up`` - that you should read about later, to learn how to navigate this - history.) - -* The commits stay on your machine until you do ``hg push`` to "push" - them back to the repo named in the file ``.hg/hgrc``. Repos are - basically just collections of commits (a commit is also called a - changeset): there is one repo per url, plus one for each local copy on - each local machine. The commands ``hg push`` and ``hg pull`` copy - commits around, with the goal that all repos in question end up with - the exact same set of commits. By opposition, ``hg up`` only updates - the "working copy" by reading the local repository, i.e. it makes the - files that you see correspond to the latest (or any other) commit - locally present. - -* You should push often; there is no real reason not to. Remember that - even if they are pushed, with the setup above, the commits are (1) - only in ``bitbucket.org/yourname/pypy``, and (2) in the branch you - named. 
Yes, they are publicly visible, but don't worry about someone - walking around the thousands of repos on bitbucket saying "hah, look - at the bad coding style of that guy". Try to get into the mindset - that your work is not secret and it's fine that way. We might not - accept it as is for PyPy, asking you instead to improve some things, - but we are not going to judge you. - -* The final step is to open a pull request, so that we know that you'd - like to merge that branch back to the original ``pypy/pypy`` repo. - This can also be done several times if you have interesting - intermediate states, but if you get there, then we're likely to - proceed to the next stage, which is... - -* Get a regular account for pushing directly to - ``bitbucket.org/pypy/pypy`` (just ask and you'll get it, basically). - Once you have it you can rewrite your file ``.hg/hgrc`` to contain - ``default = ssh://hg at bitbucket.org/pypy/pypy``. Your changes will - then be pushed directly to the official repo, but (if you follow these - rules) they are still on a branch, and we can still review the - branches you want to merge. - -* If you get closer to the regular day-to-day development, you'll notice - that we generally push small changes as one or a few commits directly - to the branch ``default``. Also, we often collaborate even if we are - on other branches, which do not really "belong" to anyone. At this - point you'll need ``hg merge`` and learn how to resolve conflicts that - sometimes occur when two people try to push different commits in - parallel on the same branch. But it is likely an issue for later ``:-)`` - -.. _bitbucket: https://bitbucket.org/ - - -Running PyPy's unit tests -------------------------- - -PyPy development always was and is still thoroughly test-driven. -We use the flexible `py.test testing tool`_ which you can `install independently -`_ and use for other projects. 
- -The PyPy source tree comes with an inlined version of ``py.test`` -which you can invoke by typing:: - - python pytest.py -h - -This is usually equivalent to using an installed version:: - - py.test -h - -If you encounter problems with the installed version -make sure you have the correct version installed which -you can find out with the ``--version`` switch. - -You will need the `build requirements`_ to run tests successfully, since many of -them compile little pieces of PyPy and then run the tests inside that minimal -interpreter - -Now on to running some tests. PyPy has many different test directories -and you can use shell completion to point at directories or files:: - - py.test pypy/interpreter/test/test_pyframe.py - - # or for running tests of a whole subdirectory - py.test pypy/interpreter/ - -See `py.test usage and invocations`_ for some more generic info -on how you can run tests. - -Beware trying to run "all" pypy tests by pointing to the root -directory or even the top level subdirectory ``pypy``. It takes -hours and uses huge amounts of RAM and is not recommended. - -To run CPython regression tests you can point to the ``lib-python`` -directory:: - - py.test lib-python/2.7/test/test_datetime.py - -This will usually take a long time because this will run -the PyPy Python interpreter on top of CPython. On the plus -side, it's usually still faster than doing a full translation -and running the regression test with the translated PyPy Python -interpreter. - -.. _py.test testing tool: http://pytest.org -.. _py.test usage and invocations: http://pytest.org/latest/usage.html#usage -.. 
_`build requirements`: build.html#install-build-time-dependencies - -Special Introspection Features of the Untranslated Python Interpreter ---------------------------------------------------------------------- - -If you are interested in the inner workings of the PyPy Python interpreter, -there are some features of the untranslated Python interpreter that allow you -to introspect its internals. - - -Interpreter-level console -~~~~~~~~~~~~~~~~~~~~~~~~~ - -To start interpreting Python with PyPy, install a C compiler that is -supported by distutils and use Python 2.7 or greater to run PyPy:: - - cd pypy - python bin/pyinteractive.py - -After a few seconds (remember: this is running on top of CPython), you should -be at the PyPy prompt, which is the same as the Python prompt, but with an -extra ">". - -If you press - on the console you enter the interpreter-level console, a -usual CPython console. You can then access internal objects of PyPy -(e.g. the :ref:`object space `) and any variables you have created on the PyPy -prompt with the prefix ``w_``:: - - >>>> a = 123 - >>>> - *** Entering interpreter-level console *** - >>> w_a - W_IntObject(123) - -The mechanism works in both directions. If you define a variable with the ``w_`` prefix on the interpreter-level, you will see it on the app-level:: - - >>> w_l = space.newlist([space.wrap(1), space.wrap("abc")]) - >>> - *** Leaving interpreter-level console *** - - KeyboardInterrupt - >>>> l - [1, 'abc'] - -Note that the prompt of the interpreter-level console is only '>>>' since -it runs on CPython level. If you want to return to PyPy, press (under -Linux) or , (under Windows). - -Also note that not all modules are available by default in this mode (for -example: ``_continuation`` needed by ``greenlet``) , you may need to use one of -``--withmod-...`` command line options. - -You may be interested in reading more about the distinction between -:ref:`interpreter-level and app-level `. 
- -pyinteractive.py options -~~~~~~~~~~~~~~~~~~~~~~~~ - -To list the PyPy interpreter command line options, type:: - - cd pypy - python bin/pyinteractive.py --help - -pyinteractive.py supports most of the options that CPython supports too (in addition to a -large amount of options that can be used to customize pyinteractive.py). -As an example of using PyPy from the command line, you could type:: - - python pyinteractive.py --withmod-time -c "from test import pystone; pystone.main(10)" - -Alternatively, as with regular Python, you can simply give a -script name on the command line:: - - python pyinteractive.py --withmod-time ../../lib-python/2.7/test/pystone.py 10 - -The ``--withmod-xxx`` option enables the built-in module ``xxx``. By -default almost none of them are, because initializing them takes time. -If you want anyway to enable all built-in modules, you can use -``--allworkingmodules``. - -See our :doc:`configuration sections ` for details about what all the commandline -options do. - - -.. _trace example: - -Tracing bytecode and operations on objects -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can use a simple tracing mode to monitor the interpretation of -bytecodes. To enable it, set ``__pytrace__ = 1`` on the interactive -PyPy console:: - - >>>> __pytrace__ = 1 - Tracing enabled - >>>> x = 5 - : LOAD_CONST 0 (5) - : STORE_NAME 0 (x) - : LOAD_CONST 1 (None) - : RETURN_VALUE 0 - >>>> x - : LOAD_NAME 0 (x) - : PRINT_EXPR 0 - 5 - : LOAD_CONST 0 (None) - : RETURN_VALUE 0 - >>>> - - -Demos ------ - -The `example-interpreter`_ repository contains an example interpreter -written using the RPython translation toolchain. - -.. _example-interpreter: https://bitbucket.org/pypy/example-interpreter - - -Additional Tools for running (and hacking) PyPy ------------------------------------------------ - -We use some optional tools for developing PyPy. 
They are not required to run -the basic tests or to get an interactive PyPy prompt but they help to -understand and debug PyPy especially for the translation process. - - -graphviz & pygame for flow graph viewing (highly recommended) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -graphviz and pygame are both necessary if you -want to look at generated flow graphs: - - graphviz: http://www.graphviz.org/Download.php - - pygame: http://www.pygame.org/download.shtml - - -py.test and the py lib -~~~~~~~~~~~~~~~~~~~~~~ - -The `py.test testing tool`_ drives all our testing needs. - -We use the `py library`_ for filesystem path manipulations, terminal -writing, logging and some other support functionality. - -You don't necessarily need to install these two libraries because -we also ship them inlined in the PyPy source tree. - -.. _py library: http://pylib.readthedocs.org/ - - -Getting involved ----------------- - -PyPy employs an open development process. You are invited to join our -`pypy-dev mailing list`_ or look at the other :ref:`contact -possibilities `. Usually we give out commit rights fairly liberally, so if you -want to do something with PyPy, you can become a committer. We also run frequent -coding sprints which are separately announced and often happen around Python -conferences such as EuroPython or PyCon. Upcoming events are usually announced -on `the blog`_. - -.. _the blog: http://morepypy.blogspot.com -.. _pypy-dev mailing list: http://mail.python.org/mailman/listinfo/pypy-dev - - -.. _start-reading-sources: - -Where to start reading the sources ----------------------------------- - -PyPy is made from parts that are relatively independent of each other. -You should start looking at the part that attracts you most (all paths are -relative to the PyPy top level directory). 
You may look at our :doc:`directory reference ` -or start off at one of the following points: - -* :source:`pypy/interpreter` contains the bytecode interpreter: bytecode dispatcher - in :source:`pypy/interpreter/pyopcode.py`, frame and code objects in - :source:`pypy/interpreter/eval.py` and :source:`pypy/interpreter/pyframe.py`, - function objects and argument passing in :source:`pypy/interpreter/function.py` - and :source:`pypy/interpreter/argument.py`, the object space interface - definition in :source:`pypy/interpreter/baseobjspace.py`, modules in - :source:`pypy/interpreter/module.py` and :source:`pypy/interpreter/mixedmodule.py`. - Core types supporting the bytecode interpreter are defined in :source:`pypy/interpreter/typedef.py`. - -* :source:`pypy/interpreter/pyparser` contains a recursive descent parser, - and grammar files that allow it to parse the syntax of various Python - versions. Once the grammar has been processed, the parser can be - translated by the above machinery into efficient code. - -* :source:`pypy/interpreter/astcompiler` contains the compiler. This - contains a modified version of the compiler package from CPython - that fixes some bugs and is translatable. - -* :source:`pypy/objspace/std` contains the :ref:`Standard object space `. The main file - is :source:`pypy/objspace/std/objspace.py`. For each type, the file - ``xxxobject.py`` contains the implementation for objects of type ``xxx``, - as a first approximation. (Some types have multiple implementations.) diff --git a/pypy/doc/how-to-contribute.rst b/pypy/doc/how-to-contribute.rst deleted file mode 100644 --- a/pypy/doc/how-to-contribute.rst +++ /dev/null @@ -1,93 +0,0 @@ -How to contribute to PyPy -========================= - -This page describes how to contribute to the PyPy project. The first thing -to remember is that PyPy project is very different than most projects out there. 
-It's also different from a classic compiler project, so academic courses -about compilers often don't apply or lead in the wrong direction. - - -Don't just hack ---------------- - -The first and most important rule how not to contribute to PyPy is -"just hacking". This won't work. There are two major reasons why not --- build times are large and PyPy has very thick layer separation which -make it harder to "just hack a feature". - - -Test driven development ------------------------ - -Instead, we practice a lot of test driven development. This is partly because -of very high quality requirements for compilers and partly because there is -simply no other way to get around such complex project, that will keep you sane. -There are probably people out there who are smart enough not to need it, we're -not one of those. You may consider familiarizing yourself with `pytest`_, -since this is a tool we use for tests. -This leads to the next issue: - -.. _pytest: http://pytest.org/ - - -Layers ------- - -PyPy has layers. Just like Ogres or onions. -Those layers help us keep the respective parts separated enough -to be worked on independently and make the complexity manageable. This is, -again, just a sanity requirement for such a complex project. For example writing -a new optimization for the JIT usually does **not** involve touching a Python -interpreter at all or the JIT assembler backend or the garbage collector. -Instead it requires writing small tests in -``rpython/jit/metainterp/optimizeopt/test/test_*`` and fixing files there. -After that, you can just compile PyPy and things should just work. - -The short list of layers for further reading. For each of those layers, a good -entry point is a test subdirectory in respective directories. It usually -describes (better or worse) the interfaces between the submodules. 
For the -``pypy`` subdirectory, most tests are small snippets of python programs that -check for correctness (calls ``AppTestXxx``) that will call the appropriate -part of the interpreter. For the ``rpython`` directory, most tests are small -RPython interpreters that perform certain tasks. To see how they translate -to low-level graphs, run them with ``--view``. To see small interpreters -with a JIT compiler, use ``--viewloops`` option. - -* **python interpreter** - it's the part implemented in the ``pypy/`` directory. - It's implemented in RPython, which is a high level static language with - classes, garbage collection, just-in-time compiler generation and the ability - to call C. A cool part about it is that it can be run untranslated, so all - the tests are runnable without translating PyPy. - - **interpreter** contains the interpreter core - - **objspace** contains implementations of various objects exported to - the Python layer - - **module** directory contains extension modules written in RPython - -* **rpython compiler** that resides in ``rpython/annotator`` and - ``rpython/rtyper`` directories. Consult `Getting Started with RPython`_ - for further reading - -* **JIT generator** lives in ``rpython/jit`` directory. optimizations live - in ``rpython/jit/metainterp/optimizeopt``, the main JIT in - ``rpython/jit/metainterp`` (runtime part) and - ``rpython/jit/codewriter`` (translation-time part). Backends live in - ``rpython/jit/backend``. - -* **garbage collection** lives in ``rpython/memory`` - -The rest of directories serve specific niche goal and are unlikely a good -entry point. - - -More documentation ------------------- - -* `Getting Started Developing With PyPy`_ - -* `Getting Started with RPython`_ - -.. _`Getting Started Developing With PyPy`: getting-started-dev.html -.. 
_`Getting started with RPython`: http://rpython.readthedocs.org/en/latest/getting-started.html diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst --- a/pypy/doc/index-of-whatsnew.rst +++ b/pypy/doc/index-of-whatsnew.rst @@ -34,6 +34,7 @@ whatsnew-2.0.0-beta1.rst whatsnew-1.9.rst + CPython 3.5 compatible versions ------------------------------- diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst --- a/pypy/doc/index.rst +++ b/pypy/doc/index.rst @@ -9,7 +9,7 @@ * If you're interested in trying PyPy out, check out the :doc:`installation instructions `. -* If you want to help develop PyPy, please have a look at :doc:`how to contribute ` +* If you want to help develop PyPy, please have a look at :doc:`contributing ` and get in touch (:ref:`contact`)! All of the documentation and source code is available under the MIT license, @@ -31,6 +31,7 @@ introduction install build + windows faq @@ -40,43 +41,30 @@ ---------- .. toctree:: - :maxdepth: 1 + :maxdepth: 2 cpython_differences extending - embedding gc_info jit-hooks stackless __pypy__-module - objspace-proxies sandbox stm - windows - -.. _developing-pypy: - -Development documentation -------------------------- +Development +----------- .. toctree:: - :maxdepth: 1 + :maxdepth: 2 - getting-started-dev - how-to-contribute - you-want-to-help + contributing architecture configuration project-ideas project-documentation how-to-release -.. TODO: audit ^^ - - -.. TODO: Fill this in - Further resources ----------------- @@ -84,13 +72,10 @@ .. toctree:: :maxdepth: 1 - extradoc - eventhistory - discussions index-of-release-notes index-of-whatsnew contributor - + glossary .. _contact: @@ -118,7 +103,7 @@ the `development mailing list`_. .. _#pypy on irc.freenode.net: irc://irc.freenode.net/pypy -.. _here: http://www.tismer.com/pypy/irc-logs/pypy/ +.. _here: https://botbot.me/freenode/pypy/ .. _Development mailing list: http://mail.python.org/mailman/listinfo/pypy-dev .. 
_Commit mailing list: http://mail.python.org/mailman/listinfo/pypy-commit .. _Development bug/feature tracker: https://bitbucket.org/pypy/pypy/issues diff --git a/pypy/doc/install.rst b/pypy/doc/install.rst --- a/pypy/doc/install.rst +++ b/pypy/doc/install.rst @@ -20,7 +20,7 @@ OS and architecture. You may be able to use either the `most recent release`_ or one of our `development nightly build`_. These builds depend on dynamically linked libraries that may not be available on your -OS. See the section about `Linux binaries` for more info and alternatives that +OS. See the section about `Linux binaries`_ for more info and alternatives that may work on your system. Please note that the nightly builds are not diff --git a/pypy/doc/interpreter.rst b/pypy/doc/interpreter.rst --- a/pypy/doc/interpreter.rst +++ b/pypy/doc/interpreter.rst @@ -102,7 +102,7 @@ program flows with homogeneous name-value assignments on function invocations. -.. _how-to guide for descriptors: http://users.rcn.com/python/download/Descriptor.htm +.. _how-to guide for descriptors: https://docs.python.org/3/howto/descriptor.html Bytecode Interpreter Implementation Classes diff --git a/pypy/doc/man/pypy3.1.rst b/pypy/doc/man/pypy3.1.rst new file mode 100644 --- /dev/null +++ b/pypy/doc/man/pypy3.1.rst @@ -0,0 +1,135 @@ +======= + pypy3 +======= + +.. note: this is turned into a regular man page "pypy3.1" by + doing "make man" in pypy/doc/ + +SYNOPSIS +======== + +``pypy3`` [*options*] +[``-c`` *cmd*\ \|\ ``-m`` *mod*\ \|\ *file.py*\ \|\ ``-``\ ] +[*arg*\ ...] + +OPTIONS +======= + +-i + Inspect interactively after running script. + +-O + Skip assert statements. + +-OO + Remove docstrings when importing modules in addition to ``-O``. + +-c CMD + Program passed in as ``CMD`` (terminates option list). + +-S + Do not ``import site`` on initialization. + +-s + Don't add the user site directory to `sys.path`. + +-u + Unbuffered binary ``stdout`` and ``stderr``. 
+ +-h, --help + Show a help message and exit. + +-m MOD + Library module to be run as a script (terminates option list). + +-W ARG + Warning control (*arg* is *action*:*message*:*category*:*module*:*lineno*). + +-E + Ignore environment variables (such as ``PYTHONPATH``). + +-B + Disable writing bytecode (``.pyc``) files. + +-X track-resources + Produce a ``ResourceWarning`` whenever a file or socket is closed by the + garbage collector. + +--version + Print the PyPy version. + +--info + Print translation information about this PyPy executable. + +--jit ARG + Low level JIT parameters. Mostly internal. Run ``--jit help`` + for more information. + +ENVIRONMENT +=========== + +``PYTHONPATH`` + Add directories to pypy3's module search path. + The format is the same as shell's ``PATH``. + +``PYTHONSTARTUP`` + A script referenced by this variable will be executed before the + first prompt is displayed, in interactive mode. + +``PYTHONDONTWRITEBYTECODE`` + If set to a non-empty value, equivalent to the ``-B`` option. + Disable writing ``.pyc`` files. + +``PYTHONINSPECT`` + If set to a non-empty value, equivalent to the ``-i`` option. + Inspect interactively after running the specified script. + +``PYTHONIOENCODING`` + If this is set, it overrides the encoding used for + *stdin*/*stdout*/*stderr*. + The syntax is *encodingname*:*errorhandler* + The *errorhandler* part is optional and has the same meaning as in + `str.encode`. + +``PYTHONNOUSERSITE`` + If set to a non-empty value, equivalent to the ``-s`` option. + Don't add the user site directory to `sys.path`. + +``PYTHONWARNINGS`` + If set, equivalent to the ``-W`` option (warning control). + The value should be a comma-separated list of ``-W`` parameters. + +``PYPYLOG`` + If set to a non-empty value, enable logging, the format is: + + *fname* or *+fname* + logging for profiling: includes all + ``debug_start``/``debug_stop`` but not any nested + ``debug_print``. + *fname* can be ``-`` to log to *stderr*. 
+ The *+fname* form can be used if there is a *:* in fname + + ``:``\ *fname* + Full logging, including ``debug_print``. + + *prefix*\ ``:``\ *fname* + Conditional logging. + Multiple prefixes can be specified, comma-separated. + Only sections whose name match the prefix will be logged. + + ``PYPYLOG=jit-log-opt,jit-backend:logfile`` will + generate a log suitable for *jitviewer*, a tool for debugging + performance issues under PyPy. + +``PYPY_IRC_TOPIC`` + If set to a non-empty value, print a random #pypy IRC + topic at startup of interactive mode. + + +.. include:: ../gc_info.rst + :start-line: 7 + +SEE ALSO +======== + +**python3**\ (1) diff --git a/pypy/doc/objspace-proxies.rst b/pypy/doc/objspace-proxies.rst --- a/pypy/doc/objspace-proxies.rst +++ b/pypy/doc/objspace-proxies.rst @@ -1,28 +1,7 @@ -What PyPy can do for your objects -================================= - -.. contents:: - - -Thanks to the :doc:`Object Space ` architecture, any feature that is -based on proxying, extending, changing or otherwise controlling the -behavior of objects in a running program is easy to implement on top of PyPy. - -Here is what we have implemented so far, in historical order: - -* *Dump Object Space*: dumps all operations performed on all the objects - into a large log file. For debugging your applications. - -* *Transparent Proxies Extension*: adds new proxy objects to - the Standard Object Space that enable applications to - control operations on application and builtin objects, - e.g lists, dictionaries, tracebacks. - - .. _tproxy: -Transparent Proxies -------------------- +Transparent Proxies (DEPRECATED) +-------------------------------- .. warning:: @@ -194,7 +173,7 @@ application-level code. Transparent proxies are implemented on top of the :ref:`standard object -space `, in :source:`pypy/objspace/std/proxy_helpers.py`, +space `, in :source:`pypy/objspace/std/proxyobject.py`, :source:`pypy/objspace/std/proxyobject.py` and :source:`pypy/objspace/std/transparent.py`. 
To use them you will need to pass a `--objspace-std-withtproxy`_ option to ``pypy`` or ``translate.py``. This registers implementations named :py:class:`W_TransparentXxx` diff --git a/pypy/doc/objspace.rst b/pypy/doc/objspace.rst --- a/pypy/doc/objspace.rst +++ b/pypy/doc/objspace.rst @@ -474,8 +474,8 @@ :source:`pypy/objspace/std/bytesobject.py` defines ``W_AbstractBytesObject``, which contains everything needed to build the ``str`` app-level type; and there are subclasses ``W_BytesObject`` (the usual string) and -``W_StringBufferObject`` (a special implementation tweaked for repeated -additions, in :source:`pypy/objspace/std/strbufobject.py`). For mutable data +``W_Buffer`` (a special implementation tweaked for repeated +additions, in :source:`pypy/objspace/std/bufferobject.py`). For mutable data types like lists and dictionaries, we have a single class ``W_ListObject`` or ``W_DictMultiObject`` which has an indirection to the real data and a strategy; the strategy can change as the content of diff --git a/pypy/doc/project-documentation.rst b/pypy/doc/project-documentation.rst --- a/pypy/doc/project-documentation.rst +++ b/pypy/doc/project-documentation.rst @@ -32,10 +32,13 @@ coding-guide sprint-reports extradoc + eventhistory video-index index-report + discussions dev_method - glossary + embedding + objspace-proxies Source Code Documentation diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -18,3 +18,17 @@ .. branch: crypt_h Include crypt.h for crypt() on Linux + +.. branch: gc-more-logging + +Log additional gc-minor and gc-collect-step info in the PYPYLOG + +.. branch: reverse-debugger + +The reverse-debugger branch has been merged. For more information, see +https://bitbucket.org/pypy/revdb + + +.. branch: pyparser-improvements-3 + +Small refactorings in the Python parser. 
diff --git a/pypy/doc/whatsnew-pypy2-5.10.0.rst b/pypy/doc/whatsnew-pypy2-5.10.0.rst --- a/pypy/doc/whatsnew-pypy2-5.10.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.10.0.rst @@ -32,6 +32,7 @@ .. branch: fix-vmprof-stacklet-switch .. branch: fix-vmprof-stacklet-switch-2 + Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) .. branch: win32-vcvars @@ -39,4 +40,3 @@ .. branch: rdict-fast-hash Make it possible to declare that the hash function of an r_dict is fast in RPython. - diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,33 +25,32 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite may be installed in +was checked in May 2018). Note that the compiler suite may be installed in ``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` or in ``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. -A current version of ``setuptools`` will be able to find it there. For -Windows 10, you must right-click the download, and under ``Properties`` -> -``Compatibility`` mark it as ``Run run this program in comatibility mode for`` -``Previous version...``. Also, you must download and install the ``.Net Framework 3.5``, +A current version of ``setuptools`` will be able to find it there. +Also, you must download and install the ``.Net Framework 3.5``, otherwise ``mt.exe`` will silently fail. Installation will begin automatically by running the mt.exe command by hand from a DOS window (that is how the author discovered the problem). -.. _Microsoft Visual C++ Compiler for Python 2.7: https://www.microsoft.com/en-us/download/details.aspx?id=44266 +.. 
_Microsoft Visual C++ Compiler for Python 2.7: https://www.microsoft.com/EN-US/DOWNLOAD/DETAILS.ASPX?ID=44266 -Installing "Build Tools for Visual Studio 2017" (for Python 3) +Installing "Build Tools for Visual Studio 2015" (for Python 3) -------------------------------------------------------------- -As documented in the CPython Wiki_, CPython now recommends Visual C++ version -14.0. A compact version of the compiler suite can be obtained from Microsoft_ -downloads, search the page for "Build Tools for Visual Studio 2017". +As documented in the CPython Wiki_, CPython recommends Visual C++ version +14.0 for python version 3.5. A compact version of the compiler suite can be +obtained from Microsoft_ downloads, search the page for "Microsoft Build Tools 2015". -You will also need to install the the `Windows SDK`_ in order to use the -`mt.exe` mainfest compiler. +You will need to reboot the computer for the installation to successfully install and +run the `mt.exe` mainfest compiler. The installation will set the +`VS140COMNTOOLS` environment variable, this is key to distutils/setuptools +finding the compiler .. _Wiki: https://wiki.python.org/moin/WindowsCompilers -.. _Microsoft: https://www.visualstudio.com/downloads -.. _`Windows SDK`: https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk +.. _Microsoft: https://www.visualstudio.com/vs/older-downloads/ Translating PyPy with Visual Studio ----------------------------------- @@ -99,6 +98,9 @@ Setting Up Visual Studio 9.0 for building SSL in Python3 -------------------------------------------------------- +**Note: this is old information, left for historical reference. We recommend +using Visual Studio 2015, which now seems to properly set this all up.** + On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after translation. However ``distutils`` does not support the Micorosft-provided Visual C compiler, and ``cffi`` depends on ``distutils`` to find the compiler. 
The @@ -146,14 +148,14 @@ Installing external packages ---------------------------- -We uses a `repository` parallel to pypy to hold binary compiled versions of the +We uses a subrepository_ inside pypy to hold binary compiled versions of the build dependencies for windows. As part of the `rpython` setup stage, environment variables will be set to use these dependencies. The repository has a README file on how to replicate, and a branch for each supported platform. You may run - the `get_externals.py` utility to checkout the proper branch for your platform +the `get_externals.py` utility to checkout the proper branch for your platform and PyPy version. -.. _repository: https://bitbucket.org/pypy/external +.. _subrepository: https://bitbucket.org/pypy/external Using the mingw compiler ------------------------ diff --git a/pypy/doc/you-want-to-help.rst b/pypy/doc/you-want-to-help.rst deleted file mode 100644 --- a/pypy/doc/you-want-to-help.rst +++ /dev/null @@ -1,81 +0,0 @@ -You want to help with PyPy, now what? -===================================== - -PyPy is a very large project that has a reputation of being hard to dive into. -Some of this fame is warranted, some of it is purely accidental. There are three -important lessons that everyone willing to contribute should learn: - -* PyPy has layers. There are many pieces of architecture that are very well - separated from each other. More about this below, but often the manifestation - of this is that things are at a different layer than you would expect them - to be. For example if you are looking for the JIT implementation, you will - not find it in the implementation of the Python programming language. - -* Because of the above, we are very serious about Test Driven Development. - It's not only what we believe in, but also that PyPy's architecture is - working very well with TDD in mind and not so well without it. 
Often - development means progressing in an unrelated corner, one unittest - at a time; and then flipping a giant switch, bringing it all together. - (It generally works out of the box. If it doesn't, then we didn't - write enough unit tests.) It's worth repeating - PyPy's - approach is great if you do TDD, and not so great otherwise. - -* PyPy uses an entirely different set of tools - most of them included - in the PyPy repository. There is no Makefile, nor autoconf. More below. - - -Architecture ------------- - -PyPy has layers. The 100 miles view: - -* :ref:`RPython ` is the language in which we write interpreters. Not the entire - PyPy project is written in RPython, only the parts that are compiled in - the translation process. The interesting point is that RPython has no parser, - it's compiled from the live python objects, which makes it possible to do - all kinds of metaprogramming during import time. In short, Python is a meta - programming language for RPython. - - The RPython standard library is to be found in the ``rlib`` subdirectory. - -* The translation toolchain - this is the part that takes care of translating - RPython to flow graphs and then to C. There is more in the :doc:`architecture ` - document written about it. - - It lives in the ``rpython`` directory: ``flowspace``, ``annotator`` - and ``rtyper``. - -* Python Interpreter and modules - - This is in the ``pypy`` directory. ``pypy/interpreter`` is a standard - interpreter for Python written in RPython. The fact that it is - RPython is not apparent at first. Built-in modules are written in - ``pypy/module/*``. Some modules that CPython implements in C are - simply written in pure Python; they are in the top-level ``lib_pypy`` - directory. The standard library of Python (with a few changes to - accomodate PyPy) is in ``lib-python``. - -* :ref:`Just-in-Time Compiler (JIT) `: we have a tracing JIT that traces the - interpreter written in RPython, rather than the user program that it - interprets. 
As a result it applies to any interpreter, i.e. any - language. But getting it to work correctly is not trivial: it - requires a small number of precise "hints" and possibly some small - refactorings of the interpreter. The JIT itself also has several - almost-independent parts: the tracer itself in ``rpython/jit/metainterp``, the - optimizer in ``rpython/jit/metainterp/optimizer`` that optimizes a list of - residual operations, and the backend in ``rpython/jit/backend/`` - that turns it into machine code. Writing a new backend is a - traditional way to get into the project. - -* Garbage Collectors (GC): as you may notice if you are used to CPython's - C code, there are no ``Py_INCREF/Py_DECREF`` equivalents in RPython code. - :ref:`rpython:garbage-collection` is inserted - during translation. Moreover, this is not reference counting; it is a real - GC written as more RPython code. The best one we have so far is in - ``rpython/memory/gc/incminimark.py``. - - -Toolset -------- - -xxx diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -83,11 +83,24 @@ sys.excepthook(), catching SystemExit, printing a newline after sys.stdout if needed, etc. """ + # don't use try:except: here, otherwise the exception remains + # visible in user code. Make sure revdb_stop is a callable, so + # that we can call it immediately after finally: below. Doing + # so minimizes the number of "blind" lines that we need to go + # back from, with "bstep", after we do "continue" in revdb. 
+ if '__pypy__' in sys.builtin_module_names: + from __pypy__ import revdb_stop + else: + revdb_stop = None + if revdb_stop is None: + revdb_stop = lambda: None + try: # run it try: f(*fargs, **fkwds) finally: + revdb_stop() sys.settrace(None) sys.setprofile(None) diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py --- a/pypy/interpreter/astcompiler/assemble.py +++ b/pypy/interpreter/astcompiler/assemble.py @@ -673,6 +673,7 @@ ops.JUMP_IF_NOT_DEBUG: 0, ops.BUILD_LIST_FROM_ARG: 1, + ops.LOAD_REVDB_VAR: 1, } diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py --- a/pypy/interpreter/astcompiler/ast.py +++ b/pypy/interpreter/astcompiler/ast.py @@ -1593,6 +1593,8 @@ return Num.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Str): return Str.from_object(space, w_node) + if space.isinstance_w(w_node, get(space).w_RevDBMetaVar): + return RevDBMetaVar.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Attribute): return Attribute.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Subscript): @@ -2456,6 +2458,41 @@ State.ast_type('Str', 'expr', ['s']) +class RevDBMetaVar(expr): + + def __init__(self, metavar, lineno, col_offset): + self.metavar = metavar + expr.__init__(self, lineno, col_offset) + + def walkabout(self, visitor): + visitor.visit_RevDBMetaVar(self) + + def mutate_over(self, visitor): + return visitor.visit_RevDBMetaVar(self) + + def to_object(self, space): + w_node = space.call_function(get(space).w_RevDBMetaVar) + w_metavar = space.newint(self.metavar) # int + space.setattr(w_node, space.newtext('metavar'), w_metavar) + w_lineno = space.newint(self.lineno) # int + space.setattr(w_node, space.newtext('lineno'), w_lineno) + w_col_offset = space.newint(self.col_offset) # int + space.setattr(w_node, space.newtext('col_offset'), w_col_offset) + return w_node + + @staticmethod + def from_object(space, w_node): + w_metavar = 
get_field(space, w_node, 'metavar', False) + w_lineno = get_field(space, w_node, 'lineno', False) + w_col_offset = get_field(space, w_node, 'col_offset', False) From pypy.commits at gmail.com Sun Jun 10 19:29:04 2018 From: pypy.commits at gmail.com (wlav) Date: Sun, 10 Jun 2018 16:29:04 -0700 (PDT) Subject: [pypy-commit] pypy cppyy-packaging: py3 fixes Message-ID: <5b1db440.1c69fb81.b025c.16a9@mx.google.com> Author: Wim Lavrijsen Branch: cppyy-packaging Changeset: r94750:749cd13269b0 Date: 2018-06-10 16:10 -0700 http://bitbucket.org/pypy/pypy/changeset/749cd13269b0/ Log: py3 fixes diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -14,7 +14,7 @@ '_set_function_generator': 'interp_cppyy.set_function_generator', '_register_class' : 'interp_cppyy.register_class', '_get_nullptr' : 'interp_cppyy.get_nullptr', - 'CPPInstanceBase' : 'interp_cppyy.W_CPPInstance', + 'CPPInstance' : 'interp_cppyy.W_CPPInstance', 'addressof' : 'interp_cppyy.addressof', '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -391,7 +391,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = self._get_raw_address(space, w_obj, offset) charpptr = rffi.cast(rffi.CCHARPP, address) - return space.newbytes(rffi.charp2str(charpptr[0])) + return space.newtext(rffi.charp2str(charpptr[0])) def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') @@ -408,7 +408,7 @@ strsize = self.size if charpptr[self.size-1] == '\0': strsize = self.size-1 # rffi will add \0 back - return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + return space.newtext(rffi.charpsize2str(charpptr, strsize)) class VoidPtrConverter(TypeConverter): diff --git 
a/pypy/module/_cppyy/ffitypes.py b/pypy/module/_cppyy/ffitypes.py --- a/pypy/module/_cppyy/ffitypes.py +++ b/pypy/module/_cppyy/ffitypes.py @@ -79,10 +79,13 @@ value = rffi.cast(rffi.CHAR, space.c_int_w(w_value)) else: - value = space.text_w(w_value) + if space.isinstance_w(w_value, space.w_text): + value = space.text_w(w_value) + else: + value = space.bytes_w(w_value) if len(value) != 1: raise oefmt(space.w_ValueError, - "char expected, got string of size %d", len(value)) + "char expected, got string of size %d", len(value)) value = rffi.cast(rffi.CHAR, value[0]) return value # turn it into a "char" to the annotator @@ -110,10 +113,13 @@ value = rffi.cast(rffi.CHAR, space.c_int_w(w_value)) else: - value = space.text_w(w_value) + if space.isinstance_w(w_value, space.w_text): + value = space.text_w(w_value) + else: + value = space.bytes_w(w_value) if len(value) != 1: raise oefmt(space.w_ValueError, - "char expected, got string of size %d", len(value)) + "usigned char expected, got string of size %d", len(value)) value = rffi.cast(rffi.CHAR, value[0]) return value # turn it into a "char" to the annotator diff --git a/pypy/module/_cppyy/helper.py b/pypy/module/_cppyy/helper.py --- a/pypy/module/_cppyy/helper.py +++ b/pypy/module/_cppyy/helper.py @@ -1,3 +1,4 @@ +import sys from rpython.rlib import rstring @@ -116,6 +117,17 @@ # TODO: perhaps absorb or "pythonify" these operators? return cppname +if sys.hexversion < 0x3000000: + CPPYY__div__ = "__div__" + CPPYY__idiv__ = "__idiv__" + CPPYY__long__ = "__long__" + CPPYY__bool__ = "__nonzero__" +else: + CPPYY__div__ = "__truediv__" + CPPYY__idiv__ = "__itruediv__" + CPPYY__long__ = "__int__" + CPPYY__bool__ = "__bool__" + # _operator_mappings["[]"] = "__setitem__" # depends on return type # _operator_mappings["+"] = "__add__" # depends on # of args (see __pos__) # _operator_mappings["-"] = "__sub__" # id. (eq. 
__neg__) @@ -123,7 +135,7 @@ # _operator_mappings["[]"] = "__getitem__" # depends on return type _operator_mappings["()"] = "__call__" -_operator_mappings["/"] = "__div__" # __truediv__ in p3 +_operator_mappings["/"] = CPPYY__div__ _operator_mappings["%"] = "__mod__" _operator_mappings["**"] = "__pow__" # not C++ _operator_mappings["<<"] = "__lshift__" @@ -136,7 +148,7 @@ _operator_mappings["+="] = "__iadd__" _operator_mappings["-="] = "__isub__" _operator_mappings["*="] = "__imul__" -_operator_mappings["/="] = "__idiv__" # __itruediv__ in p3 +_operator_mappings["/="] = CPPYY__idiv__ _operator_mappings["%="] = "__imod__" _operator_mappings["**="] = "__ipow__" _operator_mappings["<<="] = "__ilshift__" @@ -154,7 +166,7 @@ # the following type mappings are "exact" _operator_mappings["const char*"] = "__str__" _operator_mappings["int"] = "__int__" -_operator_mappings["long"] = "__long__" # __int__ in p3 +_operator_mappings["long"] = CPPYY__long__ _operator_mappings["double"] = "__float__" # the following type mappings are "okay"; the assumption is that they @@ -163,13 +175,13 @@ _operator_mappings["char*"] = "__str__" _operator_mappings["short"] = "__int__" _operator_mappings["unsigned short"] = "__int__" -_operator_mappings["unsigned int"] = "__long__" # __int__ in p3 -_operator_mappings["unsigned long"] = "__long__" # id. -_operator_mappings["long long"] = "__long__" # id. -_operator_mappings["unsigned long long"] = "__long__" # id. 
+_operator_mappings["unsigned int"] = CPPYY__long__ +_operator_mappings["unsigned long"] = CPPYY__long__ +_operator_mappings["long long"] = CPPYY__long__ +_operator_mappings["unsigned long long"] = CPPYY__long__ _operator_mappings["float"] = "__float__" -_operator_mappings["bool"] = "__nonzero__" # __bool__ in p3 +_operator_mappings["bool"] = CPPYY__bool__ # the following are not python, but useful to expose _operator_mappings["->"] = "__follow__" diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -1470,7 +1470,9 @@ __init__ = interp2app(W_CPPInstance.instance__init__), __eq__ = interp2app(W_CPPInstance.instance__eq__), __ne__ = interp2app(W_CPPInstance.instance__ne__), + # should be based on python version, but syntax is simpler this way __nonzero__ = interp2app(W_CPPInstance.instance__nonzero__), + __bool__ = interp2app(W_CPPInstance.instance__nonzero__), __len__ = interp2app(W_CPPInstance.instance__len__), __cmp__ = interp2app(W_CPPInstance.instance__cmp__), __repr__ = interp2app(W_CPPInstance.instance__repr__), diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -4,10 +4,22 @@ import sys -# Metaclasses are needed to store C++ static data members as properties. Since -# the interp-level does not support metaclasses, they are created at app-level. -# These are the metaclass base classes: -class CPPScope(type): +# Metaclasses are needed to store C++ static data members as properties and to +# provide Python language features such as a customized __dir__ for namespaces +# and __getattr__ for both. These features are used for lazy lookup/creation. +# Since the interp-level does not support metaclasses, this is all done at the +# app-level. 
+# +# C++ namespaces: are represented as Python classes, with CPPNamespace as the +# base class, which is at the moment just a label, and CPPNamespaceMeta as +# the base class of their invididualized meta class. +# +# C++ classes: are represented as Python classes, with CPPClass as the base +# class, which is a subclass of the interp-level CPPInstance. The former +# sets up the Python-class behavior for bound classes, the latter adds the +# bound object behavior that lives at the class level. + +class CPPScopeMeta(type): def __getattr__(self, name): try: return get_scoped_pycppitem(self, name) # will cache on self @@ -15,18 +27,57 @@ raise AttributeError("%s object has no attribute '%s' (details: %s)" % (self, name, str(e))) -class CPPMetaNamespace(CPPScope): +class CPPNamespaceMeta(CPPScopeMeta): def __dir__(self): return self.__cppdecl__.__dir__() -class CPPClass(CPPScope): +class CPPClassMeta(CPPScopeMeta): pass -# namespace base class (class base class defined in _post_import_startup() -class CPPNamespace(object): - __metatype__ = CPPMetaNamespace +# from six.py --- +# Copyright (c) 2010-2017 Benjamin Peterson +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. + class metaclass(type): + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + + @classmethod + def __prepare__(cls, name, this_bases): + return meta.__prepare__(name, bases) + return type.__new__(metaclass, 'temporary_class', (), {}) +# --- end from six.py + +# C++ namespace base class (the C++ class base class defined in _post_import_startup) +class CPPNamespace(with_metaclass(CPPNamespaceMeta, object)): + pass + + +# TODO: this can be moved to the interp level (and share template argument +# construction with function templates there) class CPPTemplate(object): def __init__(self, name, scope=None): self._name = name @@ -123,7 +174,7 @@ # create a metaclass to allow properties (for static data write access) import _cppyy - ns_meta = type(name+'_meta', (CPPMetaNamespace,), {}) + ns_meta = type(CPPNamespace)(name+'_meta', (CPPNamespaceMeta,), {}) # create the python-side C++ namespace representation, cache in scope if given d = {"__cppdecl__" : decl, @@ -164,7 +215,7 @@ # get a list of base classes for class creation bases = [get_pycppclass(base) for base in decl.get_base_names()] if not bases: - bases = [CPPInstance,] + bases = [CPPClass,] else: # it's possible that the required class now has been built if one of # the base classes uses it in e.g. 
a function interface @@ -202,10 +253,10 @@ # create a metaclass to allow properties (for static data write access) metabases = [type(base) for base in bases] - metacpp = type(CPPScope)(cl_name+'_meta', _drop_cycles(metabases), d_meta) + cl_meta = type(CPPClassMeta)(cl_name+'_meta', _drop_cycles(metabases), d_meta) # create the python-side C++ class - pycls = metacpp(cl_name, _drop_cycles(bases), d_class) + pycls = cl_meta(cl_name, _drop_cycles(bases), d_class) # store the class on its outer scope setattr(scope, cl_name, pycls) @@ -284,7 +335,7 @@ def extract_namespace(name): # find the namespace the named class lives in, take care of templates tpl_open = 0 - for pos in xrange(len(name)-1, 1, -1): + for pos in range(len(name)-1, 1, -1): c = name[pos] # count '<' and '>' to be able to skip template contents @@ -425,11 +476,11 @@ # at pypy-c startup, rather than on the "import _cppyy" statement import _cppyy - # root of all proxy classes: CPPInstance in pythonify exists to combine - # the CPPScope metaclass with the interp-level CPPInstanceBase - global CPPInstance - class CPPInstance(_cppyy.CPPInstanceBase): - __metaclass__ = CPPScope + # root of all proxy classes: CPPClass in pythonify exists to combine the + # CPPClassMeta metaclass (for Python-side class behavior) with the + # interp-level CPPInstance (for bound object behavior) + global CPPClass + class CPPClass(with_metaclass(CPPClassMeta, _cppyy.CPPInstance)): pass # class generator callback diff --git a/pypy/module/_cppyy/test/datatypes.h b/pypy/module/_cppyy/test/datatypes.h --- a/pypy/module/_cppyy/test/datatypes.h +++ b/pypy/module/_cppyy/test/datatypes.h @@ -44,16 +44,16 @@ m_cc_called(true), m_x(s.m_x), m_y(s.m_y), m_z(s.m_z), m_t(s.m_t) {} double operator[](int i) { - if (i == 0) return m_x; - if (i == 1) return m_y; - if (i == 2) return m_z; - if (i == 3) return m_t; - return -1; + if (i == 0) return m_x; + if (i == 1) return m_y; + if (i == 2) return m_z; + if (i == 3) return m_t; + return -1; } bool 
operator==(const FourVector& o) { - return (m_x == o.m_x && m_y == o.m_y && - m_z == o.m_z && m_t == o.m_t); + return (m_x == o.m_x && m_y == o.m_y && + m_z == o.m_z && m_t == o.m_t); } public: diff --git a/pypy/module/_cppyy/test/test_advancedcpp.py b/pypy/module/_cppyy/test/test_advancedcpp.py --- a/pypy/module/_cppyy/test/test_advancedcpp.py +++ b/pypy/module/_cppyy/test/test_advancedcpp.py @@ -673,5 +673,5 @@ try: t.throw_exception() - except Exception, e: + except Exception as e: "C++ function failed" in str(e) diff --git a/pypy/module/_cppyy/test/test_cppyy.py b/pypy/module/_cppyy/test/test_cppyy.py --- a/pypy/module/_cppyy/test/test_cppyy.py +++ b/pypy/module/_cppyy/test/test_cppyy.py @@ -50,9 +50,13 @@ import sys, math t = self.example01 + pylong = int + if sys.hexversion < 0x3000000: + pylong = long + res = t.get_overload("staticAddOneToInt")(1) assert res == 2 - res = t.get_overload("staticAddOneToInt")(1L) + res = t.get_overload("staticAddOneToInt")(pylong(1)) assert res == 2 res = t.get_overload("staticAddOneToInt")(1, 2) assert res == 4 diff --git a/pypy/module/_cppyy/test/test_crossing.py b/pypy/module/_cppyy/test/test_crossing.py --- a/pypy/module/_cppyy/test/test_crossing.py +++ b/pypy/module/_cppyy/test/test_crossing.py @@ -93,7 +93,12 @@ %(body)s PyMODINIT_FUNC - init%(name)s(void) { + #if PY_MAJOR_VERSION >= 3 + PyInit_%(name)s(void) + #else + init%(name)s(void) + #endif + { %(init)s } """ % dict(name=name, init=init, body=body) @@ -116,8 +121,20 @@ name = 'bar' init = """ - if (Py_IsInitialized()) + #if PY_MAJOR_VERSION >= 3 + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "bar", "Module Doc", -1, methods, NULL, NULL, NULL, NULL, + }; + #endif + + if (Py_IsInitialized()) { + #if PY_MAJOR_VERSION >= 3 + PyObject *module = PyModule_Create(&moduledef); + #else Py_InitModule("bar", methods); + #endif + } """ # note: only the symbols are needed for C, none for python diff --git a/pypy/module/_cppyy/test/test_zjit.py 
b/pypy/module/_cppyy/test/test_zjit.py --- a/pypy/module/_cppyy/test/test_zjit.py +++ b/pypy/module/_cppyy/test/test_zjit.py @@ -71,7 +71,9 @@ self.name = name self.__name__ = name def getname(self, space, name): - return self.name + if sys.hexversion < 0x3000000: + return self.name + return unicode(self.name) class FakeBuffer(FakeBase): typedname = "buffer" def __init__(self, val): @@ -108,9 +110,11 @@ class FakeSpace(object): fake = True - w_None = None - w_str = FakeType("str") - w_int = FakeType("int") + w_None = None + w_str = FakeType("str") + w_text = FakeType("str") + w_bytes = FakeType("str") + w_int = FakeType("int") w_float = FakeType("float") def __init__(self): From pypy.commits at gmail.com Sun Jun 10 20:14:22 2018 From: pypy.commits at gmail.com (wlav) Date: Sun, 10 Jun 2018 17:14:22 -0700 (PDT) Subject: [pypy-commit] pypy default: Upgrade to backend 1.1.0, improved handling of templated methods and Message-ID: <5b1dbede.1c69fb81.877ab.e92e@mx.google.com> Author: Wim Lavrijsen Branch: Changeset: r94751:b505aee6a14e Date: 2018-06-10 16:52 -0700 http://bitbucket.org/pypy/pypy/changeset/b505aee6a14e/ Log: Upgrade to backend 1.1.0, improved handling of templated methods and functions (in particular automatic deduction of types), improved pythonization interface, and a range of compatibility fixes for Python3 diff too long, truncating to 2000 out of 4635 lines diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -7,9 +7,9 @@ .. 
branch: cppyy-packaging -Upgrade to backend 0.6.0, support exception handling from wrapped functions, -update enum handling, const correctness for data members and associated tests, -support anonymous enums, support for function pointer arguments +Upgrade to backend 1.1.0, improved handling of templated methods and +functions (in particular automatic deduction of types), improved pythonization +interface, and a range of compatibility fixes for Python3 .. branch: socket_default_timeout_blockingness diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -1,7 +1,7 @@ from pypy.interpreter.mixedmodule import MixedModule class Module(MixedModule): - "This module brigdes the cppyy frontend with its backend, through PyPy.\n\ + "This module bridges the cppyy frontend with its backend, through PyPy.\n\ See http://cppyy.readthedocs.io/en/latest for full details." interpleveldefs = { @@ -14,17 +14,19 @@ '_set_function_generator': 'interp_cppyy.set_function_generator', '_register_class' : 'interp_cppyy.register_class', '_get_nullptr' : 'interp_cppyy.get_nullptr', - 'CPPInstanceBase' : 'interp_cppyy.W_CPPInstance', + 'CPPInstance' : 'interp_cppyy.W_CPPInstance', 'addressof' : 'interp_cppyy.addressof', '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', 'move' : 'interp_cppyy.move', + '_pin_type' : 'interp_cppyy._pin_type', } appleveldefs = { '_post_import_startup' : 'pythonify._post_import_startup', + 'Template' : 'pythonify.CPPTemplate', 'add_pythonization' : 'pythonify.add_pythonization', - 'Template' : 'pythonify.CPPTemplate', + 'remove_pythonization' : 'pythonify.remove_pythonization', } def __init__(self, space, *args): diff --git a/pypy/module/_cppyy/capi/__init__.py b/pypy/module/_cppyy/capi/__init__.py --- a/pypy/module/_cppyy/capi/__init__.py +++ b/pypy/module/_cppyy/capi/__init__.py @@ -11,6 +11,3 @@ assert lltype.typeOf(ptr) == 
C_OBJECT address = rffi.cast(rffi.CCHARP, ptr) return rffi.cast(C_OBJECT, lltype.direct_ptradd(address, offset)) - -def exchange_address(ptr, cif_descr, index): - return rffi.ptradd(ptr, cif_descr.exchange_args[index]) diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -69,7 +69,8 @@ space = self.space cif_descr = self.cif_descr size = cif_descr.exchange_size - raw_string = rffi.cast(rffi.CCHARP, 0) # only ever have one in the CAPI + raw_string1 = rffi.cast(rffi.CCHARP, 0) + raw_string2 = rffi.cast(rffi.CCHARP, 0) # have max two in any CAPI buffer = lltype.malloc(rffi.CCHARP.TO, size, flavor='raw') try: for i in range(len(args)): @@ -88,14 +89,18 @@ assert obj._voidp != rffi.cast(rffi.VOIDP, 0) data = rffi.cast(rffi.VOIDPP, data) data[0] = obj._voidp - else: # only other use is sring + else: # only other use is string assert obj.tc == 's' n = len(obj._string) - assert raw_string == rffi.cast(rffi.CCHARP, 0) - # XXX could use rffi.get_nonmovingbuffer_final_null() - raw_string = rffi.str2charp(obj._string) data = rffi.cast(rffi.CCHARPP, data) - data[0] = raw_string + if raw_string1 == rffi.cast(rffi.CCHARP, 0): + # XXX could use rffi.get_nonmovingbuffer_final_null() + raw_string1 = rffi.str2charp(obj._string) + data[0] = raw_string1 + else: + assert raw_string2 == rffi.cast(rffi.CCHARP, 0) + raw_string2 = rffi.str2charp(obj._string) + data[0] = raw_string2 jit_libffi.jit_ffi_call(cif_descr, rffi.cast(rffi.VOIDP, funcaddr), @@ -106,8 +111,10 @@ # immediate unwrapping, the round-trip is removed w_res = self.ctitem.copy_and_convert_to_object(resultdata) finally: - if raw_string != rffi.cast(rffi.CCHARP, 0): - rffi.free_charp(raw_string) + if raw_string1 != rffi.cast(rffi.CCHARP, 0): + rffi.free_charp(raw_string1) + if raw_string2 != rffi.cast(rffi.CCHARP, 0): + rffi.free_charp(raw_string2) lltype.free(buffer, flavor='raw') return 
w_res @@ -183,8 +190,7 @@ 'constructor' : ([c_method, c_object, c_int, c_voidp], c_object), 'call_o' : ([c_method, c_object, c_int, c_voidp, c_type], c_object), - 'function_address_from_index' : ([c_scope, c_index], c_voidp), # TODO: verify - 'function_address_from_method' : ([c_method], c_voidp), # id. + 'function_address' : ([c_method], c_voidp), # TODO: verify # handling of function argument buffer 'allocate_function_args' : ([c_int], c_voidp), @@ -207,6 +213,8 @@ 'num_bases' : ([c_type], c_int), 'base_name' : ([c_type, c_int], c_ccharp), 'is_subtype' : ([c_type, c_type], c_int), + 'smartptr_info' : ([c_ccharp, c_voidp, c_voidp], c_int), + 'add_smartptr_type' : ([c_ccharp], c_void), 'base_offset' : ([c_type, c_type, c_object, c_int], c_ptrdiff_t), @@ -214,30 +222,31 @@ 'num_methods' : ([c_scope], c_int), 'method_indices_from_name' : ([c_scope, c_ccharp], c_index_array), - 'method_name' : ([c_scope, c_index], c_ccharp), - 'method_mangled_name' : ([c_scope, c_index], c_ccharp), - 'method_result_type' : ([c_scope, c_index], c_ccharp), - 'method_num_args' : ([c_scope, c_index], c_int), - 'method_req_args' : ([c_scope, c_index], c_int), - 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), - 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), - 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), + 'get_method' : ([c_scope, c_index], c_method), + + 'method_name' : ([c_method], c_ccharp), + 'method_full_name' : ([c_method], c_ccharp), + 'method_mangled_name' : ([c_method], c_ccharp), + 'method_result_type' : ([c_method], c_ccharp), + 'method_num_args' : ([c_method], c_int), + 'method_req_args' : ([c_method], c_int), + 'method_arg_type' : ([c_method, c_int], c_ccharp), + 'method_arg_default' : ([c_method, c_int], c_ccharp), + 'method_signature' : ([c_method, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_method, c_int], c_ccharp), 'is_const_method' : ([c_method], c_int), 
'exists_method_template' : ([c_scope, c_ccharp], c_int), 'method_is_template' : ([c_scope, c_index], c_int), - 'method_num_template_args' : ([c_scope, c_index], c_int), - 'method_template_arg_name' : ([c_scope, c_index, c_index], c_ccharp), + 'get_method_template' : ([c_scope, c_ccharp, c_ccharp], c_method), - 'get_method' : ([c_scope, c_index], c_method), 'get_global_operator' : ([c_scope, c_scope, c_scope, c_ccharp], c_index), # method properties - 'is_public_method' : ([c_type, c_index], c_int), - 'is_constructor' : ([c_type, c_index], c_int), - 'is_destructor' : ([c_type, c_index], c_int), - 'is_staticmethod' : ([c_type, c_index], c_int), + 'is_public_method' : ([c_method], c_int), + 'is_constructor' : ([c_method], c_int), + 'is_destructor' : ([c_method], c_int), + 'is_staticmethod' : ([c_method], c_int), # data member reflection information 'num_datamembers' : ([c_scope], c_int), @@ -415,13 +424,9 @@ args = [_ArgH(cppmethod), _ArgH(cppobject), _ArgL(nargs), _ArgP(cargs), _ArgH(cppclass.handle)] return _cdata_to_cobject(space, call_capi(space, 'call_o', args)) -def c_function_address_from_index(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_function_address(space, cppmethod): return rffi.cast(C_FUNC_PTR, - _cdata_to_ptr(space, call_capi(space, 'function_address_from_index', args))) -def c_function_address_from_method(space, cppmethod): - return rffi.cast(C_FUNC_PTR, - _cdata_to_ptr(space, call_capi(space, 'function_address_from_method', [_ArgH(cppmethod)]))) + _cdata_to_ptr(space, call_capi(space, 'function_address', [_ArgH(cppmethod)]))) # handling of function argument buffer --------------------------------------- def c_allocate_function_args(space, size): @@ -479,6 +484,21 @@ if derived == base: return bool(1) return space.bool_w(call_capi(space, 'is_subtype', [_ArgH(derived.handle), _ArgH(base.handle)])) +def c_smartptr_info(space, name): + out_raw = lltype.malloc(rffi.ULONGP.TO, 1, flavor='raw', zero=True) + out_deref = 
lltype.malloc(rffi.ULONGP.TO, 1, flavor='raw', zero=True) + try: + args = [_ArgS(name), + _ArgP(rffi.cast(rffi.VOIDP, out_raw)), _ArgP(rffi.cast(rffi.VOIDP, out_deref))] + result = space.bool_w(call_capi(space, 'smartptr_info', args)) + raw = rffi.cast(C_TYPE, out_raw[0]) + deref = rffi.cast(C_METHOD, out_deref[0]) + finally: + lltype.free(out_deref, flavor='raw') + lltype.free(out_raw, flavor='raw') + return (result, raw, deref) +def c_add_smartptr_type(space, name): + return space.bool_w(call_capi(space, 'add_smartptr_type', [_ArgS(name)])) def _c_base_offset(space, derived_h, base_h, address, direction): args = [_ArgH(derived_h), _ArgH(base_h), _ArgH(address), _ArgL(direction)] @@ -510,30 +530,36 @@ c_free(space, rffi.cast(rffi.VOIDP, indices)) # c_free defined below return py_indices -def c_method_name(space, cppscope, index): +def c_get_method(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] - return charp2str_free(space, call_capi(space, 'method_name', args)) -def c_method_result_type(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] - return charp2str_free(space, call_capi(space, 'method_result_type', args)) -def c_method_num_args(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] - return space.int_w(call_capi(space, 'method_num_args', args)) -def c_method_req_args(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] - return space.int_w(call_capi(space, 'method_req_args', args)) -def c_method_arg_type(space, cppscope, index, arg_index): - args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] + return rffi.cast(C_METHOD, space.uint_w(call_capi(space, 'get_method', args))) + +def c_method_name(space, cppmeth): + return charp2str_free(space, call_capi(space, 'method_name', [_ArgH(cppmeth)])) +def c_method_full_name(space, cppmeth): + return charp2str_free(space, call_capi(space, 'method_full_name', [_ArgH(cppmeth)])) +def c_method_mangled_name(space, cppmeth): + 
return charp2str_free(space, call_capi(space, 'method_mangled_name', [_ArgH(cppmeth)])) +def c_method_result_type(space, cppmeth): + return charp2str_free(space, call_capi(space, 'method_result_type', [_ArgH(cppmeth)])) +def c_method_num_args(space, cppmeth): + return space.int_w(call_capi(space, 'method_num_args', [_ArgH(cppmeth)])) +def c_method_req_args(space, cppmeth): + return space.int_w(call_capi(space, 'method_req_args', [_ArgH(cppmeth)])) +def c_method_arg_type(space, cppmeth, arg_index): + args = [_ArgH(cppmeth), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_type', args)) -def c_method_arg_default(space, cppscope, index, arg_index): - args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] +def c_method_arg_default(space, cppmeth, arg_index): + args = [_ArgH(cppmeth), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index, show_formalargs=True): - args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] +def c_method_signature(space, cppmeth, show_formalargs=True): + args = [_ArgH(cppmeth), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) -def c_method_prototype(space, cppscope, index, show_formalargs=True): - args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] +def c_method_prototype(space, cppscope, cppmeth, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgH(cppmeth), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_prototype', args)) +def c_is_const_method(space, cppmeth): + return space.bool_w(call_capi(space, 'is_const_method', [_ArgH(cppmeth)])) def c_exists_method_template(space, cppscope, name): args = [_ArgH(cppscope.handle), _ArgS(name)] @@ -541,21 +567,10 @@ def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] return space.bool_w(call_capi(space, 
'method_is_template', args)) -def _c_method_num_template_args(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] - return space.int_w(call_capi(space, 'method_num_template_args', args)) -def c_template_args(space, cppscope, index): - nargs = _c_method_num_template_args(space, cppscope, index) - arg1 = _ArgH(cppscope.handle) - arg2 = _ArgL(index) - args = [c_resolve_name(space, charp2str_free(space, - call_capi(space, 'method_template_arg_name', [arg1, arg2, _ArgL(iarg)])) - ) for iarg in range(nargs)] - return args -def c_get_method(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] - return rffi.cast(C_METHOD, space.uint_w(call_capi(space, 'get_method', args))) +def c_get_method_template(space, cppscope, name, proto): + args = [_ArgH(cppscope.handle), _ArgS(name), _ArgS(proto)] + return rffi.cast(C_METHOD, space.uint_w(call_capi(space, 'get_method_template', args))) def c_get_global_operator(space, nss, lc, rc, op): if nss is not None: args = [_ArgH(nss.handle), _ArgH(lc.handle), _ArgH(rc.handle), _ArgS(op)] @@ -563,18 +578,14 @@ return rffi.cast(WLAVC_INDEX, -1) # method properties ---------------------------------------------------------- -def c_is_public_method(space, cppclass, index): - args = [_ArgH(cppclass.handle), _ArgL(index)] - return space.bool_w(call_capi(space, 'is_public_method', args)) -def c_is_constructor(space, cppclass, index): - args = [_ArgH(cppclass.handle), _ArgL(index)] - return space.bool_w(call_capi(space, 'is_constructor', args)) -def c_is_destructor(space, cppclass, index): - args = [_ArgH(cppclass.handle), _ArgL(index)] - return space.bool_w(call_capi(space, 'is_destructor', args)) -def c_is_staticmethod(space, cppclass, index): - args = [_ArgH(cppclass.handle), _ArgL(index)] - return space.bool_w(call_capi(space, 'is_staticmethod', args)) +def c_is_public_method(space, cppmeth): + return space.bool_w(call_capi(space, 'is_public_method', [_ArgH(cppmeth)])) +def c_is_constructor(space, 
cppmeth): + return space.bool_w(call_capi(space, 'is_constructor', [_ArgH(cppmeth)])) +def c_is_destructor(space, cppmeth): + return space.bool_w(call_capi(space, 'is_destructor', [_ArgH(cppmeth)])) +def c_is_staticmethod(space, cppmeth): + return space.bool_w(call_capi(space, 'is_staticmethod', [_ArgH(cppmeth)])) # data member reflection information ----------------------------------------- def c_num_datamembers(space, cppscope): @@ -676,7 +687,7 @@ space.setattr(w_pycppclass, space.newtext(m1), space.getattr(w_pycppclass, space.newtext(m2))) -def pythonize(space, name, w_pycppclass): +def pythonize(space, w_pycppclass, name): if name == "string": space.setattr(w_pycppclass, space.newtext("c_str"), _pythonizations["stdstring_c_str"]) _method_alias(space, w_pycppclass, "_cppyy_as_builtin", "c_str") diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -7,7 +7,7 @@ from rpython.rlib import rfloat, rawrefcount from pypy.module._rawffi.interp_rawffi import letter2tp -from pypy.module._rawffi.array import W_Array, W_ArrayInstance +from pypy.module._rawffi.array import W_ArrayInstance from pypy.module._cppyy import helper, capi, ffitypes @@ -68,6 +68,8 @@ pass # array type try: + if hasattr(space, "fake"): + raise NotImplementedError arr = space.interp_w(W_ArrayInstance, w_obj, can_be_None=True) if arr: return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) @@ -130,20 +132,6 @@ pass -class ArrayCache(object): - def __init__(self, space): - self.space = space - def __getattr__(self, name): - if name.startswith('array_'): - typecode = name[len('array_'):] - arr = self.space.interp_w(W_Array, letter2tp(self.space, typecode)) - setattr(self, name, arr) - return arr - raise AttributeError(name) - - def _freeze_(self): - return True - class ArrayTypeConverterMixin(object): _mixin_ = True _immutable_fields_ = ['size'] @@ -162,9 +150,7 @@ # read access, so no copy 
needed address_value = self._get_raw_address(space, w_obj, offset) address = rffi.cast(rffi.ULONG, address_value) - cache = space.fromcache(ArrayCache) - arr = getattr(cache, 'array_' + self.typecode) - return arr.fromaddress(space, address, self.size) + return W_ArrayInstance(space, letter2tp(space, self.typecode), self.size, address) def to_memory(self, space, w_obj, w_value, offset): # copy the full array (uses byte copy for now) @@ -205,17 +191,15 @@ # read access, so no copy needed address_value = self._get_raw_address(space, w_obj, offset) address = rffi.cast(rffi.ULONGP, address_value) - cache = space.fromcache(ArrayCache) - arr = getattr(cache, 'array_' + self.typecode) - return arr.fromaddress(space, address[0], self.size) + return W_ArrayInstance(space, letter2tp(space, self.typecode), self.size, address[0]) def to_memory(self, space, w_obj, w_value, offset): # copy only the pointer value rawobject = get_rawobject_nonnull(space, w_obj) - byteptr = rffi.cast(rffi.CCHARPP, capi.direct_ptradd(rawobject, offset)) + byteptr = rffi.cast(rffi.VOIDPP, capi.direct_ptradd(rawobject, offset)) buf = space.getarg_w('s*', w_value) try: - byteptr[0] = buf.get_raw_address() + byteptr[0] = rffi.cast(rffi.VOIDP, buf.get_raw_address()) except ValueError: raise oefmt(space.w_TypeError, "raw buffer interface not supported") @@ -337,6 +321,10 @@ address = rffi.cast(rffi.CCHARP, self._get_raw_address(space, w_obj, offset)) address[0] = self._unwrap_object(space, w_value) + +class UCharConverter(ffitypes.typeid(rffi.UCHAR), CharConverter): + pass + class FloatConverter(ffitypes.typeid(rffi.FLOAT), FloatTypeConverterMixin, TypeConverter): _immutable_fields_ = ['default'] @@ -398,12 +386,12 @@ arg = space.text_w(w_obj) x[0] = rffi.cast(rffi.LONG, rffi.str2charp(arg)) ba = rffi.cast(rffi.CCHARP, address) - ba[capi.c_function_arg_typeoffset(space)] = 'o' + ba[capi.c_function_arg_typeoffset(space)] = 'p' def from_memory(self, space, w_obj, w_pycppclass, offset): address = 
self._get_raw_address(space, w_obj, offset) charpptr = rffi.cast(rffi.CCHARPP, address) - return space.newbytes(rffi.charp2str(charpptr[0])) + return space.newtext(rffi.charp2str(charpptr[0])) def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') @@ -420,7 +408,7 @@ strsize = self.size if charpptr[self.size-1] == '\0': strsize = self.size-1 # rffi will add \0 back - return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + return space.newtext(rffi.charpsize2str(charpptr, strsize)) class VoidPtrConverter(TypeConverter): @@ -449,12 +437,12 @@ # returned as a long value for the address (INTPTR_T is not proper # per se, but rffi does not come with a PTRDIFF_T) address = self._get_raw_address(space, w_obj, offset) - ptrval = rffi.cast(rffi.ULONG, rffi.cast(rffi.VOIDPP, address)[0]) - if ptrval == 0: + ptrval = rffi.cast(rffi.ULONGP, address)[0] + if ptrval == rffi.cast(rffi.ULONG, 0): from pypy.module._cppyy import interp_cppyy return interp_cppyy.get_nullptr(space) - arr = space.interp_w(W_Array, letter2tp(space, 'P')) - return arr.fromaddress(space, ptrval, sys.maxint) + shape = letter2tp(space, 'P') + return W_ArrayInstance(space, shape, sys.maxint/shape.size, ptrval) def to_memory(self, space, w_obj, w_value, offset): address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset)) @@ -504,8 +492,8 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPInstance if isinstance(w_obj, W_CPPInstance): - from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_R_VALUE - if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_RVALUE + if w_obj.flags & INSTANCE_FLAGS_IS_RVALUE: # reject moves as all are explicit raise ValueError("lvalue expected") if capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): @@ -514,7 +502,7 @@ obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, 
obj_address) raise oefmt(space.w_TypeError, - "cannot pass %T as %s", w_obj, self.clsdecl.name) + "cannot pass %T instance as %s", w_obj, self.clsdecl.name) def cffi_type(self, space): state = space.fromcache(ffitypes.State) @@ -534,11 +522,18 @@ class InstanceMoveConverter(InstanceRefConverter): def _unwrap_object(self, space, w_obj): # moving is same as by-ref, but have to check that move is allowed - from pypy.module._cppyy.interp_cppyy import W_CPPInstance, INSTANCE_FLAGS_IS_R_VALUE - if isinstance(w_obj, W_CPPInstance): - if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: - w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE - return InstanceRefConverter._unwrap_object(self, space, w_obj) + from pypy.module._cppyy.interp_cppyy import W_CPPInstance, INSTANCE_FLAGS_IS_RVALUE + obj = space.interp_w(W_CPPInstance, w_obj) + if obj: + if obj.flags & INSTANCE_FLAGS_IS_RVALUE: + obj.flags &= ~INSTANCE_FLAGS_IS_RVALUE + try: + return InstanceRefConverter._unwrap_object(self, space, w_obj) + except Exception: + # TODO: if the method fails on some other converter, then the next + # overload can not be an rvalue anymore + obj.flags |= INSTANCE_FLAGS_IS_RVALUE + raise raise oefmt(space.w_ValueError, "object is not an rvalue") @@ -629,8 +624,7 @@ address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy - assign.call( - interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) + assign.call_impl(address, [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -639,7 +633,6 @@ class StdStringRefConverter(InstancePtrConverter): _immutable_fields_ = ['cppclass', 'typecode'] - typecode = 'V' def __init__(self, space, extra): @@ -702,8 +695,7 @@ m = cppol.functions[i] if m.signature(False) == self.signature: x = rffi.cast(rffi.VOIDPP, address) - x[0] = rffi.cast(rffi.VOIDP, - 
capi.c_function_address_from_method(space, m.cppmethod)) + x[0] = rffi.cast(rffi.VOIDP, capi.c_function_address(space, m.cppmethod)) address = rffi.cast(capi.C_OBJECT, address) ba = rffi.cast(rffi.CCHARP, address) ba[capi.c_function_arg_typeoffset(space)] = 'p' @@ -714,6 +706,67 @@ "no overload found matching %s", self.signature) +class SmartPointerConverter(TypeConverter): + _immutable_fields = ['typecode', 'smartdecl', 'rawdecl', 'deref'] + typecode = 'V' + + def __init__(self, space, smartdecl, raw, deref): + from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl, get_pythonized_cppclass + self.smartdecl = smartdecl + w_raw = get_pythonized_cppclass(space, raw) + self.rawdecl = space.interp_w(W_CPPClassDecl, + space.findattr(w_raw, space.newtext("__cppdecl__"))) + self.deref = deref + + def _unwrap_object(self, space, w_obj): + from pypy.module._cppyy.interp_cppyy import W_CPPInstance + if isinstance(w_obj, W_CPPInstance): + # w_obj could carry a 'hidden' smart ptr or be one, cover both cases + have_match = False + if w_obj.smartdecl and capi.c_is_subtype(space, w_obj.smartdecl, self.smartdecl): + # hidden case, do not derefence when getting obj address + have_match = True + rawobject = w_obj._rawobject # TODO: this direct access if fugly + offset = capi.c_base_offset(space, w_obj.smartdecl, self.smartdecl, rawobject, 1) + elif capi.c_is_subtype(space, w_obj.clsdecl, self.smartdecl): + # exposed smart pointer + have_match = True + rawobject = w_obj.get_rawobject() + offset = capi.c_base_offset(space, w_obj.clsdecl, self.smartdecl, rawobject, 1) + if have_match: + obj_address = capi.direct_ptradd(rawobject, offset) + return rffi.cast(capi.C_OBJECT, obj_address) + + raise oefmt(space.w_TypeError, + "cannot pass %T instance as %s", w_obj, self.rawdecl.name) + + def convert_argument(self, space, w_obj, address, call_local): + x = rffi.cast(rffi.VOIDPP, address) + x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) + address = rffi.cast(capi.C_OBJECT, 
address) + ba = rffi.cast(rffi.CCHARP, address) + ba[capi.c_function_arg_typeoffset(space)] = self.typecode + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) + from pypy.module._cppyy import interp_cppyy + return interp_cppyy.wrap_cppinstance(space, address, + self.rawdecl, smartdecl=self.smartdecl, deref=self.deref, do_cast=False) + +class SmartPointerPtrConverter(SmartPointerConverter): + typecode = 'o' + + def from_memory(self, space, w_obj, w_pycppclass, offset): + self._is_abstract(space) + + def to_memory(self, space, w_obj, w_value, offset): + self._is_abstract(space) + + +class SmartPointerRefConverter(SmartPointerPtrConverter): + typecode = 'V' + + class MacroConverter(TypeConverter): def from_memory(self, space, w_obj, w_pycppclass, offset): # TODO: get the actual type info from somewhere ... @@ -729,44 +782,55 @@ # 1) full, exact match # 1a) const-removed match # 2) match of decorated, unqualified type - # 3) accept ref as pointer (for the stubs, const& can be - # by value, but that does not work for the ffi path) - # 4) generalized cases (covers basically all user classes) - # 5) void* or void converter (which fails on use) + # 3) generalized cases (covers basically all user classes) + # 3a) smart pointers + # 4) void* or void converter (which fails on use) name = capi.c_resolve_name(space, _name) - # 1) full, exact match + # full, exact match try: return _converters[name](space, default) except KeyError: pass - # 1a) const-removed match + # const-removed match try: return _converters[helper.remove_const(name)](space, default) except KeyError: pass - # 2) match of decorated, unqualified type + # match of decorated, unqualified type compound = helper.compound(name) clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: # array_index may be negative to indicate no size or no size found array_size = helper.array_size(_name) # uses original arg + # 
TODO: using clean_name here drops const (e.g. const char[] will + # never be seen this way) return _a_converters[clean_name+compound](space, array_size) except KeyError: pass - # 3) TODO: accept ref as pointer - - # 4) generalized cases (covers basically all user classes) + # generalized cases (covers basically all user classes) from pypy.module._cppyy import interp_cppyy scope_decl = interp_cppyy.scope_byname(space, clean_name) if scope_decl: - # type check for the benefit of the annotator from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl clsdecl = space.interp_w(W_CPPClassDecl, scope_decl, can_be_None=False) + + # check smart pointer type + check_smart = capi.c_smartptr_info(space, clean_name) + if check_smart[0]: + if compound == '': + return SmartPointerConverter(space, clsdecl, check_smart[1], check_smart[2]) + elif compound == '*': + return SmartPointerPtrConverter(space, clsdecl, check_smart[1], check_smart[2]) + elif compound == '&': + return SmartPointerRefConverter(space, clsdecl, check_smart[1], check_smart[2]) + # fall through: can still return smart pointer in non-smart way + + # type check for the benefit of the annotator if compound == "*": return InstancePtrConverter(space, clsdecl) elif compound == "&": @@ -786,7 +850,7 @@ if pos > 0: return FunctionPointerConverter(space, name[pos+2:]) - # 5) void* or void converter (which fails on use) + # void* or void converter (which fails on use) if 0 <= compound.find('*'): return VoidPtrConverter(space, default) # "user knows best" @@ -797,6 +861,7 @@ _converters["bool"] = BoolConverter _converters["char"] = CharConverter +_converters["unsigned char"] = UCharConverter _converters["float"] = FloatConverter _converters["const float&"] = ConstFloatRefConverter _converters["double"] = DoubleConverter @@ -886,6 +951,7 @@ "NOT_RPYTHON" array_info = ( ('b', rffi.sizeof(rffi.UCHAR), ("bool",)), # is debatable, but works ... 
+ ('B', rffi.sizeof(rffi.UCHAR), ("unsigned char",)), ('h', rffi.sizeof(rffi.SHORT), ("short int", "short")), ('H', rffi.sizeof(rffi.USHORT), ("unsigned short int", "unsigned short")), ('i', rffi.sizeof(rffi.INT), ("int",)), @@ -901,9 +967,11 @@ for tcode, tsize, names in array_info: class ArrayConverter(ArrayTypeConverterMixin, TypeConverter): + _immutable_fields_ = ['typecode', 'typesize'] typecode = tcode typesize = tsize class PtrConverter(PtrTypeConverterMixin, TypeConverter): + _immutable_fields_ = ['typecode', 'typesize'] typecode = tcode typesize = tsize for name in names: @@ -912,6 +980,7 @@ # special case, const char* w/ size and w/o '\0' _a_converters["const char[]"] = CStringConverterWithSize + _a_converters["char[]"] = _a_converters["const char[]"] # debatable _build_array_converters() @@ -919,7 +988,6 @@ def _add_aliased_converters(): "NOT_RPYTHON" aliases = ( - ("char", "unsigned char"), # TODO: check ("char", "signed char"), # TODO: check ("const char*", "char*"), diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -5,7 +5,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib import jit_libffi -from pypy.module._rawffi.interp_rawffi import unpack_simple_shape +from pypy.module._rawffi.interp_rawffi import letter2tp from pypy.module._rawffi.array import W_Array, W_ArrayInstance from pypy.module._cppyy import helper, capi, ffitypes @@ -56,11 +56,11 @@ raise NotImplementedError lresult = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ptrval = rffi.cast(rffi.ULONG, lresult) - arr = space.interp_w(W_Array, unpack_simple_shape(space, space.newtext(self.typecode))) - if ptrval == 0: + if ptrval == rffi.cast(rffi.ULONG, 0): from pypy.module._cppyy import interp_cppyy return interp_cppyy.get_nullptr(space) - return arr.fromaddress(space, ptrval, sys.maxint) + shape = letter2tp(space, self.typecode) + return 
W_ArrayInstance(space, shape, sys.maxint/shape.size, ptrval) class VoidExecutor(FunctionExecutor): @@ -125,7 +125,6 @@ class CStringExecutor(FunctionExecutor): - def execute(self, space, cppmethod, cppthis, num_args, args): lresult = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ccpresult = rffi.cast(rffi.CCHARP, lresult) @@ -136,7 +135,6 @@ class ConstructorExecutor(FunctionExecutor): - def execute(self, space, cppmethod, cpptype, num_args, args): from pypy.module._cppyy import interp_cppyy newthis = capi.c_constructor(space, cppmethod, cpptype, num_args, args) @@ -144,80 +142,77 @@ return space.newlong(rffi.cast(rffi.LONG, newthis)) # really want ptrdiff_t here -class InstancePtrExecutor(FunctionExecutor): - _immutable_fields_ = ['cppclass'] +class InstanceExecutor(FunctionExecutor): + # For return of a C++ instance by pointer: MyClass* func() + _immutable_fields_ = ['clsdecl'] - def __init__(self, space, cppclass): - FunctionExecutor.__init__(self, space, cppclass) - self.cppclass = cppclass + def __init__(self, space, clsdecl): + FunctionExecutor.__init__(self, space, clsdecl) + self.clsdecl = clsdecl + + def _wrap_result(self, space, obj): + from pypy.module._cppyy import interp_cppyy + return interp_cppyy.wrap_cppinstance(space, + obj, self.clsdecl, do_cast=False, python_owns=True, fresh=True) + + def execute(self, space, cppmethod, cppthis, num_args, args): + oresult = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.clsdecl) + return self._wrap_result(space, rffi.cast(capi.C_OBJECT, oresult)) + + +class InstancePtrExecutor(InstanceExecutor): + # For return of a C++ instance by pointer: MyClass* func() def cffi_type(self, space): state = space.fromcache(ffitypes.State) return state.c_voidp + def _wrap_result(self, space, obj): + from pypy.module._cppyy import interp_cppyy + return interp_cppyy.wrap_cppinstance(space, obj, self.clsdecl) + def execute(self, space, cppmethod, cppthis, num_args, args): - from pypy.module._cppyy import 
interp_cppyy - long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) - ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) - return pyres + lresult = capi.c_call_l(space, cppmethod, cppthis, num_args, args) + return self._wrap_result(space, rffi.cast(capi.C_OBJECT, lresult)) def execute_libffi(self, space, cif_descr, funcaddr, buffer): jit_libffi.jit_ffi_call(cif_descr, funcaddr, buffer) - result = rffi.ptradd(buffer, cif_descr.exchange_result) - from pypy.module._cppyy import interp_cppyy - ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) + presult = rffi.ptradd(buffer, cif_descr.exchange_result) + obj = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, presult)[0]) + return self._wrap_result(space, obj) class InstancePtrPtrExecutor(InstancePtrExecutor): + # For return of a C++ instance by ptr-to-ptr or ptr-to-ref: MyClass*& func() def execute(self, space, cppmethod, cppthis, num_args, args): - from pypy.module._cppyy import interp_cppyy - voidp_result = capi.c_call_r(space, cppmethod, cppthis, num_args, args) - ref_address = rffi.cast(rffi.VOIDPP, voidp_result) - ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0]) - return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) + presult = capi.c_call_r(space, cppmethod, cppthis, num_args, args) + ref = rffi.cast(rffi.VOIDPP, presult) + return self._wrap_result(space, rffi.cast(capi.C_OBJECT, ref[0])) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible raise FastCallNotPossible -class InstanceExecutor(InstancePtrExecutor): - - def execute(self, space, cppmethod, cppthis, num_args, args): - from pypy.module._cppyy import interp_cppyy - long_result = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.cppclass) - ptr_result = 
rffi.cast(capi.C_OBJECT, long_result) - return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass, - do_cast=False, python_owns=True, fresh=True) - - def execute_libffi(self, space, cif_descr, funcaddr, buffer): - from pypy.module._cppyy.interp_cppyy import FastCallNotPossible - raise FastCallNotPossible - class StdStringExecutor(InstancePtrExecutor): - def execute(self, space, cppmethod, cppthis, num_args, args): cstr, cstr_len = capi.c_call_s(space, cppmethod, cppthis, num_args, args) pystr = rffi.charpsize2str(cstr, cstr_len) capi.c_free(space, rffi.cast(rffi.VOIDP, cstr)) - return space.newbytes(pystr) + return space.newbytes(pystr) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible raise FastCallNotPossible class StdStringRefExecutor(InstancePtrExecutor): - - def __init__(self, space, cppclass): + def __init__(self, space, clsdecl): from pypy.module._cppyy import interp_cppyy - cppclass = interp_cppyy.scope_byname(space, capi.std_string_name) - InstancePtrExecutor.__init__(self, space, cppclass) + clsdecl = interp_cppyy.scope_byname(space, capi.std_string_name) + InstancePtrExecutor.__init__(self, space, clsdecl) class PyObjectExecutor(PtrTypeExecutor): - def wrap_result(self, space, lresult): space.getbuiltinmodule("cpyext") from pypy.module.cpyext.pyobject import PyObject, from_ref, make_ref, decref @@ -241,6 +236,41 @@ return self.wrap_result(space, rffi.cast(rffi.LONGP, result)[0]) +class SmartPointerExecutor(InstanceExecutor): + _immutable_fields_ = ['smartdecl', 'deref'] + + def __init__(self, space, smartdecl, raw, deref): + from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl, get_pythonized_cppclass + w_raw = get_pythonized_cppclass(space, raw) + rawdecl = space.interp_w(W_CPPClassDecl, space.findattr(w_raw, space.newtext("__cppdecl__"))) + InstanceExecutor.__init__(self, space, rawdecl) + self.smartdecl = smartdecl + self.deref = deref + + def 
_wrap_result(self, space, obj): + from pypy.module._cppyy import interp_cppyy + return interp_cppyy.wrap_cppinstance(space, obj, self.clsdecl, + self.smartdecl, self.deref, do_cast=False, python_owns=True, fresh=True) + +class SmartPointerPtrExecutor(InstancePtrExecutor): + _immutable_fields_ = ['smartdecl', 'deref'] + + def __init__(self, space, smartdecl, raw, deref): + # TODO: share this with SmartPointerExecutor through in mixin + from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl, get_pythonized_cppclass + w_raw = get_pythonized_cppclass(space, raw) + rawdecl = space.interp_w(W_CPPClassDecl, space.findattr(w_raw, space.newtext("__cppdecl__"))) + InstancePtrExecutor.__init__(self, space, rawdecl) + self.smartdecl = smartdecl + self.deref = deref + + def _wrap_result(self, space, obj): + from pypy.module._cppyy import interp_cppyy + # TODO: this is a pointer to a smart pointer, take ownership on the smart one? + return interp_cppyy.wrap_cppinstance(space, obj, self.clsdecl, + self.smartdecl, self.deref, do_cast=False) + + _executors = {} def get_executor(space, name): # Matching of 'name' to an executor factory goes through up to four levels: @@ -253,7 +283,7 @@ name = capi.c_resolve_name(space, name) - # 1) full, qualified match + # full, qualified match try: return _executors[name](space, None) except KeyError: @@ -262,13 +292,13 @@ compound = helper.compound(name) clean_name = capi.c_resolve_name(space, helper.clean_type(name)) - # 1a) clean lookup + # clean lookup try: return _executors[clean_name+compound](space, None) except KeyError: pass - # 2) drop '&': by-ref is pretty much the same as by-value, python-wise + # drop '&': by-ref is pretty much the same as by-value, python-wise if compound and compound[len(compound)-1] == '&': # TODO: this does not actually work with Reflex (?) 
try: @@ -276,19 +306,29 @@ except KeyError: pass - # 3) types/classes, either by ref/ptr or by value + # types/classes, either by ref/ptr or by value from pypy.module._cppyy import interp_cppyy cppclass = interp_cppyy.scope_byname(space, clean_name) if cppclass: # type check for the benefit of the annotator from pypy.module._cppyy.interp_cppyy import W_CPPClassDecl - cppclass = space.interp_w(W_CPPClassDecl, cppclass, can_be_None=False) + clsdecl = space.interp_w(W_CPPClassDecl, cppclass, can_be_None=False) + + # check smart pointer type + check_smart = capi.c_smartptr_info(space, clean_name) + if check_smart[0]: + if compound == '': + return SmartPointerExecutor(space, clsdecl, check_smart[1], check_smart[2]) + elif compound == '*' or compound == '&': + return SmartPointerPtrExecutor(space, clsdecl, check_smart[1], check_smart[2]) + # fall through: can still return smart pointer in non-smart way + if compound == '': - return InstanceExecutor(space, cppclass) + return InstanceExecutor(space, clsdecl) elif compound == '*' or compound == '&': - return InstancePtrExecutor(space, cppclass) + return InstancePtrExecutor(space, clsdecl) elif compound == '**' or compound == '*&': - return InstancePtrPtrExecutor(space, cppclass) + return InstancePtrPtrExecutor(space, clsdecl) elif "(anonymous)" in name: # special case: enum w/o a type name return _executors["internal_enum_type_t"](space, None) diff --git a/pypy/module/_cppyy/ffitypes.py b/pypy/module/_cppyy/ffitypes.py --- a/pypy/module/_cppyy/ffitypes.py +++ b/pypy/module/_cppyy/ffitypes.py @@ -74,15 +74,52 @@ # allow int to pass to char and make sure that str is of length 1 if space.isinstance_w(w_value, space.w_int): ival = space.c_int_w(w_value) + if ival < -128 or 127 < ival: + raise oefmt(space.w_ValueError, "char arg not in range(-128,128)") + + value = rffi.cast(rffi.CHAR, space.c_int_w(w_value)) + else: + if space.isinstance_w(w_value, space.w_text): + value = space.text_w(w_value) + else: + value = 
space.bytes_w(w_value) + if len(value) != 1: + raise oefmt(space.w_ValueError, + "char expected, got string of size %d", len(value)) + + value = rffi.cast(rffi.CHAR, value[0]) + return value # turn it into a "char" to the annotator + + def cffi_type(self, space): + state = space.fromcache(State) + return state.c_char + +class UCharTypeMixin(object): + _mixin_ = True + _immutable_fields_ = ['c_type', 'c_ptrtype'] + + c_type = rffi.UCHAR + c_ptrtype = rffi.CCHARP # there's no such thing as rffi.UCHARP + + def _wrap_object(self, space, obj): + return space.newbytes(obj) + + def _unwrap_object(self, space, w_value): + # allow int to pass to char and make sure that str is of length 1 + if space.isinstance_w(w_value, space.w_int): + ival = space.c_int_w(w_value) if ival < 0 or 256 <= ival: raise oefmt(space.w_ValueError, "char arg not in range(256)") value = rffi.cast(rffi.CHAR, space.c_int_w(w_value)) else: - value = space.text_w(w_value) + if space.isinstance_w(w_value, space.w_text): + value = space.text_w(w_value) + else: + value = space.bytes_w(w_value) if len(value) != 1: raise oefmt(space.w_ValueError, - "char expected, got string of size %d", len(value)) + "usigned char expected, got string of size %d", len(value)) value = rffi.cast(rffi.CHAR, value[0]) return value # turn it into a "char" to the annotator @@ -277,6 +314,7 @@ "NOT_RPYTHON" if c_type == bool: return BoolTypeMixin if c_type == rffi.CHAR: return CharTypeMixin + if c_type == rffi.UCHAR: return UCharTypeMixin if c_type == rffi.SHORT: return ShortTypeMixin if c_type == rffi.USHORT: return UShortTypeMixin if c_type == rffi.INT: return IntTypeMixin diff --git a/pypy/module/_cppyy/helper.py b/pypy/module/_cppyy/helper.py --- a/pypy/module/_cppyy/helper.py +++ b/pypy/module/_cppyy/helper.py @@ -1,3 +1,4 @@ +import sys from rpython.rlib import rstring @@ -116,6 +117,17 @@ # TODO: perhaps absorb or "pythonify" these operators? 
return cppname +if sys.hexversion < 0x3000000: + CPPYY__div__ = "__div__" + CPPYY__idiv__ = "__idiv__" + CPPYY__long__ = "__long__" + CPPYY__bool__ = "__nonzero__" +else: + CPPYY__div__ = "__truediv__" + CPPYY__idiv__ = "__itruediv__" + CPPYY__long__ = "__int__" + CPPYY__bool__ = "__bool__" + # _operator_mappings["[]"] = "__setitem__" # depends on return type # _operator_mappings["+"] = "__add__" # depends on # of args (see __pos__) # _operator_mappings["-"] = "__sub__" # id. (eq. __neg__) @@ -123,7 +135,7 @@ # _operator_mappings["[]"] = "__getitem__" # depends on return type _operator_mappings["()"] = "__call__" -_operator_mappings["/"] = "__div__" # __truediv__ in p3 +_operator_mappings["/"] = CPPYY__div__ _operator_mappings["%"] = "__mod__" _operator_mappings["**"] = "__pow__" # not C++ _operator_mappings["<<"] = "__lshift__" @@ -136,7 +148,7 @@ _operator_mappings["+="] = "__iadd__" _operator_mappings["-="] = "__isub__" _operator_mappings["*="] = "__imul__" -_operator_mappings["/="] = "__idiv__" # __itruediv__ in p3 +_operator_mappings["/="] = CPPYY__idiv__ _operator_mappings["%="] = "__imod__" _operator_mappings["**="] = "__ipow__" _operator_mappings["<<="] = "__ilshift__" @@ -154,7 +166,7 @@ # the following type mappings are "exact" _operator_mappings["const char*"] = "__str__" _operator_mappings["int"] = "__int__" -_operator_mappings["long"] = "__long__" # __int__ in p3 +_operator_mappings["long"] = CPPYY__long__ _operator_mappings["double"] = "__float__" # the following type mappings are "okay"; the assumption is that they @@ -163,13 +175,13 @@ _operator_mappings["char*"] = "__str__" _operator_mappings["short"] = "__int__" _operator_mappings["unsigned short"] = "__int__" -_operator_mappings["unsigned int"] = "__long__" # __int__ in p3 -_operator_mappings["unsigned long"] = "__long__" # id. -_operator_mappings["long long"] = "__long__" # id. -_operator_mappings["unsigned long long"] = "__long__" # id. 
+_operator_mappings["unsigned int"] = CPPYY__long__ +_operator_mappings["unsigned long"] = CPPYY__long__ +_operator_mappings["long long"] = CPPYY__long__ +_operator_mappings["unsigned long long"] = CPPYY__long__ _operator_mappings["float"] = "__float__" -_operator_mappings["bool"] = "__nonzero__" # __bool__ in p3 +_operator_mappings["bool"] = CPPYY__bool__ # the following are not python, but useful to expose _operator_mappings["->"] = "__follow__" diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -76,9 +76,7 @@ cppyy_object_t cppyy_call_o(cppyy_method_t method, cppyy_object_t self, int nargs, void* args, cppyy_type_t result_type); RPY_EXTERN - cppyy_funcaddr_t cppyy_function_address_from_index(cppyy_scope_t scope, cppyy_index_t idx); - RPY_EXTERN - cppyy_funcaddr_t cppyy_function_address_from_method(cppyy_method_t method); + cppyy_funcaddr_t cppyy_function_address(cppyy_method_t method); /* handling of function argument buffer ----------------------------------- */ RPY_EXTERN @@ -132,23 +130,28 @@ cppyy_index_t* cppyy_method_indices_from_name(cppyy_scope_t scope, const char* name); RPY_EXTERN - char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx); + cppyy_method_t cppyy_get_method(cppyy_scope_t scope, cppyy_index_t idx); + RPY_EXTERN - char* cppyy_method_mangled_name(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_name(cppyy_method_t); RPY_EXTERN - char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_full_name(cppyy_method_t); RPY_EXTERN - int cppyy_method_num_args(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_mangled_name(cppyy_method_t); RPY_EXTERN - int cppyy_method_req_args(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_result_type(cppyy_method_t); RPY_EXTERN - char* cppyy_method_arg_type(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); + int 
cppyy_method_num_args(cppyy_method_t); RPY_EXTERN - char* cppyy_method_arg_default(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); + int cppyy_method_req_args(cppyy_method_t); RPY_EXTERN - char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + char* cppyy_method_arg_type(cppyy_method_t, int arg_index); RPY_EXTERN - char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + char* cppyy_method_arg_default(cppyy_method_t, int arg_index); + RPY_EXTERN + char* cppyy_method_signature(cppyy_method_t, int show_formalargs); + RPY_EXTERN + char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_method_t idx, int show_formalargs); RPY_EXTERN int cppyy_is_const_method(cppyy_method_t); @@ -157,25 +160,21 @@ RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN - int cppyy_method_num_template_args(cppyy_scope_t scope, cppyy_index_t idx); - RPY_EXTERN - char* cppyy_method_template_arg_name(cppyy_scope_t scope, cppyy_index_t idx, cppyy_index_t iarg); + cppyy_method_t cppyy_get_method_template(cppyy_scope_t scope, const char* name, const char* proto); RPY_EXTERN - cppyy_method_t cppyy_get_method(cppyy_scope_t scope, cppyy_index_t idx); - RPY_EXTERN cppyy_index_t cppyy_get_global_operator( cppyy_scope_t scope, cppyy_scope_t lc, cppyy_scope_t rc, const char* op); /* method properties ------------------------------------------------------ */ RPY_EXTERN - int cppyy_is_publicmethod(cppyy_type_t type, cppyy_index_t idx); + int cppyy_is_publicmethod(cppyy_method_t); RPY_EXTERN - int cppyy_is_constructor(cppyy_type_t type, cppyy_index_t idx); + int cppyy_is_constructor(cppyy_method_t); RPY_EXTERN - int cppyy_is_destructor(cppyy_type_t type, cppyy_index_t idx); + int cppyy_is_destructor(cppyy_method_t); RPY_EXTERN - int cppyy_is_staticmethod(cppyy_type_t type, cppyy_index_t idx); + int cppyy_is_staticmethod(cppyy_method_t); /* data member reflection information 
------------------------------------- */ RPY_EXTERN diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -14,13 +14,21 @@ from pypy.module._cffi_backend import ctypefunc from pypy.module._cppyy import converter, executor, ffitypes, helper +CLASS_FLAGS_IS_PINNED = 0x0001 INSTANCE_FLAGS_PYTHON_OWNS = 0x0001 INSTANCE_FLAGS_IS_REF = 0x0002 -INSTANCE_FLAGS_IS_R_VALUE = 0x0004 +INSTANCE_FLAGS_IS_RVALUE = 0x0004 OVERLOAD_FLAGS_USE_FFI = 0x0001 +FUNCTION_IS_GLOBAL = 0x0001 +FUNCTION_IS_STATIC = 0x0001 +FUNCTION_IS_METHOD = 0x0002 +FUNCTION_IS_CONSTRUCTOR = 0x0004 +FUNCTION_IS_TEMPLATE = 0x0008 +FUNCTION_IS_SETITEM = 0x0010 + class FastCallNotPossible(Exception): pass @@ -100,9 +108,9 @@ state.cppscope_cache[final_scoped_name] = cppscope if not isns: - # build methods/data; TODO: also defer this for classes (a functional __dir__ + # build overloads/data; TODO: also defer this for classes (a functional __dir__ # and instrospection for help() is enough and allows more lazy loading) - cppscope._build_methods() + cppscope._build_overloads() cppscope._find_datamembers() return cppscope @@ -131,7 +139,7 @@ cppclass = space.interp_w(W_CPPClassDecl, w_cppclass) # add back-end specific method pythonizations (doing this on the wrapped # class allows simple aliasing of methods) - capi.pythonize(space, cppclass.name, w_pycppclass) + capi.pythonize(space, w_pycppclass, cppclass.name) state = space.fromcache(State) state.cppclass_registry[rffi.cast(rffi.LONG, cppclass.handle)] = w_pycppclass @@ -150,17 +158,18 @@ #----- -# Classes involved with methods and functions: +# Classes involved with methods and functions come at two levels: +# - overloads: user-facing collections of overloaded functions +# - wrappers: internal holders of the individual C++ methods # -# CPPMethod: base class wrapping a single function or method -# CPPConstructor: specialization for allocating a new 
object -# CPPFunction: specialization for free and static functions +# W_CPPOverload: instance methods (base class) +# W_CPPConstructorOverload: constructors +# W_CPPStaticOverload: free and static functions +# W_CPPTemplateOverload: templated methods +# W_CPPTemplateStaticOveload: templated free and static functions +# +# CPPMethod: a single function or method (base class) # CPPSetItem: specialization for Python's __setitem__ -# CPPTemplatedCall: trampoline to instantiate and bind templated functions -# W_CPPOverload, W_CPPConstructorOverload, W_CPPTemplateOverload: -# user-facing, app-level, collection of overloads, with specializations -# for constructors and templates -# W_CPPBoundMethod: instantiated template method # # All methods/functions derive from CPPMethod and are collected as overload # candidates in user-facing overload classes. Templated methods are a two-step @@ -173,15 +182,15 @@ also takes care of offset casting and recycling of known objects through the memory_regulator.""" - _attrs_ = ['space', 'scope', 'index', 'cppmethod', 'arg_defs', 'args_required', + _attrs_ = ['space', 'scope', 'cppmethod', 'arg_defs', 'args_required', 'converters', 'executor', '_funcaddr', 'cif_descr', 'uses_local'] - _immutable_ = True + _immutable_fields_ = ['scope', 'cppmethod', 'arg_defs', 'args_required', + 'converters', 'executor', 'uses_local'] - def __init__(self, space, declaring_scope, method_index, arg_defs, args_required): + def __init__(self, space, declaring_scope, cppmethod, arg_defs, args_required): self.space = space self.scope = declaring_scope - self.index = method_index - self.cppmethod = capi.c_get_method(self.space, self.scope, method_index) + self.cppmethod = cppmethod self.arg_defs = arg_defs self.args_required = args_required @@ -193,12 +202,6 @@ self._funcaddr = lltype.nullptr(capi.C_FUNC_PTR.TO) self.uses_local = False - @staticmethod - def unpack_cppthis(space, w_cppinstance, declaring_scope): - cppinstance = space.interp_w(W_CPPInstance, 
w_cppinstance) - cppinstance._nullcheck() - return cppinstance.get_cppthis(declaring_scope) - def _address_from_local_buffer(self, call_local, idx): if not call_local: return call_local @@ -266,11 +269,12 @@ def do_fast_call(self, cppthis, args_w, call_local): if self.cif_descr == lltype.nullptr(jit_libffi.CIF_DESCRIPTION): raise FastCallNotPossible + jit.promote(self) cif_descr = self.cif_descr buffer = lltype.malloc(rffi.CCHARP.TO, cif_descr.exchange_size, flavor='raw') try: # this pointer - data = capi.exchange_address(buffer, cif_descr, 0) + data = rffi.ptradd(buffer, cif_descr.exchange_args[0]) x = rffi.cast(rffi.LONGP, data) # LONGP needed for test_zjit.py x[0] = rffi.cast(rffi.LONG, cppthis) @@ -279,11 +283,11 @@ for i in range(len(args_w)): conv = self.converters[i] w_arg = args_w[i] - data = capi.exchange_address(buffer, cif_descr, i+1) + data = rffi.ptradd(buffer, cif_descr.exchange_args[i+1]) conv.convert_argument_libffi(self.space, w_arg, data, call_local) for j in range(i+1, len(self.arg_defs)): conv = self.converters[j] - data = capi.exchange_address(buffer, cif_descr, j+1) + data = rffi.ptradd(buffer, cif_descr.exchange_args[j+1]) conv.default_argument_libffi(self.space, data) assert self._funcaddr @@ -295,7 +299,7 @@ return w_res # from ctypefunc; have my own version for annotater purposes and to disable - # memory tracking (method live time is longer than the tests) + # memory tracking (method life time is longer than the tests) @jit.dont_look_inside def _rawallocate(self, builder): builder.space = self.space @@ -308,7 +312,7 @@ # allocate the buffer if we_are_translated(): rawmem = lltype.malloc(rffi.CCHARP.TO, builder.nb_bytes, - flavor='raw', track_allocation=False) + flavor='raw') rawmem = rffi.cast(jit_libffi.CIF_DESCRIPTION_P, rawmem) else: # gross overestimation of the length below, but too bad @@ -340,7 +344,7 @@ self.converters = [converter.get_converter(self.space, arg_type, arg_dflt) for arg_type, arg_dflt in self.arg_defs] self.executor 
= executor.get_executor( - self.space, capi.c_method_result_type(self.space, self.scope, self.index)) + self.space, capi.c_method_result_type(self.space, self.cppmethod)) for conv in self.converters: if conv.uses_local: @@ -350,8 +354,8 @@ # Each CPPMethod corresponds one-to-one to a C++ equivalent and cppthis # has been offset to the matching class. Hence, the libffi pointer is # uniquely defined and needs to be setup only once. - funcaddr = capi.c_function_address_from_index(self.space, self.scope, self.index) - if funcaddr and cppthis: # methods only for now + funcaddr = capi.c_function_address(self.space, self.cppmethod) + if funcaddr and cppthis: # TODO: methods only for now state = self.space.fromcache(ffitypes.State) # argument type specification (incl. cppthis) @@ -418,10 +422,10 @@ capi.c_deallocate_function_args(self.space, args) def signature(self, show_formalargs=True): - return capi.c_method_signature(self.space, self.scope, self.index, show_formalargs) + return capi.c_method_signature(self.space, self.cppmethod, show_formalargs) def prototype(self, show_formalargs=True): - return capi.c_method_prototype(self.space, self.scope, self.index, show_formalargs) + return capi.c_method_prototype(self.space, self.scope, self.cppmethod, show_formalargs) def priority(self): total_arg_priority = 0 @@ -431,8 +435,11 @@ @rgc.must_be_light_finalizer def __del__(self): - if self.cif_descr: - lltype.free(self.cif_descr, flavor='raw') + try: + if self.cif_descr: + lltype.free(self.cif_descr, flavor='raw') + except Exception: # TODO: happens for templates, why? + pass def __repr__(self): return "CPPMethod: %s" % self.prototype() @@ -441,80 +448,12 @@ assert 0, "you should never have a pre-built instance of this!" 
-class CPPFunction(CPPMethod): - """Global (namespaced) / static function dispatcher.""" - - _immutable_ = True - - @staticmethod - def unpack_cppthis(space, w_cppinstance, declaring_scope): - return capi.C_NULL_OBJECT - - def __repr__(self): - return "CPPFunction: %s" % self.prototype() - - -class CPPTemplatedCall(CPPMethod): - """Method dispatcher that first resolves the template instance.""" - - _attrs_ = ['space', 'templ_args'] - _immutable_ = True - - def __init__(self, space, templ_args, declaring_scope, method_index, arg_defs, args_required): - self.space = space - self.templ_args = templ_args - # TODO: might have to specialize for CPPTemplatedCall on CPPMethod/CPPFunction here - CPPMethod.__init__(self, space, declaring_scope, method_index, arg_defs, args_required) - - def call(self, cppthis, args_w, useffi): - assert lltype.typeOf(cppthis) == capi.C_OBJECT - for i in range(len(args_w)): - try: - s = self.space.text_w(args_w[i]) - except OperationError: - s = self.space.text_w(self.space.getattr(args_w[i], self.space.newtext('__name__'))) - s = capi.c_resolve_name(self.space, s) - if s != self.templ_args[i]: - raise oefmt(self.space.w_TypeError, - "non-matching template (got %s where %s expected)", - s, self.templ_args[i]) - return W_CPPBoundMethod(cppthis, self, useffi) - - def bound_call(self, cppthis, args_w, useffi): - return CPPMethod.call(self, cppthis, args_w, useffi) - - def __repr__(self): - return "CPPTemplatedCall: %s" % self.prototype() - - -class CPPConstructor(CPPMethod): - """Method dispatcher that constructs new objects. 
This method can not have - a fast path, as the allocation of the object is currently left to the - reflection layer only, since the C++ class may have an overloaded operator - new, disallowing malloc here.""" - - _immutable_ = True - - @staticmethod - def unpack_cppthis(space, w_cppinstance, declaring_scope): - return rffi.cast(capi.C_OBJECT, declaring_scope.handle) - - def call(self, cppthis, args_w, useffi): - # Note: this does not return a wrapped instance, just a pointer to the - # new instance; the overload must still wrap it before returning. Also, - # cppthis is declaring_scope.handle (as per unpack_cppthis(), above). - return CPPMethod.call(self, cppthis, args_w, useffi) - - def __repr__(self): - return "CPPConstructor: %s" % self.prototype() - - class CPPSetItem(CPPMethod): """Method dispatcher specific to Python's __setitem__ mapped onto C++'s operator[](int). The former function takes an extra argument to assign to the return type of the latter.""" - _immutable_ = True + _attrs_ = [] def call(self, cppthis, args_w, useffi): end = len(args_w)-1 @@ -528,46 +467,44 @@ class W_CPPOverload(W_Root): - """Dispatcher that is actually available at the app-level: it is a - collection of (possibly) overloaded methods or functions. It calls these - in order and deals with error handling and reporting.""" + """App-level dispatcher: controls a collection of (potentially) overloaded methods + or functions. 
Calls these in order and deals with error handling and reporting.""" - _attrs_ = ['space', 'scope', 'functions', 'flags'] + _attrs_ = ['space', 'scope', 'functions', 'flags', 'w_this'] _immutable_fields_ = ['scope', 'functions[*]'] - def __init__(self, space, declaring_scope, functions): - self.space = space - self.scope = declaring_scope - assert len(functions) + def __init__(self, space, declaring_scope, functions, flags = OVERLOAD_FLAGS_USE_FFI): + self.space = space + self.scope = declaring_scope from rpython.rlib import debug self.functions = debug.make_sure_not_resized(functions) - self.flags = 0 - self.flags |= OVERLOAD_FLAGS_USE_FFI + self.flags = flags + self.w_this = self.space.w_None - # allow user to determine ffi use rules per overload - def fget_useffi(self, space): - return space.newbool(bool(self.flags & OVERLOAD_FLAGS_USE_FFI)) + @unwrap_spec(args_w='args_w') + def descr_get(self, w_cppinstance, args_w): + if self.space.is_w(w_cppinstance, self.space.w_None): + return self # unbound, so no new instance needed + cppol = W_CPPOverload(self.space, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + return cppol # bound - @unwrap_spec(value=bool) - def fset_useffi(self, space, value): - if space.is_true(value): - self.flags |= OVERLOAD_FLAGS_USE_FFI + @unwrap_spec(args_w='args_w') + def call(self, args_w): + if self.space.is_w(self.w_this, self.space.w_None) and len(args_w): + w_this = args_w[0] + args_w = args_w[1:] else: - self.flags &= ~OVERLOAD_FLAGS_USE_FFI - - @jit.elidable_promote() - def is_static(self): - if isinstance(self.functions[0], CPPFunction): - return self.space.w_True - return self.space.w_False + w_this = self.w_this + cppinstance = self.space.interp_w(W_CPPInstance, w_this) + cppinstance._nullcheck() + if not capi.c_is_subtype(self.space, cppinstance.clsdecl, self.scope): + raise oefmt(self.space.w_TypeError, + "cannot pass %T instance as %s", w_this, self.scope.name) + return 
self.call_impl(cppinstance.get_cppthis(self.scope), args_w) @jit.unroll_safe - @unwrap_spec(args_w='args_w') - def call(self, w_cppinstance, args_w): - # instance handling is specific to the function type only, so take it out - # of the loop over function overloads - cppthis = self.functions[0].unpack_cppthis( - self.space, w_cppinstance, self.functions[0].scope) + def call_impl(self, cppthis, args_w): assert lltype.typeOf(cppthis) == capi.C_OBJECT # The following code tries out each of the functions in order. If @@ -625,38 +562,96 @@ sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) + # allow user to determine ffi use rules per overload + def fget_useffi(self, space): + return space.newbool(bool(self.flags & OVERLOAD_FLAGS_USE_FFI)) + + @unwrap_spec(value=bool) + def fset_useffi(self, space, value): + if space.is_true(value): + self.flags |= OVERLOAD_FLAGS_USE_FFI + else: + self.flags &= ~OVERLOAD_FLAGS_USE_FFI + + def fget_doc(self, space): + return self.prototype() + def __repr__(self): return "W_CPPOverload(%s)" % [f.prototype() for f in self.functions] W_CPPOverload.typedef = TypeDef( 'CPPOverload', - is_static = interp2app(W_CPPOverload.is_static), - call = interp2app(W_CPPOverload.call), + __get__ = interp2app(W_CPPOverload.descr_get), + __call__ = interp2app(W_CPPOverload.call), __useffi__ = GetSetProperty(W_CPPOverload.fget_useffi, W_CPPOverload.fset_useffi), - prototype = interp2app(W_CPPOverload.prototype), + __doc__ = GetSetProperty(W_CPPOverload.fget_doc) ) +# overload collection of static (class and free) functions; these differ +# from methods only in the handling of 'cppthis' +class W_CPPStaticOverload(W_CPPOverload): + _attrs_ = [] + + @unwrap_spec(args_w='args_w') + def descr_get(self, w_cppinstance, args_w): + if isinstance(w_cppinstance, W_CPPInstance): + # two possibilities: this is a static function called on an + # instance and w_this must not be set, or a free function rebound + # onto a class and w_this should be set 
+ cppinstance = self.space.interp_w(W_CPPInstance, w_cppinstance) + if cppinstance.clsdecl.handle != self.scope.handle: + cppol = W_CPPStaticOverload(self.space, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + return cppol # bound + return self # unbound + + @unwrap_spec(args_w='args_w') + def call(self, args_w): + if not self.space.is_w(self.w_this, self.space.w_None): + # free function used as bound method, put self back into args_w + cppinstance = self.space.interp_w(W_CPPInstance, self.w_this) + cppinstance._nullcheck() + args_w = [self.w_this] + args_w + return self.call_impl(capi.C_NULL_OBJECT, args_w) + + def __repr__(self): + return "W_CPPStaticOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPStaticOverload.typedef = TypeDef( + 'CPPStaticOverload', + __get__ = interp2app(W_CPPStaticOverload.descr_get), + __call__ = interp2app(W_CPPStaticOverload.call), + __useffi__ = GetSetProperty(W_CPPStaticOverload.fget_useffi, W_CPPStaticOverload.fset_useffi), + __doc__ = GetSetProperty(W_CPPStaticOverload.fget_doc) +) + + class W_CPPConstructorOverload(W_CPPOverload): - @jit.elidable_promote() - def is_static(self): - return self.space.w_False + _attrs_ = [] - @jit.elidable_promote() - def unpack_cppthis(self, w_cppinstance): - return rffi.cast(capi.C_OBJECT, self.scope.handle) + @unwrap_spec(args_w='args_w') + def descr_get(self, w_cppinstance, args_w): + if self.space.is_w(w_cppinstance, self.space.w_None): + return self # unbound (TODO: probably useless) + cppol = W_CPPConstructorOverload(self.space, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + return cppol # bound - @jit.unroll_safe @unwrap_spec(args_w='args_w') - def call(self, w_cppinstance, args_w): + def call(self, args_w): # TODO: factor out the following: if capi.c_is_abstract(self.space, self.scope.handle): raise oefmt(self.space.w_TypeError, "cannot instantiate abstract class '%s'", self.scope.name) - w_result = W_CPPOverload.call(self, 
w_cppinstance, args_w) + if self.space.is_w(self.w_this, self.space.w_None) and len(args_w): + cppinstance = self.space.interp_w(W_CPPInstance, args_w[0]) + args_w = args_w[1:] + else: + cppinstance = self.space.interp_w(W_CPPInstance, self.w_this) + w_result = self.call_impl(rffi.cast(capi.C_OBJECT, self.scope.handle), args_w) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) - cppinstance = self.space.interp_w(W_CPPInstance, w_cppinstance, can_be_None=True) if cppinstance is not None: cppinstance._rawobject = newthis memory_regulator.register(cppinstance) @@ -666,43 +661,203 @@ W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', - is_static = interp2app(W_CPPConstructorOverload.is_static), - call = interp2app(W_CPPConstructorOverload.call), - prototype = interp2app(W_CPPConstructorOverload.prototype), + __get__ = interp2app(W_CPPConstructorOverload.descr_get), + __call__ = interp2app(W_CPPConstructorOverload.call), + __doc__ = GetSetProperty(W_CPPConstructorOverload.fget_doc) ) -class W_CPPTemplateOverload(W_CPPOverload): +class TemplateOverloadMixin(object): + """Mixin to instantiate templated methods/functions.""" + + _mixin_ = True + + def construct_template_args(self, w_args): + space = self.space + tmpl_args = '' + for i in range(space.len_w(w_args)): + w_obj = space.getitem(w_args, space.newint(i)) + if space.isinstance_w(w_obj, space.w_text): + s = space.text_w(w_obj) # string describing type + elif space.isinstance_w(w_obj, space.w_type): + try: + # cppyy bound types + name = space.getattr(w_obj, space.newtext('__cppname__')) + except OperationError: + # generic python types + name = space.getattr(w_obj, space.newtext('__name__')) + s = space.text_w(name) + else: + # builtin types etc. 
+ s = space.text_w(space.str(w_obj)) + # map python types -> C++ types + if s == 'str': s = 'std::string' + if i != 0: tmpl_args += ', ' + tmpl_args += s + return tmpl_args + + def find_method_template(self, name, proto = ''): + # find/instantiate new callable function + space = self.space + cppmeth = capi.c_get_method_template(space, self.scope, name, proto) + if not cppmeth: + raise oefmt(self.space.w_AttributeError, + "scope '%s' has no function %s", self.scope.name, name) + + funcs = [] + ftype = self.scope._make_cppfunction(name, cppmeth, funcs) + if ftype & FUNCTION_IS_STATIC: + cppol = W_CPPStaticOverload(space, self.scope, funcs[:], self.flags) + else: + cppol = W_CPPOverload(space, self.scope, funcs[:], self.flags) + return cppol + + def instantiation_from_args(self, name, args_w): + # try to match with run-time instantiations + for cppol in self.master.overloads.values(): + try: + cppol.descr_get(self.w_this, []).call(args_w) + except Exception: + pass # completely ignore for now; have to see whether errors become confusing + + # if all failed, then try to deduce from argument types + w_types = self.space.newtuple([self.space.type(obj_w) for obj_w in args_w]) + proto = self.construct_template_args(w_types) + method = self.find_method_template(name, proto) + + # only cache result if the name retains the full template + if len(method.functions) == 1: + fullname = capi.c_method_full_name(self.space, method.functions[0].cppmethod) + if 0 <= fullname.rfind('>'): + self.master.overloads[fullname] = method + + return method.descr_get(self.w_this, []).call(args_w) + + def getitem_impl(self, name, args_w): + space = self.space + + if space.isinstance_w(args_w[0], space.w_tuple): + w_args = args_w[0] + else: + w_args = space.newtuple(args_w) + + tmpl_args = self.construct_template_args(w_args) + fullname = name+'<'+tmpl_args+'>' + try: + method = self.master.overloads[fullname] + except KeyError: + method = self.find_method_template(fullname) + + # cache result 
(name is always full templated name) + self.master.overloads[fullname] = method + + return method.descr_get(self.w_this, []) + + +class W_CPPTemplateOverload(W_CPPOverload, TemplateOverloadMixin): + """App-level dispatcher to allow both lookup/instantiation of templated methods and + dispatch among overloads between templated and non-templated method.""" + + _attrs_ = ['name', 'overloads', 'master'] + _immutable_fields_ = ['name'] + + def __init__(self, space, name, declaring_scope, functions, flags = OVERLOAD_FLAGS_USE_FFI): + W_CPPOverload.__init__(self, space, declaring_scope, functions, flags) + self.name = name + self.overloads = {} + self.master = self + @unwrap_spec(args_w='args_w') - def __getitem__(self, args_w): - pass + def descr_get(self, w_cppinstance, args_w): + # like W_CPPOverload, but returns W_CPPTemplateOverload + if self.space.is_w(w_cppinstance, self.space.w_None): + return self # unbound, so no new instance needed + cppol = W_CPPTemplateOverload(self.space, self.name, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + return cppol # bound + + @unwrap_spec(args_w='args_w') + def call(self, args_w): + # direct call: either pick non-templated overload or attempt to deduce + # the template instantiation from the argument types + + # try existing overloads or compile-time instantiations + try: + return W_CPPOverload.call(self, args_w) + except Exception: + pass + + return self.instantiation_from_args(self.name, args_w) + + @unwrap_spec(args_w='args_w') + def getitem(self, args_w): + return self.getitem_impl(self.name, args_w) def __repr__(self): return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] W_CPPTemplateOverload.typedef = TypeDef( 'CPPTemplateOverload', - __getitem__ = interp2app(W_CPPTemplateOverload.call), + __get__ = interp2app(W_CPPTemplateOverload.descr_get), + __getitem__ = interp2app(W_CPPTemplateOverload.getitem), + __call__ = interp2app(W_CPPTemplateOverload.call), + __useffi__ = 
GetSetProperty(W_CPPTemplateOverload.fget_useffi, W_CPPTemplateOverload.fset_useffi), + __doc__ = GetSetProperty(W_CPPTemplateOverload.fget_doc) ) +class W_CPPTemplateStaticOverload(W_CPPStaticOverload, TemplateOverloadMixin): + """App-level dispatcher to allow both lookup/instantiation of templated methods and + dispatch among overloads between templated and non-templated method.""" -class W_CPPBoundMethod(W_Root): - _attrs_ = ['cppthis', 'method', 'useffi'] + _attrs_ = ['name', 'overloads', 'master'] + _immutable_fields_ = ['name'] - def __init__(self, cppthis, method, useffi): - self.cppthis = cppthis - self.method = method - self.useffi = useffi + def __init__(self, space, name, declaring_scope, functions, flags = OVERLOAD_FLAGS_USE_FFI): + W_CPPStaticOverload.__init__(self, space, declaring_scope, functions, flags) + self.name = name + self.overloads = {} + self.master = self - def __call__(self, args_w): - return self.method.bound_call(self.cppthis, args_w, self.useffi) + @unwrap_spec(args_w='args_w') + def descr_get(self, w_cppinstance, args_w): + # like W_CPPStaticOverload, but returns W_CPPTemplateStaticOverload + if isinstance(w_cppinstance, W_CPPInstance): + cppinstance = self.space.interp_w(W_CPPInstance, w_cppinstance) + if cppinstance.clsdecl.handle != self.scope.handle: + cppol = W_CPPTemplateStaticOverload(self.space, self.name, self.scope, self.functions, self.flags) + cppol.w_this = w_cppinstance + cppol.master = self.master + return cppol # bound + return self # unbound + + @unwrap_spec(args_w='args_w') + def call(self, args_w): + # direct call: either pick non-templated overload or attempt to deduce + # the template instantiation from the argument types + + # try existing overloads or compile-time instantiations + try: + return W_CPPStaticOverload.call(self, args_w) + except Exception: + pass + + # try new instantiation + return self.instantiation_from_args(self.name, args_w) + + @unwrap_spec(args_w='args_w') + def getitem(self, args_w): + 
return self.getitem_impl(self.name, args_w) def __repr__(self): - return "W_CPPBoundMethod(%s)" % self.method.prototype() + return "W_CPPTemplateStaticOverload(%s)" % [f.prototype() for f in self.functions] -W_CPPBoundMethod.typedef = TypeDef( - 'CPPBoundMethod', - __call__ = interp2app(W_CPPBoundMethod.__call__), +W_CPPTemplateStaticOverload.typedef = TypeDef( + 'CPPTemplateStaticOverload', + __get__ = interp2app(W_CPPTemplateStaticOverload.descr_get), + __getitem__ = interp2app(W_CPPTemplateStaticOverload.getitem), + __call__ = interp2app(W_CPPTemplateStaticOverload.call), + __useffi__ = GetSetProperty(W_CPPTemplateStaticOverload.fget_useffi, W_CPPTemplateStaticOverload.fset_useffi), + __doc__ = GetSetProperty(W_CPPTemplateStaticOverload.fget_doc) ) @@ -813,38 +968,48 @@ return space.w_False #----- - +# Classes for data members: +# +# W_CPPScopeDecl : scope base class +# W_CPPNamespaceDecl : namespace scope +# W_CPPClassDecl : class scope +# +# Namespaces and classes mainly differ in lookups of methods. Whereas classes +# can grown templated methods, namespaces are wide open to any additions. Such +# lookups are triggered from get_scoped_pycppitem (in pythonify.py). Further +# specialization is done on the type of data/methods that each can have. class W_CPPScopeDecl(W_Root): - _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'flags', 'name', 'overloads', 'datamembers'] _immutable_fields_ = ['handle', 'name'] def __init__(self, space, opaque_handle, final_scoped_name): self.space = space - self.name = final_scoped_name assert lltype.typeOf(opaque_handle) == capi.C_SCOPE self.handle = opaque_handle - self.methods = {} - # Do not call "self._build_methods()" here, so that a distinction can + self.flags = 0 + self.name = final_scoped_name + self.overloads = {} + # Do not call "self._build_overloadss()" here, so that a distinction can # be made between testing for existence (i.e. 
existence in the cache # of classes) and actual use. Point being that a class can use itself, # e.g. as a return type or an argument to one of its methods. self.datamembers = {} - # Idem as for self.methods: a type could hold itself by pointer. + # Idem as for self.overloads: a type could hold itself by pointer. def get_method_names(self): - return self.space.newlist([self.space.newtext(name) for name in self.methods]) + return self.space.newlist([self.space.newtext(name) for name in self.overloads]) @unwrap_spec(name='text') def get_overload(self, name): try: - return self.methods[name] + return self.overloads[name] except KeyError: pass - new_method = self.find_overload(name) - self.methods[name] = new_method - return new_method + new_ol = self.find_overload(name) + self.overloads[name] = new_ol + return new_ol def get_datamember_names(self): return self.space.newlist([self.space.newtext(name) for name in self.datamembers]) @@ -880,18 +1045,19 @@ # classes for inheritance. Both are python classes, though, and refactoring # may be in order at some point. 
class W_CPPNamespaceDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'name', 'overloads', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def _make_cppfunction(self, pyname, index): - num_args = capi.c_method_num_args(self.space, self, index) - args_required = capi.c_method_req_args(self.space, self, index) + def _make_cppfunction(self, pyname, cppmeth, funcs): + num_args = capi.c_method_num_args(self.space, cppmeth) + args_required = capi.c_method_req_args(self.space, cppmeth) arg_defs = [] for i in range(num_args): - arg_type = capi.c_method_arg_type(self.space, self, index, i) - arg_dflt = capi.c_method_arg_default(self.space, self, index, i) + arg_type = capi.c_method_arg_type(self.space, cppmeth, i) + arg_dflt = capi.c_method_arg_default(self.space, cppmeth, i) arg_defs.append((arg_type, arg_dflt)) - return CPPFunction(self.space, self, index, arg_defs, args_required) + funcs.append(CPPMethod(self.space, self, cppmeth, arg_defs, args_required)) + return FUNCTION_IS_GLOBAL def _make_datamember(self, dm_name, dm_idx): type_name = capi.c_datamember_type(self.space, self, dm_idx) @@ -907,14 +1073,20 @@ def find_overload(self, meth_name): indices = capi.c_method_indices_from_name(self.space, self, meth_name) - if not indices: - raise self.missing_attribute_error(meth_name) - cppfunctions = [] - for meth_idx in indices: - f = self._make_cppfunction(meth_name, meth_idx) - cppfunctions.append(f) - overload = W_CPPOverload(self.space, self, cppfunctions) - return overload + if indices: + cppfunctions, ftype = [], 0 + templated = False + for idx in indices: + cppmeth = capi.c_get_method(self.space, self, idx) + ftype |= self._make_cppfunction(meth_name, cppmeth, cppfunctions) + if capi.c_method_is_template(self.space, self, idx): + templated = True From pypy.commits at gmail.com Mon Jun 11 01:27:56 2018 From: pypy.commits at gmail.com (Matti Picus) Date: Sun, 10 Jun 2018 22:27:56 -0700 (PDT) 
Subject: [pypy-commit] pypy unicode-utf8-py3: merge unicode-utf8 into branch, probably many mistakes in merge Message-ID: <5b1e085c.1c69fb81.14c6b.1ba5@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r94752:3f63d5b725cc Date: 2018-06-10 21:44 -0700 http://bitbucket.org/pypy/pypy/changeset/3f63d5b725cc/ Log: merge unicode-utf8 into branch, probably many mistakes in merge diff too long, truncating to 2000 out of 13167 lines diff --git a/TODO b/TODO new file mode 100644 --- /dev/null +++ b/TODO @@ -0,0 +1,6 @@ +* find a better way to run "find" without creating the index storage, + if one is not already readily available +* write the correct jit_elidable in _get_index_storage +* improve performance of splitlines +* fix _pypyjson to not use a wrapped dict when decoding an object +* make sure we review all the places that call ord(unichr) to check for ValueErrors diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -28,6 +28,10 @@ The reverse-debugger branch has been merged. For more information, see https://bitbucket.org/pypy/revdb +.. branch: unicode-utf8-re +.. branch: utf8-io + +Utf8 handling for unicode .. 
branch: pyparser-improvements-3 diff --git a/pypy/interpreter/astcompiler/astbuilder.py b/pypy/interpreter/astcompiler/astbuilder.py --- a/pypy/interpreter/astcompiler/astbuilder.py +++ b/pypy/interpreter/astcompiler/astbuilder.py @@ -58,6 +58,7 @@ self.space = space self.compile_info = compile_info self.root_node = n + # used in f-strings self.recursive_parser = recursive_parser def build_ast(self): diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ -1264,9 +1264,6 @@ class AppTestCompiler: - def setup_class(cls): - cls.w_maxunicode = cls.space.wrap(sys.maxunicode) - def test_docstring_not_loaded(self): import io, dis, sys ns = {} diff --git a/pypy/interpreter/astcompiler/validate.py b/pypy/interpreter/astcompiler/validate.py --- a/pypy/interpreter/astcompiler/validate.py +++ b/pypy/interpreter/astcompiler/validate.py @@ -409,7 +409,7 @@ def visit_Str(self, node): space = self.space w_type = space.type(node.s) - if w_type != space.w_unicode: + if w_type != space.w_str: raise oefmt(space.w_TypeError, "non-string type in Str") def visit_Bytes(self, node): diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -3,7 +3,7 @@ from rpython.rlib.cache import Cache from rpython.tool.uid import HUGEVAL_BYTES -from rpython.rlib import jit, types +from rpython.rlib import jit, types, rutf8 from rpython.rlib.debug import make_sure_not_resized from rpython.rlib.objectmodel import (we_are_translated, newlist_hint, compute_unique_id, specialize, not_rpython) @@ -251,6 +251,12 @@ def text_w(self, space): self._typed_unwrap_error(space, "string") + def utf8_w(self, space): + self._typed_unwrap_error(space, "unicode") + + def convert_to_w_unicode(self, space): + self._typed_unwrap_error(space, 
"unicode") + def bytearray_list_of_chars_w(self, space): self._typed_unwrap_error(space, "bytearray") @@ -1066,7 +1072,7 @@ """ return None - def listview_unicode(self, w_list): + def listview_utf8(self, w_list): """ Return a list of unwrapped unicode out of a list of unicode. If the argument is not a list or does not contain only unicode, return None. May return None anyway. @@ -1096,8 +1102,15 @@ def newlist_bytes(self, list_s): return self.newlist([self.newbytes(s) for s in list_s]) - def newlist_unicode(self, list_u): - return self.newlist([self.newunicode(u) for u in list_u]) + def newlist_utf8(self, list_u, is_ascii): + l_w = [None] * len(list_u) + for i, item in enumerate(list_u): + if not is_ascii: + length = rutf8.check_utf8(item, True) + else: + length = len(item) + l_w[i] = self.newutf8(item, length) + return self.newlist(l_w) def newlist_int(self, list_i): return self.newlist([self.newint(i) for i in list_i]) @@ -1702,15 +1715,16 @@ assert w_obj is not None return w_obj.float_w(self, allow_conversion) - @specialize.argtype(1) - def unicode_w(self, w_obj): - assert w_obj is not None - return w_obj.unicode_w(self) + def utf8_w(self, w_obj): + return w_obj.utf8_w(self) + + def convert_to_w_unicode(self, w_obj): + return w_obj.convert_to_w_unicode(self) def unicode0_w(self, w_obj): "Like unicode_w, but rejects strings with NUL bytes." from rpython.rlib import rstring - result = w_obj.unicode_w(self) + result = w_obj.utf8_w(self).decode('utf8') if u'\x00' in result: raise oefmt(self.w_ValueError, "argument must be a unicode string without NUL " @@ -1733,6 +1747,23 @@ w_obj = self.fsencode(w_obj) return self.bytesbuf0_w(w_obj) + def convert_arg_to_w_unicode(self, w_obj, strict=None): + # XXX why convert_to_w_unicode does something slightly different? 
+ from pypy.objspace.std.unicodeobject import W_UnicodeObject + assert not hasattr(self, 'is_fake_objspace') + return W_UnicodeObject.convert_arg_to_w_unicode(self, w_obj, strict) + + def utf8_len_w(self, w_obj): + w_obj = self.convert_arg_to_w_unicode(w_obj) + return w_obj._utf8, w_obj._len() + + def realutf8_w(self, w_obj): + # Like utf8_w(), but only works if w_obj is really of type + # 'unicode'. On Python 3 this is the same as utf8_w(). + if not self.isinstance_w(w_obj, self.w_unicode): + raise oefmt(self.w_TypeError, "argument must be a unicode") + return self.utf8_w(w_obj) + def bytesbuf0_w(self, w_obj): # Like bytes0_w(), but also accept a read-only buffer. from rpython.rlib import rstring @@ -2078,7 +2109,7 @@ 'float_w', 'uint_w', 'bigint_w', - 'unicode_w', + 'utf8_w', 'unwrap', 'is_true', 'is_w', diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -174,6 +174,9 @@ def visit_unicode(self, el, app_sig): self.checked_space_method(el, app_sig) + def visit_utf8(self, el, app_sig): + self.checked_space_method(el, app_sig) + def visit_fsencode(self, el, app_sig): self.checked_space_method(el, app_sig) @@ -326,6 +329,9 @@ def visit_unicode(self, typ): self.run_args.append("space.unicode_w(%s)" % (self.scopenext(),)) + def visit_utf8(self, typ): + self.run_args.append("space.utf8_w(%s)" % (self.scopenext(),)) + def visit_fsencode(self, typ): self.run_args.append("space.fsencode_w(%s)" % (self.scopenext(),)) @@ -497,6 +503,9 @@ def visit_text0(self, typ): self.unwrap.append("space.text0_w(%s)" % (self.nextarg(),)) + def visit_utf8(self, typ): + self.unwrap.append("space.utf8_w(%s)" % (self.nextarg(),)) + def visit_fsencode(self, typ): self.unwrap.append("space.fsencode_w(%s)" % (self.nextarg(),)) diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ 
b/pypy/interpreter/pyparser/parsestring.py @@ -1,4 +1,5 @@ # coding: utf-8 +from rpython.rlib import rutf8 from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter import unicodehelper @@ -91,9 +92,11 @@ if encoding is None: substr = s[ps:q] else: + unicodehelper.check_utf8_or_raise(space, s, ps, q) substr = decode_unicode_utf8(space, s, ps, q) - v = unicodehelper.decode_unicode_escape(space, substr) - return space.newunicode(v) + r = unicodehelper.decode_unicode_escape(space, substr) + v, length = r + return space.newutf8(v, length) assert 0 <= ps <= q substr = s[ps : q] @@ -135,15 +138,12 @@ # the backslash we just wrote, we emit "\u005c" # instead. lis.append("u005c") - if ord(s[ps]) & 0x80: # XXX inefficient - w, ps = decode_utf8(space, s, ps, end) - for c in w: - # The equivalent of %08x, which is not supported by RPython. - # 7 zeroes are enough for the unicode range, and the - # result still fits in 32-bit. - hexa = hex(ord(c) + 0x10000000) - lis.append('\\U0') - lis.append(hexa[3:]) # Skip 0x and the leading 1 + if ord(s[ps]) & 0x80: + cp = rutf8.codepoint_at_pos(s, ps) + hexa = hex(cp + 0x10000000) + lis.append('\\U0') + lis.append(hexa[3:]) # Skip 0x and the leading 1 + ps = rutf8.next_codepoint_pos(s, ps) else: lis.append(s[ps]) ps += 1 @@ -250,20 +250,29 @@ ch >= 'A' and ch <= 'F') -def decode_utf8(space, s, ps, end): +def check_utf8(space, s, ps, end): assert ps >= 0 pt = ps # while (s < end && *s != '\\') s++; */ /* inefficient for u".." 
while ps < end and ord(s[ps]) & 0x80: ps += 1 - u = unicodehelper.decode_utf8(space, s[pt:ps]) - return u, ps + try: + rutf8.check_utf8(s, True, pt, ps) + except rutf8.CheckError as e: + lgt, flag = rutf8.check_utf8(s, True, pt, e.pos) + unicodehelper.decode_error_handler(space)('strict', 'utf8', + 'invalid utf-8', s, pt + lgt, pt + lgt + 1) + return s[pt:ps] def decode_utf8_recode(space, s, ps, end, recode_encoding): - u, ps = decode_utf8(space, s, ps, end) - w_v = unicodehelper.encode(space, space.newunicode(u), recode_encoding) + p = ps + while p < end and ord(s[p]) & 0x80: + p += 1 + lgt = unicodehelper.check_utf8_or_raise(space, s, ps, p) + w_v = unicodehelper.encode(space, space.newutf8(s[ps:p], lgt), + recode_encoding) v = space.bytes_w(w_v) - return v, ps + return v, p def raise_app_valueerror(space, msg): raise OperationError(space.w_ValueError, space.newtext(msg)) diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py --- a/pypy/interpreter/pyparser/test/test_parsestring.py +++ b/pypy/interpreter/pyparser/test/test_parsestring.py @@ -10,7 +10,7 @@ assert space.bytes_w(w_ret) == value elif isinstance(value, unicode): assert space.type(w_ret) == space.w_unicode - assert space.unicode_w(w_ret) == value + assert space.utf8_w(w_ret).decode('utf8') == value else: assert False @@ -61,7 +61,7 @@ s = "u'\x81'" s = s.decode("koi8-u").encode("utf8")[1:] w_ret = parsestring.parsestr(self.space, 'koi8-u', s) - ret = space.unwrap(w_ret) + ret = w_ret._utf8.decode('utf8') assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81'") def test_unicode_pep414(self): @@ -131,7 +131,4 @@ def test_decode_unicode_utf8(self): buf = parsestring.decode_unicode_utf8(self.space, 'u"\xf0\x9f\x92\x8b"', 2, 6) - if sys.maxunicode == 65535: - assert buf == r"\U0000d83d\U0000dc8b" - else: - assert buf == r"\U0001f48b" + assert buf == r"\U0001f48b" diff --git a/pypy/interpreter/test/test_gateway.py 
b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -555,25 +555,32 @@ w_app_g3_r = space.wrap(app_g3_r) space.raises_w(space.w_TypeError, space.call_function,w_app_g3_r,w(1.0)) - def test_interp2app_unwrap_spec_unicode(self): + def test_interp2app_unwrap_spec_utf8(self): space = self.space w = space.wrap - def g3_u(space, uni): - return space.wrap(len(uni)) + def g3_u(space, utf8): + return space.wrap(utf8) app_g3_u = gateway.interp2app_temp(g3_u, unwrap_spec=[gateway.ObjSpace, - unicode]) + 'utf8']) w_app_g3_u = space.wrap(app_g3_u) + encoded = u"gęść".encode('utf8') assert self.space.eq_w( - space.call_function(w_app_g3_u, w(u"foo")), - w(3)) + space.call_function(w_app_g3_u, w(u"gęść")), + w(encoded)) assert self.space.eq_w( - space.call_function(w_app_g3_u, w("baz")), - w(3)) + space.call_function(w_app_g3_u, w("foo")), + w("foo")) space.raises_w(space.w_TypeError, space.call_function, w_app_g3_u, w(None)) space.raises_w(space.w_TypeError, space.call_function, w_app_g3_u, w(42)) + w_ascii = space.appexec([], """(): + import sys + return sys.getdefaultencoding() == 'ascii'""") + if space.is_true(w_ascii): + raises(gateway.OperationError, space.call_function, w_app_g3_u, + w("\x80")) def test_interp2app_unwrap_spec_unwrapper(self): space = self.space diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py --- a/pypy/interpreter/test/test_objspace.py +++ b/pypy/interpreter/test/test_objspace.py @@ -210,9 +210,7 @@ space = self.space w = space.wrap assert space.text0_w(w("123")) == "123" - exc = space.raises_w(space.w_ValueError, space.text0_w, w("123\x004")) - assert space.unicode0_w(w(u"123")) == u"123" - exc = space.raises_w(space.w_ValueError, space.unicode0_w, w(u"123\x004")) + space.raises_w(space.w_ValueError, space.text0_w, w("123\x004")) def test_text_w(self): space = self.space diff --git a/pypy/interpreter/test/test_unicodehelper.py 
b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,5 +1,6 @@ import py import pytest +from hypothesis import given, strategies import struct import sys from pypy.interpreter.unicodehelper import ( @@ -10,23 +11,13 @@ class Hit(Exception): pass -class FakeSpace: - def __getattr__(self, name): - if name in ('w_UnicodeEncodeError', 'w_UnicodeDecodeError'): - raise Hit - raise AttributeError(name) +from pypy.interpreter.unicodehelper import str_decode_utf8 +from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii +from pypy.interpreter import unicodehelper as uh +from pypy.module._codecs.interp_codecs import CodecState - -def test_encode_utf8(): - space = FakeSpace() - assert encode_utf8(space, u"abc") == "abc" - assert encode_utf8(space, u"\u1234") == "\xe1\x88\xb4" - py.test.raises(Hit, encode_utf8, space, u"\ud800") - py.test.raises(Hit, encode_utf8, space, u"\udc00") - # for the following test, go to lengths to avoid CPython's optimizer - # and .pyc file storage, which collapse the two surrogates into one - c = u"\udc00" - py.test.raises(Hit, encode_utf8, space, u"\ud800" + c) +def decode_utf8(u): + return str_decode_utf8(u, True, "strict", None) def test_encode_utf8_allow_surrogates(): sp = FakeSpace() @@ -45,18 +36,33 @@ assert got == "\xed\xa0\x80\xed\xb0\x80" def test_decode_utf8(): - space = FakeSpace() - assert decode_utf8(space, "abc") == u"abc" - assert decode_utf8(space, "\xe1\x88\xb4") == u"\u1234" - py.test.raises(Hit, decode_utf8, space, "\xed\xa0\x80") - py.test.raises(Hit, decode_utf8, space, "\xed\xb0\x80") - py.test.raises(Hit, decode_utf8, space, "\xed\xa0\x80\xed\xb0\x80") - got = decode_utf8(space, "\xf0\x90\x80\x80") + assert decode_utf8("abc") == ("abc", 3, 3) + assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 3, 1) + assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 3, 1) + py.test.raises(Hit, decode_utf8, 
"\xed\xa0\x80") + py.test.raises(Hit, decode_utf8, "\xed\xb0\x80") + py.test.raises(Hit, decode_utf8, "\xed\xa0\x80\xed\xb0\x80") + got = decode_utf8("\xf0\x90\x80\x80") if sys.maxunicode > 65535: assert map(ord, got) == [0x10000] else: assert map(ord, got) == [55296, 56320] +def test_utf8_encode_ascii(): + assert utf8_encode_ascii("abc", "??", "??") == "abc" + def eh(errors, encoding, reason, p, start, end): + lst.append((errors, encoding, p, start, end)) + return "", end + lst = [] + input = u"\u1234".encode("utf8") + assert utf8_encode_ascii(input, "??", eh) == "" + assert lst == [("??", "ascii", input, 0, 1)] + lst = [] + input = u"\u1234\u5678abc\u8765\u4321".encode("utf8") + assert utf8_encode_ascii(input, "??", eh) == "abc" + assert lst == [("??", "ascii", input, 0, 2), + ("??", "ascii", input, 5, 7)] + def test_decode_utf8_allow_surrogates(): sp = FakeSpace() assert decode_utf8(sp, "\xed\xa0\x80", allow_surrogates=True) == u"\ud800" @@ -90,10 +96,58 @@ return unicode_encode_utf_32_be( u"<%s>" % unich, 3, None, errorhandler, allow_surrogates=False) - assert replace_with(u'rep', None) == u''.encode('utf-32-be') assert (replace_with(None, '\xca\xfe\xca\xfe') == '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>') with pytest.raises(UnicodeDecodeError): str_decode_utf_32_be(b"\x00\x00\xdc\x80", 4, None) + + + at given(strategies.text()) +def test_utf8_encode_ascii_2(u): + def eh(errors, encoding, reason, p, start, end): + return "?" 
* (end - start), end + assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") + +def test_str_decode_ascii(): + assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3) + def eh(errors, encoding, reason, p, start, end): + lst.append((errors, encoding, p, start, end)) + return u"\u1234\u5678".encode("utf8"), end + lst = [] + input = "\xe8" + exp = u"\u1234\u5678".encode("utf8") + assert str_decode_ascii(input, "??", True, eh) == (exp, 1, 2) + assert lst == [("??", "ascii", input, 0, 1)] + lst = [] + input = "\xe8\xe9abc\xea\xeb" + assert str_decode_ascii(input, "??", True, eh) == ( + exp + exp + "abc" + exp + exp, 7, 11) + assert lst == [("??", "ascii", input, 0, 1), + ("??", "ascii", input, 1, 2), + ("??", "ascii", input, 5, 6), + ("??", "ascii", input, 6, 7)] + + at given(strategies.text()) +def test_unicode_raw_escape(u): + r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) + assert r == u.encode("raw-unicode-escape") + + at given(strategies.text()) +def test_unicode_escape(u): + r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) + assert r == u.encode("unicode-escape") + +def test_encode_decimal(space): + assert uh.unicode_encode_decimal(u' 12, 34 ', None) == ' 12, 34 ' + with pytest.raises(ValueError): + uh.unicode_encode_decimal(u' 12, \u1234 '.encode('utf8'), None) + state = space.fromcache(CodecState) + handler = state.encode_error_handler + assert uh.unicode_encode_decimal( + u'u\u1234\u1235v'.encode('utf8'), 'replace', handler) == 'u??v' + + result = uh.unicode_encode_decimal( + u'12\u1234'.encode('utf8'), 'xmlcharrefreplace', handler) + assert result == '12ሴ' diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,12 +1,12 @@ import sys + from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.objectmodel import specialize -from 
rpython.rlib.rarithmetic import intmask -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder -from rpython.rlib import runicode -from rpython.rlib.runicode import ( - default_unicode_error_encode, default_unicode_error_decode, - MAXUNICODE, BYTEORDER, BYTEORDER2, UNICHR) +from rpython.rlib.rstring import StringBuilder +from rpython.rlib import rutf8 +from rpython.rlib.rarithmetic import r_uint, intmask +from rpython.rtyper.lltypesystem import rffi +from pypy.module.unicodedata import unicodedb _WIN32 = sys.platform == 'win32' _MACOSX = sys.platform == 'darwin' @@ -32,16 +32,30 @@ @specialize.memo() def encode_error_handler(space): # Fast version of the "strict" errors handler. - def raise_unicode_exception_encode(errors, encoding, msg, u, + def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): + u_len = rutf8.get_utf8_length(utf8) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - space.newunicode(u), + space.newutf8(utf8, u_len), space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) return raise_unicode_exception_encode +def default_error_encode( + errors, encoding, msg, u, startingpos, endingpos): + """A default handler, for tests""" + assert endingpos >= 0 + if errors == 'replace': + return '?', endingpos + if errors == 'ignore': + return '', endingpos + raise ValueError + +def convert_arg_to_w_unicode(space, w_arg, strict=None): + return space.convert_arg_to_w_unicode(w_arg) + # ____________________________________________________________ def fsdecode(space, w_string): @@ -112,27 +126,42 @@ from pypy.objspace.std.unicodeobject import encode_object return encode_object(space, w_data, encoding, errors) -# These functions take and return unwrapped rpython strings and unicodes + +def _has_surrogate(u): + for c in u: + if 0xD800 <= ord(c) <= 0xDFFF: + return True + return False + +# These functions take and return unwrapped rpython strings def 
decode_unicode_escape(space, string): from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) - result, consumed = runicode.str_decode_unicode_escape( - string, len(string), "strict", - final=True, errorhandler=decode_error_handler(space), - unicodedata_handler=unicodedata_handler) - return result + result_utf8, consumed, length = str_decode_unicode_escape( + string, "strict", + final=True, + errorhandler=decode_error_handler(space), + ud_handler=unicodedata_handler) + return result_utf8, length def decode_raw_unicode_escape(space, string): - result, consumed = runicode.str_decode_raw_unicode_escape( - string, len(string), "strict", + result_utf8, consumed, lgt = str_decode_raw_unicode_escape( + string, "strict", final=True, errorhandler=decode_error_handler(space)) - return result + return result_utf8, lgt -def decode_utf8(space, string, allow_surrogates=False): - # Note that Python3 tends to forbid *all* surrogates in utf-8. - # If allow_surrogates=True, then revert to the Python 2 behavior, - # i.e. surrogates are accepted and not treated specially at all. +def check_ascii_or_raise(space, string): + try: + rutf8.check_ascii(string) + except rutf8.CheckError as e: + decode_error_handler(space)('strict', 'ascii', + 'ordinal not in range(128)', string, + e.pos, e.pos + 1) + assert False, "unreachable" + +def check_utf8_or_raise(space, string, start=0, end=-1): + # Surrogates are accepted and not treated specially at all. # If there happen to be two 3-bytes encoding a pair of surrogates, # you still get two surrogate unicode characters in the result. assert isinstance(string, str) @@ -142,61 +171,832 @@ allow_surrogates=allow_surrogates) return result -def encode_utf8(space, uni, allow_surrogates=False): - # Note that Python3 tends to forbid *all* surrogates in utf-8. 
- # If allow_surrogates=True, then revert to the Python 2 behavior - # which never raises UnicodeEncodeError. Surrogate pairs are then - # allowed, either paired or lone. A paired surrogate is considered - # like the non-BMP character it stands for. See also *_utf8sp(). - assert isinstance(uni, unicode) - return runicode.unicode_encode_utf_8( - uni, len(uni), "strict", - errorhandler=encode_error_handler(space), - allow_surrogates=allow_surrogates) +def str_decode_ascii(s, errors, final, errorhandler): + try: + rutf8.check_ascii(s) + return s, len(s), len(s) + except rutf8.CheckError: + return _str_decode_ascii_slowpath(s, errors, final, errorhandler) -def encode_utf8sp(space, uni): - # Surrogate-preserving utf-8 encoding. Any surrogate character - # turns into its 3-bytes encoding, whether it is paired or not. - # This should always be reversible, and the reverse is - # decode_utf8sp(). - return runicode.unicode_encode_utf8sp(uni, len(uni)) +def _str_decode_ascii_slowpath(s, errors, final, errorhandler): + i = 0 + res = StringBuilder() + while i < len(s): + ch = s[i] + if ord(ch) > 0x7F: + r, i = errorhandler(errors, 'ascii', 'ordinal not in range(128)', + s, i, i + 1) + res.append(r) + else: + res.append(ch) + i += 1 + ress = res.build() + lgt = rutf8.check_utf8(ress, True) + return ress, len(s), lgt -def decode_utf8sp(space, string): - # Surrogate-preserving utf-8 decoding. Assuming there is no - # encoding error, it should always be reversible, and the reverse is - # encode_utf8sp(). 
- return decode_utf8(space, string, allow_surrogates=True) +def str_decode_latin_1(s, errors, final, errorhandler): + try: + rutf8.check_ascii(s) + return s, len(s), len(s) + except rutf8.CheckError: + return _str_decode_latin_1_slowpath(s, errors, final, errorhandler) + +def _str_decode_latin_1_slowpath(s, errors, final, errorhandler): + res = StringBuilder(len(s)) + i = 0 + while i < len(s): + if ord(s[i]) > 0x7F: + while i < len(s) and ord(s[i]) > 0x7F: + rutf8.unichr_as_utf8_append(res, ord(s[i])) + i += 1 + else: + start = i + end = i + 1 + while end < len(s) and ord(s[end]) <= 0x7F: + end += 1 + res.append_slice(s, start, end) + i = end + # cannot be ASCII, cannot have surrogates, I believe + return res.build(), len(s), len(s) + +def utf8_encode_latin_1(s, errors, errorhandler): + try: + rutf8.check_ascii(s) + return s + except rutf8.CheckError: + return _utf8_encode_latin_1_slowpath(s, errors, errorhandler) + +def _utf8_encode_latin_1_slowpath(s, errors, errorhandler): + size = len(s) + result = StringBuilder(size) + index = 0 + pos = 0 + while pos < size: + ch = rutf8.codepoint_at_pos(s, pos) + if ch <= 0xFF: + result.append(chr(ch)) + index += 1 + pos = rutf8.next_codepoint_pos(s, pos) + else: + startindex = index + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + while pos < size and rutf8.codepoint_at_pos(s, pos) > 0xFF: + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + msg = "ordinal not in range(256)" + res_8, newindex = errorhandler( + errors, 'latin1', msg, s, startindex, index) + for cp in rutf8.Utf8StringIterator(res_8): + if cp > 0xFF: + errorhandler("strict", 'latin1', msg, s, startindex, index) + result.append(chr(cp)) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) + return result.build() + +def utf8_encode_ascii(s, errors, errorhandler): + """ Don't be confused - this is a slowpath for errors e.g. 
"ignore" + or an obscure errorhandler + """ + size = len(s) + result = StringBuilder(size) + index = 0 + pos = 0 + while pos < size: + ch = rutf8.codepoint_at_pos(s, pos) + if ch <= 0x7F: + result.append(chr(ch)) + index += 1 + pos = rutf8.next_codepoint_pos(s, pos) + else: + startindex = index + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + while pos < size and rutf8.codepoint_at_pos(s, pos) > 0x7F: + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + msg = "ordinal not in range(128)" + res_8, newindex = errorhandler( + errors, 'ascii', msg, s, startindex, index) + for cp in rutf8.Utf8StringIterator(res_8): + if cp > 0x7F: + errorhandler("strict", 'ascii', msg, s, startindex, index) + result.append(chr(cp)) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) + return result.build() + +if sys.platform == 'win32': + def utf8_encode_mbcs(s, errors, errorhandler): + from rpython.rlib import runicode + s = s.decode('utf-8') + slen = len(s) + res = runicode.unicode_encode_mbcs(s, slen, errors, errorhandler) + return res + + def str_decode_mbcs(s, errors, final, errorhandler): + from rpython.rlib import runicode + slen = len(s) + res, size = runicode.str_decode_mbcs(s, slen, final=final, errors=errors, + errorhandler=errorhandler) + return res.encode('utf8'), size, len(res) + +def str_decode_utf8(s, errors, final, errorhandler): + """ Same as checking for the valid utf8, but we know the utf8 is not + valid so we're trying to either raise or pack stuff with error handler. 
+ The key difference is that this is call_may_force + """ + slen = len(s) + res = StringBuilder(slen) + pos = 0 + end = len(s) + while pos < end: + ordch1 = ord(s[pos]) + # fast path for ASCII + if ordch1 <= 0x7F: + pos += 1 + res.append(chr(ordch1)) + continue + + if ordch1 <= 0xC1: + r, pos = errorhandler(errors, "utf8", "invalid start byte", + s, pos, pos + 1) + res.append(r) + continue + + pos += 1 + + if ordch1 <= 0xDF: + if pos >= end: + if not final: + pos -= 1 + break + r, pos = errorhandler(errors, "utf8", "unexpected end of data", + s, pos - 1, pos) + res.append(r) + continue + ordch2 = ord(s[pos]) + + if rutf8._invalid_byte_2_of_2(ordch2): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos) + res.append(r) + continue + # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz + pos += 1 + res.append(chr(ordch1)) + res.append(chr(ordch2)) + continue + + if ordch1 <= 0xEF: + if (pos + 2) > end: + if not final: + pos -= 1 + break + r, pos = errorhandler(errors, "utf8", "unexpected end of data", + s, pos - 1, pos + 1) + res.append(r) + continue + ordch2 = ord(s[pos]) + ordch3 = ord(s[pos + 1]) + + if rutf8._invalid_byte_2_of_3(ordch1, ordch2, True): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos) + res.append(r) + continue + elif rutf8._invalid_byte_3_of_3(ordch3): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos + 1) + res.append(r) + continue + pos += 2 + + # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz + res.append(chr(ordch1)) + res.append(chr(ordch2)) + res.append(chr(ordch3)) + continue + + if ordch1 <= 0xF4: + if (pos + 3) > end: + if not final: + pos -= 1 + break + r, pos = errorhandler(errors, "utf8", "unexpected end of data", + s, pos - 1, pos) + res.append(r) + continue + ordch2 = ord(s[pos]) + ordch3 = ord(s[pos + 1]) + ordch4 = ord(s[pos + 2]) + + if rutf8._invalid_byte_2_of_4(ordch1, ordch2): + r, pos = errorhandler(errors, 
"utf8", "invalid continuation byte", + s, pos - 1, pos) + res.append(r) + continue + elif rutf8._invalid_byte_3_of_4(ordch3): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos + 1) + res.append(r) + continue + elif rutf8._invalid_byte_4_of_4(ordch4): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos + 2) + res.append(r) + continue + + pos += 3 + # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz + res.append(chr(ordch1)) + res.append(chr(ordch2)) + res.append(chr(ordch3)) + res.append(chr(ordch4)) + continue + + r, pos = errorhandler(errors, "utf8", "invalid start byte", + s, pos - 1, pos) + res.append(r) + + r = res.build() + return r, pos, rutf8.check_utf8(r, True) + +hexdigits = "0123456789ABCDEFabcdef" + +def hexescape(builder, s, pos, digits, + encoding, errorhandler, message, errors): + chr = 0 + if pos + digits > len(s): + endinpos = pos + while endinpos < len(s) and s[endinpos] in hexdigits: + endinpos += 1 + res, pos = errorhandler( + errors, encoding, message, s, pos - 2, endinpos) + builder.append(res) + else: + try: + chr = int(s[pos:pos + digits], 16) + except ValueError: + endinpos = pos + while s[endinpos] in hexdigits: + endinpos += 1 + res, pos = errorhandler( + errors, encoding, message, s, pos - 2, endinpos) + builder.append(res) + else: + # when we get here, chr is a 32-bit unicode character + try: + builder.append_code(chr) + pos += digits + except ValueError: + message = "illegal Unicode character" + res, pos = errorhandler( + errors, encoding, message, s, pos - 2, pos + digits) + builder.append(res) + return pos + +def str_decode_unicode_escape(s, errors, final, errorhandler, ud_handler): + size = len(s) + if size == 0: + return '', 0, 0 + + builder = rutf8.Utf8StringBuilder(size) + pos = 0 + while pos < size: + ch = s[pos] + + # Non-escape characters are interpreted as Unicode ordinals + if ch != '\\': + if ord(ch) > 0x7F: + 
builder.append_code(ord(ch)) + else: + builder.append(ch) + pos += 1 + continue + + # - Escapes + pos += 1 + if pos >= size: + message = "\\ at end of string" + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos - 1, size) + builder.append(res) + continue + + ch = s[pos] + pos += 1 + # \x escapes + if ch == '\n': + pass + elif ch == '\\': + builder.append_char('\\') + elif ch == '\'': + builder.append_char('\'') + elif ch == '\"': + builder.append_char('\"') + elif ch == 'b': + builder.append_char('\b') + elif ch == 'f': + builder.append_char('\f') + elif ch == 't': + builder.append_char('\t') + elif ch == 'n': + builder.append_char('\n') + elif ch == 'r': + builder.append_char('\r') + elif ch == 'v': + builder.append_char('\v') + elif ch == 'a': + builder.append_char('\a') + elif '0' <= ch <= '7': + x = ord(ch) - ord('0') + if pos < size: + ch = s[pos] + if '0' <= ch <= '7': + pos += 1 + x = (x << 3) + ord(ch) - ord('0') + if pos < size: + ch = s[pos] + if '0' <= ch <= '7': + pos += 1 + x = (x << 3) + ord(ch) - ord('0') + if x > 0x7F: + builder.append_code(x) + else: + builder.append_char(chr(x)) + # hex escapes + # \xXX + elif ch == 'x': + digits = 2 + message = "truncated \\xXX escape" + pos = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + # \uXXXX + elif ch == 'u': + digits = 4 + message = "truncated \\uXXXX escape" + pos = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + # \UXXXXXXXX + elif ch == 'U': + digits = 8 + message = "truncated \\UXXXXXXXX escape" + pos = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + # \N{name} + elif ch == 'N' and ud_handler is not None: + message = "malformed \\N character escape" + look = pos + + if look < size and s[look] == '{': + # look for the closing brace + while look < size and s[look] != '}': + look += 1 + if look < size and s[look] == '}': + # found a name. 
look it up in the unicode database + message = "unknown Unicode character name" + name = s[pos + 1:look] + code = ud_handler.call(name) + if code < 0: + res, pos = errorhandler( + errors, "unicodeescape", message, + s, pos - 1, look + 1) + builder.append(res) + continue + pos = look + 1 + builder.append_code(code) + else: + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos - 1, look + 1) + builder.append(res) + else: + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos - 1, look + 1) + builder.append(res) + else: + builder.append_char('\\') + builder.append_code(ord(ch)) + + return builder.build(), pos, builder.getlength() + +def wcharpsize2utf8(space, wcharp, size): + """Safe version of rffi.wcharpsize2utf8. + + Raises app-level ValueError if any wchar value is outside the valid + codepoint range. + """ + try: + return rffi.wcharpsize2utf8(wcharp, size) + except ValueError: + raise oefmt(space.w_ValueError, + "character is not in range [U+0000; U+10ffff]") + + +# ____________________________________________________________ +# Raw unicode escape + +def str_decode_raw_unicode_escape(s, errors, final=False, + errorhandler=None): + size = len(s) + if size == 0: + return '', 0, 0 + + builder = rutf8.Utf8StringBuilder(size) + pos = 0 + while pos < size: + ch = s[pos] + + # Non-escape characters are interpreted as Unicode ordinals + if ch != '\\': + builder.append_code(ord(ch)) + pos += 1 + continue + + # \u-escapes are only interpreted iff the number of leading + # backslashes is odd + bs = pos + while pos < size: + pos += 1 + if pos == size or s[pos] != '\\': + break + builder.append_char('\\') + + # we have a backslash at the end of the string, stop here + if pos >= size: + builder.append_char('\\') + break + + if ((pos - bs) & 1 == 0 or pos >= size or + (s[pos] != 'u' and s[pos] != 'U')): + builder.append_char('\\') + builder.append_code(ord(s[pos])) + pos += 1 + continue + + digits = 4 if s[pos] == 'u' else 8 + message = "truncated 
\\uXXXX" + pos += 1 + pos = hexescape(builder, s, pos, digits, + "rawunicodeescape", errorhandler, message, errors) + + return builder.build(), pos, builder.getlength() + +_utf8_encode_unicode_escape = rutf8.make_utf8_escape_function() + + +TABLE = '0123456789abcdef' + +def raw_unicode_escape_helper(result, char): + if char >= 0x10000 or char < 0: + result.append("\\U") + zeros = 8 + elif char >= 0x100: + result.append("\\u") + zeros = 4 + else: + result.append("\\x") + zeros = 2 + for i in range(zeros-1, -1, -1): + result.append(TABLE[(char >> (4 * i)) & 0x0f]) + +def utf8_encode_raw_unicode_escape(s, errors, errorhandler): + # errorhandler is not used: this function cannot cause Unicode errors + size = len(s) + if size == 0: + return '' + result = StringBuilder(size) + pos = 0 + while pos < size: + oc = rutf8.codepoint_at_pos(s, pos) + + if oc < 0x100: + result.append(chr(oc)) + else: + raw_unicode_escape_helper(result, oc) + pos = rutf8.next_codepoint_pos(s, pos) + + return result.build() + + +def utf8_encode_unicode_escape(s, errors, errorhandler): + return _utf8_encode_unicode_escape(s) + +# ____________________________________________________________ +# utf-7 + +# Three simple macros defining base-64 + +def _utf7_IS_BASE64(oc): + "Is c a base-64 character?" + c = chr(oc) + return c.isalnum() or c == '+' or c == '/' +def _utf7_TO_BASE64(n): + "Returns the base-64 character of the bottom 6 bits of n" + return "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[n & 0x3f] +def _utf7_FROM_BASE64(c): + "given that c is a base-64 character, what is its base-64 value?" + if c >= 'a': + return ord(c) - 71 + elif c >= 'A': + return ord(c) - 65 + elif c >= '0': + return ord(c) + 4 + elif c == '+': + return 62 + else: # c == '/' + return 63 + +def _utf7_DECODE_DIRECT(oc): + return oc <= 127 and oc != ord('+') + +# The UTF-7 encoder treats ASCII characters differently according to +# whether they are Set D, Set O, Whitespace, or special (i.e. 
none of +# the above). See RFC2152. This array identifies these different +# sets: +# 0 : "Set D" +# alphanumeric and '(),-./:? +# 1 : "Set O" +# !"#$%&*;<=>@[]^_`{|} +# 2 : "whitespace" +# ht nl cr sp +# 3 : special (must be base64 encoded) +# everything else (i.e. +\~ and non-printing codes 0-8 11-12 14-31 127) + +utf7_category = [ +# nul soh stx etx eot enq ack bel bs ht nl vt np cr so si + 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, +# dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +# sp ! " # $ % & ' ( ) * + , - . / + 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0, +# 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, +# @ A B C D E F G H I J K L M N O + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +# P Q R S T U V W X Y Z [ \ ] ^ _ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, +# ` a b c d e f g h i j k l m n o + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +# p q r s t u v w x y z { | } ~ del + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3, +] + +# ENCODE_DIRECT: this character should be encoded as itself. The +# answer depends on whether we are encoding set O as itself, and also +# on whether we are encoding whitespace as itself. RFC2152 makes it +# clear that the answers to these questions vary between +# applications, so this code needs to be flexible. 
+ +def _utf7_ENCODE_DIRECT(oc, directO, directWS): + return(oc < 128 and oc > 0 and + (utf7_category[oc] == 0 or + (directWS and utf7_category[oc] == 2) or + (directO and utf7_category[oc] == 1))) + +def _utf7_ENCODE_CHAR(result, oc, base64bits, base64buffer): + if oc >= 0x10000: + # code first surrogate + base64bits += 16 + base64buffer = (base64buffer << 16) | 0xd800 | ((oc-0x10000) >> 10) + while base64bits >= 6: + result.append(_utf7_TO_BASE64(base64buffer >> (base64bits-6))) + base64bits -= 6 + # prepare second surrogate + oc = 0xDC00 | ((oc-0x10000) & 0x3FF) + base64bits += 16 + base64buffer = (base64buffer << 16) | oc + while base64bits >= 6: + result.append(_utf7_TO_BASE64(base64buffer >> (base64bits-6))) + base64bits -= 6 + return base64bits, base64buffer + +def str_decode_utf_7(s, errors, final=False, + errorhandler=None): + size = len(s) + if size == 0: + return '', 0, 0 + + inShift = False + base64bits = 0 + base64buffer = 0 + surrogate = 0 + outsize = 0 + + result = StringBuilder(size) + pos = 0 + shiftOutStartPos = 0 + startinpos = 0 + while pos < size: + ch = s[pos] + + if inShift: # in a base-64 section + if _utf7_IS_BASE64(ord(ch)): #consume a base-64 character + base64buffer = (base64buffer << 6) | _utf7_FROM_BASE64(ch) + assert base64buffer >= 0 + base64bits += 6 + pos += 1 + + if base64bits >= 16: + # enough bits for a UTF-16 value + outCh = base64buffer >> (base64bits - 16) + assert outCh >= 0 + base64bits -= 16 + base64buffer &= (1 << base64bits) - 1 # clear high bits + assert outCh <= 0xffff + if surrogate: + # expecting a second surrogate + if outCh >= 0xDC00 and outCh <= 0xDFFF: + code = (((surrogate & 0x3FF)<<10) | + (outCh & 0x3FF)) + 0x10000 + rutf8.unichr_as_utf8_append(result, code) + outsize += 1 + surrogate = 0 + continue + else: + rutf8.unichr_as_utf8_append(result, surrogate, + allow_surrogates=True) + outsize += 1 + surrogate = 0 + # Not done with outCh: falls back to next line + if outCh >= 0xD800 and outCh <= 0xDBFF: + # first 
surrogate + surrogate = outCh + else: + outsize += 1 + assert outCh >= 0 + rutf8.unichr_as_utf8_append(result, outCh, True) + + else: + # now leaving a base-64 section + inShift = False + + if base64bits > 0: # left-over bits + if base64bits >= 6: + # We've seen at least one base-64 character + pos += 1 + msg = "partial character in shift sequence" + res, pos = errorhandler(errors, 'utf7', + msg, s, pos-1, pos) + reslen = rutf8.check_utf8(res, True) + outsize += reslen + result.append(res) + continue + else: + # Some bits remain; they should be zero + if base64buffer != 0: + pos += 1 + msg = "non-zero padding bits in shift sequence" + res, pos = errorhandler(errors, 'utf7', + msg, s, pos-1, pos) + reslen = rutf8.check_utf8(res, True) + outsize += reslen + result.append(res) + continue + + if surrogate and _utf7_DECODE_DIRECT(ord(ch)): + outsize += 1 + rutf8.unichr_as_utf8_append(result, surrogate, True) + surrogate = 0 + + if ch == '-': + # '-' is absorbed; other terminating characters are + # preserved + pos += 1 + + elif ch == '+': + startinpos = pos + pos += 1 # consume '+' + if pos < size and s[pos] == '-': # '+-' encodes '+' + pos += 1 + result.append('+') + outsize += 1 + else: # begin base64-encoded section + inShift = 1 + surrogate = 0 + shiftOutStartPos = result.getlength() + base64bits = 0 + base64buffer = 0 + + elif _utf7_DECODE_DIRECT(ord(ch)): # character decodes at itself + result.append(ch) + outsize += 1 + pos += 1 + else: + startinpos = pos + pos += 1 + msg = "unexpected special character" + res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) + reslen = rutf8.check_utf8(res, True) + outsize += reslen + result.append(res) + + # end of string + final_length = result.getlength() + if inShift and final: # in shift sequence, no more to follow + # if we're in an inconsistent state, that's an error + inShift = 0 + if (surrogate or + base64bits >= 6 or + (base64bits > 0 and base64buffer != 0)): + msg = "unterminated shift sequence" + res, pos = 
errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos) + reslen = rutf8.check_utf8(res, True) + outsize += reslen + result.append(res) + final_length = result.getlength() + elif inShift: + pos = startinpos + final_length = shiftOutStartPos # back off output + + assert final_length >= 0 + return result.build()[:final_length], pos, outsize + +def utf8_encode_utf_7(s, errors, errorhandler): + size = len(s) + if size == 0: + return '' + result = StringBuilder(size) + + encodeSetO = encodeWhiteSpace = False + + inShift = False + base64bits = 0 + base64buffer = 0 + + pos = 0 + while pos < size: + oc = rutf8.codepoint_at_pos(s, pos) + if not inShift: + if oc == ord('+'): + result.append('+-') + elif _utf7_ENCODE_DIRECT(oc, not encodeSetO, not encodeWhiteSpace): + result.append(chr(oc)) + else: + result.append('+') + inShift = True + base64bits, base64buffer = _utf7_ENCODE_CHAR( + result, oc, base64bits, base64buffer) + else: + if _utf7_ENCODE_DIRECT(oc, not encodeSetO, not encodeWhiteSpace): + # shifting out + if base64bits: # output remaining bits + result.append(_utf7_TO_BASE64(base64buffer << (6-base64bits))) + base64buffer = 0 + base64bits = 0 + + inShift = False + ## Characters not in the BASE64 set implicitly unshift the + ## sequence so no '-' is required, except if the character is + ## itself a '-' + if _utf7_IS_BASE64(oc) or oc == ord('-'): + result.append('-') + result.append(chr(oc)) + else: + base64bits, base64buffer = _utf7_ENCODE_CHAR( + result, oc, base64bits, base64buffer) + pos = rutf8.next_codepoint_pos(s, pos) + + if base64bits: + result.append(_utf7_TO_BASE64(base64buffer << (6 - base64bits))) + if inShift: + result.append('-') + + return result.build() # ____________________________________________________________ # utf-16 -def str_decode_utf_16(s, size, errors, final=True, - errorhandler=None): - result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, - errorhandler, "native", - 'utf-16-' + BYTEORDER2) - return result, 
length +BYTEORDER = sys.byteorder +BYTEORDER2 = BYTEORDER[0] + 'e' # either "le" or "be" +assert BYTEORDER2 in ('le', 'be') -def str_decode_utf_16_be(s, size, errors, final=True, - errorhandler=None): - result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, - errorhandler, "big", - 'utf-16-be') - return result, length +def str_decode_utf_16(s, errors, final=True, + errorhandler=None): + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, + errorhandler, "native") + return result, c, lgt -def str_decode_utf_16_le(s, size, errors, final=True, - errorhandler=None): - result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final, - errorhandler, "little", - 'utf-16-le') - return result, length +def str_decode_utf_16_be(s, errors, final=True, + errorhandler=None): + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, + errorhandler, "big") + return result, c, lgt -def str_decode_utf_16_helper(s, size, errors, final=True, +def str_decode_utf_16_le(s, errors, final=True, + errorhandler=None): + result, c, lgt, _ = str_decode_utf_16_helper(s, errors, final, + errorhandler, "little") + return result, c, lgt + +def str_decode_utf_16_helper(s, errors, final=True, errorhandler=None, byteorder="native", public_encoding_name='utf16'): - if errorhandler is None: - errorhandler = default_unicode_error_decode + size = len(s) bo = 0 if BYTEORDER == 'little': @@ -233,7 +1033,7 @@ else: bo = 1 if size == 0: - return u'', 0, bo + return '', 0, 0, bo if bo == -1: # force little endian ihi = 1 @@ -244,7 +1044,7 @@ ihi = 0 ilo = 1 - result = UnicodeBuilder(size // 2) + result = StringBuilder(size // 2) #XXX I think the errors are not correctly handled here while pos < size: @@ -261,7 +1061,7 @@ ch = (ord(s[pos + ihi]) << 8) | ord(s[pos + ilo]) pos += 2 if ch < 0xD800 or ch > 0xDFFF: - result.append(unichr(ch)) + rutf8.unichr_as_utf8_append(result, ch) continue # UTF-16 code pair: if len(s) - pos < 2: @@ -278,12 +1078,8 @@ ch2 = 
(ord(s[pos+ihi]) << 8) | ord(s[pos+ilo]) pos += 2 if 0xDC00 <= ch2 <= 0xDFFF: - if MAXUNICODE < 65536: - result.append(unichr(ch)) - result.append(unichr(ch2)) - else: - result.append(UNICHR((((ch & 0x3FF)<<10) | - (ch2 & 0x3FF)) + 0x10000)) + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 + rutf8.unichr_as_utf8_append(result, ch) continue else: r, pos = errorhandler(errors, public_encoding_name, @@ -295,7 +1091,9 @@ "illegal encoding", s, pos - 2, pos) result.append(r) - return result.build(), pos, bo + r = result.build() + lgt = rutf8.check_utf8(r, True) + return result.build(), pos, lgt, bo def _STORECHAR(result, CH, byteorder): hi = chr(((CH) >> 8) & 0xff) @@ -307,13 +1105,12 @@ result.append(hi) result.append(lo) -def unicode_encode_utf_16_helper(s, size, errors, +def unicode_encode_utf_16_helper(s, errors, errorhandler=None, allow_surrogates=True, byteorder='little', public_encoding_name='utf16'): - if errorhandler is None: - errorhandler = default_unicode_error_encode + size = len(s) if size == 0: if byteorder == 'native': result = StringBuilder(2) @@ -327,9 +1124,9 @@ byteorder = BYTEORDER pos = 0 + index = 0 while pos < size: - ch = ord(s[pos]) - pos += 1 + ch = rutf8.codepoint_at_pos(s, pos) if ch < 0xD800: _STORECHAR(result, ch, byteorder) @@ -339,46 +1136,44 @@ elif ch >= 0xE000 or allow_surrogates: _STORECHAR(result, ch, byteorder) else: - ru, rs, pos = errorhandler(errors, public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - if rs is not None: - # py3k only - if len(rs) % 2 != 0: - errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - result.append(rs) - continue - for ch in ru: - if ord(ch) < 0xD800: - _STORECHAR(result, ord(ch), byteorder) + res_8, newindex = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + for cp in rutf8.Utf8StringIterator(res_8): + if cp < 0xD800: + _STORECHAR(result, cp, byteorder) else: errorhandler('strict', 
public_encoding_name, 'surrogates not allowed', s, pos-1, pos) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) continue + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + return result.build() -def unicode_encode_utf_16(s, size, errors, +def utf8_encode_utf_16(s, errors, errorhandler=None, allow_surrogates=True): - return unicode_encode_utf_16_helper(s, size, errors, errorhandler, + return unicode_encode_utf_16_helper(s, errors, errorhandler, allow_surrogates, "native", 'utf-16-' + BYTEORDER2) -def unicode_encode_utf_16_be(s, size, errors, +def utf8_encode_utf_16_be(s, errors, errorhandler=None, allow_surrogates=True): - return unicode_encode_utf_16_helper(s, size, errors, errorhandler, + return unicode_encode_utf_16_helper(s, errors, errorhandler, allow_surrogates, "big", 'utf-16-be') -def unicode_encode_utf_16_le(s, size, errors, +def utf8_encode_utf_16_le(s, errors, errorhandler=None, allow_surrogates=True): - return unicode_encode_utf_16_helper(s, size, errors, errorhandler, + return unicode_encode_utf_16_helper(s, errors, errorhandler, allow_surrogates, "little", 'utf-16-le') @@ -386,38 +1181,38 @@ # ____________________________________________________________ # utf-32 -def str_decode_utf_32(s, size, errors, final=True, - errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper( +def str_decode_utf_32(s, errors, final=True, + errorhandler=None): + result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final, s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2, allow_surrogates=False) - return result, length + return result, c, lgt -def str_decode_utf_32_be(s, size, errors, final=True, - errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper( - s, size, errors, final, errorhandler, "big", 'utf-32-be', +def str_decode_utf_32_be(s, errors, final=True, + errorhandler=None): + result, c, lgt, _ = str_decode_utf_32_helper( + s, errors, final, 
errorhandler, "big", 'utf-32-be', allow_surrogates=False) - return result, length + return result, c, lgt -def str_decode_utf_32_le(s, size, errors, final=True, - errorhandler=None): - result, length, byteorder = str_decode_utf_32_helper( - s, size, errors, final, errorhandler, "little", 'utf-32-le', +def str_decode_utf_32_le(s, errors, final=True, + errorhandler=None): + result, c, lgt, _ = str_decode_utf_32_helper( + s, errors, final, errorhandler, "little", 'utf-32-le', allow_surrogates=False) - return result, length + return result, c, lgt -BOM32_DIRECT = intmask(0x0000FEFF) +BOM32_DIRECT = intmask(0x0000FEFF) BOM32_REVERSE = intmask(0xFFFE0000) -def str_decode_utf_32_helper(s, size, errors, final=True, - errorhandler=None, +def str_decode_utf_32_helper(s, errors, final, + errorhandler, byteorder="native", public_encoding_name='utf32', allow_surrogates=True): - if errorhandler is None: - errorhandler = default_unicode_error_decode + assert errorhandler is not None bo = 0 + size = len(s) if BYTEORDER == 'little': iorder = [0, 1, 2, 3] @@ -453,7 +1248,7 @@ else: bo = 1 if size == 0: - return u'', 0, bo + return '', 0, 0, bo if bo == -1: # force little endian iorder = [0, 1, 2, 3] @@ -461,7 +1256,7 @@ # force big endian iorder = [3, 2, 1, 0] - result = UnicodeBuilder(size // 4) + result = StringBuilder(size // 4) while pos < size: # remaining bytes at the end? 
(size should be divisible by 4) @@ -476,7 +1271,7 @@ break continue ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | - (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) + (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) if not allow_surrogates and 0xD800 <= ch <= 0xDFFF: r, pos = errorhandler(errors, public_encoding_name, "code point in surrogate code point " @@ -487,18 +1282,15 @@ elif ch >= 0x110000: r, pos = errorhandler(errors, public_encoding_name, "codepoint not in range(0x110000)", - s, pos, pos + 4) + s, pos, len(s)) result.append(r) continue - if MAXUNICODE < 65536 and ch >= 0x10000: - ch -= 0x10000L - result.append(unichr(0xD800 + (ch >> 10))) - result.append(unichr(0xDC00 + (ch & 0x03FF))) - else: - result.append(UNICHR(ch)) + rutf8.unichr_as_utf8_append(result, ch, allow_surrogates=allow_surrogates) pos += 4 - return result.build(), pos, bo + r = result.build() + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt, bo def _STORECHAR32(result, CH, byteorder): c0 = chr(((CH) >> 24) & 0xff) @@ -516,13 +1308,12 @@ result.append(c2) result.append(c3) -def unicode_encode_utf_32_helper(s, size, errors, +def unicode_encode_utf_32_helper(s, errors, errorhandler=None, allow_surrogates=True, byteorder='little', public_encoding_name='utf32'): - if errorhandler is None: - errorhandler = default_unicode_error_encode + size = len(s) if size == 0: if byteorder == 'native': result = StringBuilder(4) @@ -536,53 +1327,258 @@ byteorder = BYTEORDER pos = 0 + index = 0 while pos < size: - ch = ord(s[pos]) - pos += 1 - ch2 = 0 + ch = rutf8.codepoint_at_pos(s, pos) + pos = rutf8.next_codepoint_pos(s, pos) if not allow_surrogates and 0xD800 <= ch < 0xE000: - ru, rs, pos = errorhandler( + res_8, newindex = errorhandler( errors, public_encoding_name, 'surrogates not allowed', s, pos - 1, pos) - if rs is not None: - # py3k only - if len(rs) % 4 != 0: + for ch in rutf8.Utf8StringIterator(res_8): + if ch < 0xD800: + _STORECHAR32(result, ch, 
byteorder) + else: errorhandler( 'strict', public_encoding_name, 'surrogates not allowed', s, pos - 1, pos) - result.append(rs) - continue - for ch in ru: - if ord(ch) < 0xD800: - _STORECHAR32(result, ord(ch), byteorder) - else: - errorhandler( - 'strict', public_encoding_name, - 'surrogates not allowed', s, pos - 1, pos) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) continue - if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size: - ch2 = ord(s[pos]) - if 0xDC00 <= ch2 < 0xE000: - ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000 - pos += 1 _STORECHAR32(result, ch, byteorder) + index += 1 return result.build() -def unicode_encode_utf_32(s, size, errors, - errorhandler=None, allow_surrogates=True): - return unicode_encode_utf_32_helper(s, size, errors, errorhandler, +def utf8_encode_utf_32(s, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, errors, errorhandler, allow_surrogates, "native", 'utf-32-' + BYTEORDER2) -def unicode_encode_utf_32_be(s, size, errors, +def utf8_encode_utf_32_be(s, errors, errorhandler=None, allow_surrogates=True): - return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + return unicode_encode_utf_32_helper(s, errors, errorhandler, allow_surrogates, "big", 'utf-32-be') -def unicode_encode_utf_32_le(s, size, errors, +def utf8_encode_utf_32_le(s, errors, errorhandler=None, allow_surrogates=True): - return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + return unicode_encode_utf_32_helper(s, errors, errorhandler, allow_surrogates, "little", 'utf-32-le') +# ____________________________________________________________ +# unicode-internal + +def str_decode_unicode_internal(s, errors, final=False, + errorhandler=None): + size = len(s) + if size == 0: + return '', 0, 0 + + unicode_bytes = 4 + if BYTEORDER == "little": + start = 0 + stop = unicode_bytes + step = 1 + else: + start = unicode_bytes - 1 + stop = 
-1 + step = -1 + + result = StringBuilder(size) + pos = 0 + while pos < size: + if pos > size - unicode_bytes: + res, pos = errorhandler(errors, "unicode_internal", + "truncated input", + s, pos, size) + result.append(res) + if pos > size - unicode_bytes: + break + continue + t = r_uint(0) + h = 0 + for j in range(start, stop, step): + t += r_uint(ord(s[pos + j])) << (h*8) + h += 1 + if t > 0x10ffff: + res, pos = errorhandler(errors, "unicode_internal", + "unichr(%d) not in range" % (t,), + s, pos, pos + unicode_bytes) + result.append(res) + continue + rutf8.unichr_as_utf8_append(result, intmask(t), allow_surrogates=True) + pos += unicode_bytes + r = result.build() + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt + +def utf8_encode_unicode_internal(s, errors, errorhandler): + size = len(s) + if size == 0: + return '' + + result = StringBuilder(size * 4) + pos = 0 + while pos < size: + oc = rutf8.codepoint_at_pos(s, pos) + if BYTEORDER == "little": + result.append(chr(oc & 0xFF)) + result.append(chr(oc >> 8 & 0xFF)) + result.append(chr(oc >> 16 & 0xFF)) + result.append(chr(oc >> 24 & 0xFF)) + else: + result.append(chr(oc >> 24 & 0xFF)) + result.append(chr(oc >> 16 & 0xFF)) + result.append(chr(oc >> 8 & 0xFF)) + result.append(chr(oc & 0xFF)) + pos = rutf8.next_codepoint_pos(s, pos) + + return result.build() + +# ____________________________________________________________ +# Charmap + +ERROR_CHAR = u'\ufffe'.encode('utf8') + + at specialize.argtype(4) +def str_decode_charmap(s, errors, final=False, + errorhandler=None, mapping=None): + "mapping can be a rpython dictionary, or a dict-like object." 
+ + # Default to Latin-1 + if mapping is None: + return str_decode_latin_1(s, errors, final=final, + errorhandler=errorhandler) + size = len(s) + if size == 0: + return '', 0, 0 + + pos = 0 + result = StringBuilder(size) + while pos < size: + ch = s[pos] + + c = mapping.get(ord(ch), ERROR_CHAR) + if c == ERROR_CHAR: + r, pos = errorhandler(errors, "charmap", + "character maps to ", + s, pos, pos + 1) + result.append(r) + continue + result.append(c) + pos += 1 + r = result.build() + lgt = rutf8.check_utf8(r, True) + return r, pos, lgt + +def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None): + size = len(s) + if mapping is None: + return utf8_encode_latin_1(s, errors, errorhandler=errorhandler) + + if size == 0: + return '' + result = StringBuilder(size) + pos = 0 + index = 0 + while pos < size: + ch = rutf8.codepoint_at_pos(s, pos) + c = mapping.get(ch, '') + if len(c) == 0: + # collect all unencodable chars. + startindex = index + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + while (pos < size and + mapping.get(rutf8.codepoint_at_pos(s, pos), '') == ''): + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + res_8, newindex = errorhandler(errors, "charmap", + "character maps to ", + s, startindex, index) + for cp2 in rutf8.Utf8StringIterator(res_8): + ch2 = mapping.get(cp2, '') + if not ch2: + errorhandler( + "strict", "charmap", "character maps to ", + s, startindex, index) + result.append(ch2) + if index != newindex: # Should be uncommon + index = newindex + pos = rutf8._pos_at_index(s, newindex) + continue + result.append(c) + index += 1 + pos = rutf8.next_codepoint_pos(s, pos) + return result.build() + +# ____________________________________________________________ +# Decimal Encoder +def unicode_encode_decimal(s, errors, errorhandler=None): + """Converts whitespace to ' ', decimal characters to their + corresponding ASCII digit and all other Latin-1 characters except + \0 as-is. 
Characters outside this range (Unicode ordinals 1-256) + are treated as errors. This includes embedded NULL bytes. + """ + if errorhandler is None: + errorhandler = default_error_encode + result = StringBuilder(len(s)) From pypy.commits at gmail.com Mon Jun 11 01:27:59 2018 From: pypy.commits at gmail.com (Matti Picus) Date: Sun, 10 Jun 2018 22:27:59 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: fix imports. Tests start to run. str_decode_utf8 replaces decode_utf8 but args have changed Message-ID: <5b1e085f.1c69fb81.1ffb7.03cc@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r94753:40650baa7fd6 Date: 2018-06-10 22:20 -0700 http://bitbucket.org/pypy/pypy/changeset/40650baa7fd6/ Log: fix imports. Tests start to run. str_decode_utf8 replaces decode_utf8 but args have changed diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -247,9 +247,7 @@ def unicode_w(self, space): self._typed_unwrap_error(space, "string") - - def text_w(self, space): - self._typed_unwrap_error(space, "string") + realunicode_w = unicode_w def utf8_w(self, space): self._typed_unwrap_error(space, "unicode") @@ -1732,7 +1730,6 @@ return rstring.assert_str0(result) realtext_w = text_w # Python 2 compatibility - realunicode_w = unicode_w def fsencode(space, w_obj): from pypy.interpreter.unicodehelper import fsencode diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -4,7 +4,7 @@ import struct import sys from pypy.interpreter.unicodehelper import ( - encode_utf8, decode_utf8, unicode_encode_utf_32_be, str_decode_utf_32_be) + encode_utf8, str_decode_utf8, utf8_encode_utf_32_be, str_decode_utf_32_be) from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp diff --git 
a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -304,11 +304,12 @@ errorhandler=errorhandler) return res.encode('utf8'), size, len(res) -def str_decode_utf8(s, errors, final, errorhandler): +def str_decode_utf8(s, errors, final, errorhandler, allow_surrogates=False): """ Same as checking for the valid utf8, but we know the utf8 is not valid so we're trying to either raise or pack stuff with error handler. The key difference is that this is call_may_force """ + # XXX need to handle allow_surrogates slen = len(s) res = StringBuilder(slen) pos = 0 @@ -967,6 +968,32 @@ return result.build() +def encode_utf8(space, uni, allow_surrogates=False): + # Note that Python3 tends to forbid *all* surrogates in utf-8. + # If allow_surrogates=True, then revert to the Python 2 behavior + # which never raises UnicodeEncodeError. Surrogate pairs are then + # allowed, either paired or lone. A paired surrogate is considered + # like the non-BMP character it stands for. See also *_utf8sp(). + assert isinstance(uni, unicode) + return runicode.unicode_encode_utf_8( + uni, len(uni), "strict", + errorhandler=encode_error_handler(space), + allow_surrogates=allow_surrogates) + +def encode_utf8sp(space, uni): + # Surrogate-preserving utf-8 encoding. Any surrogate character + # turns into its 3-bytes encoding, whether it is paired or not. + # This should always be reversible, and the reverse is + # decode_utf8sp(). + return runicode.unicode_encode_utf8sp(uni, len(uni)) + +def decode_utf8sp(space, string): + # Surrogate-preserving utf-8 decoding. Assuming there is no + # encoding error, it should always be reversible, and the reverse is + # encode_utf8sp(). 
+ return decode_utf8(space, string, allow_surrogates=True) + + # ____________________________________________________________ # utf-16 diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -86,7 +86,7 @@ newpos = -1 else: if newpos < 0: - newpos = length + newpos + newpos = length + newpos if newpos < 0 or newpos > length: raise oefmt(space.w_IndexError, "position %d from error handler out of bounds", diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -12,7 +12,7 @@ from pypy.interpreter.mixedmodule import MixedModule from pypy.interpreter.signature import Signature from pypy.interpreter.typedef import TypeDef -from pypy.interpreter.unicodehelper import decode_utf8 +from pypy.interpreter.unicodehelper import str_decode_utf8 from pypy.objspace.std.util import negate @@ -1184,7 +1184,7 @@ # we should implement the same shortcuts as we do for BytesDictStrategy def decodekey_str(self, key): - return decode_utf8(self.space, key, allow_surrogates=True) + return str_decode_utf8(self.space, key, allow_surrogates=True) def setitem_str(self, w_dict, key, w_value): assert key is not None diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py --- a/pypy/objspace/std/mapdict.py +++ b/pypy/objspace/std/mapdict.py @@ -4,7 +4,7 @@ from rpython.rlib.rarithmetic import intmask, r_uint from pypy.interpreter.baseobjspace import W_Root -from pypy.interpreter.unicodehelper import decode_utf8 +from pypy.interpreter.unicodehelper import str_decode_utf8 from pypy.objspace.std.dictmultiobject import ( W_DictMultiObject, DictStrategy, ObjectDictStrategy, BaseKeyIterator, BaseValueIterator, BaseItemIterator, _never_equal_to_string, @@ -433,7 +433,7 @@ def materialize_str_dict(self, space, obj, str_dict): new_obj = 
self.back.materialize_str_dict(space, obj, str_dict) if self.index == DICT: - uni_name = decode_utf8(space, self.name) + uni_name = str_decode_utf8(space, self.name) str_dict[uni_name] = obj._mapdict_read_storage(self.storageindex) else: self._copy_attr(obj, new_obj) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -4,7 +4,7 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.function import Function, Method, FunctionWithFixedCode from pypy.interpreter.typedef import get_unique_interplevel_subclass -from pypy.interpreter.unicodehelper import decode_utf8 +from pypy.interpreter.unicodehelper import str_decode_utf8 from pypy.objspace.std import frame, transparent, callmethod from pypy.objspace.descroperation import ( DescrOperation, get_attribute_name, raiseattrerror) @@ -165,7 +165,7 @@ unicode_x = x.decode('ascii') except UnicodeDecodeError: return self._wrap_string_old(x) - return self.newunicode(unicode_x) + return self.newtext(unicode_x) if isinstance(x, unicode): x = x.encode('utf8') lgt = rutf8.check_utf8(x, True) @@ -192,7 +192,7 @@ else: lst.append(unichr(ch)) unicode_x = u''.join(lst) - return self.newunicode(unicode_x) + return self.newtext(unicode_x) @not_rpython # only for tests def _wrap_not_rpython(self, x): @@ -334,7 +334,7 @@ def newlist_text(self, list_t): return self.newlist_unicode([ - decode_utf8(self, s, allow_surrogates=True) for s in list_t]) + str_decode_utf8(self, s, allow_surrogates=True) for s in list_t]) def newlist_utf8(self, list_u, is_ascii): if is_ascii: @@ -388,7 +388,7 @@ return W_BytearrayObject(l) def newtext(self, s): - return self.newunicode(decode_utf8(self, s, allow_surrogates=True)) + return self.newtext(str_decode_utf8(self, s, allow_surrogates=True)) def newtext_or_none(self, s): if s is None: diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- 
a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -6,8 +6,9 @@ from rpython.rlib.rarithmetic import ovfcheck from rpython.rlib.rstring import ( StringBuilder, split, rsplit, UnicodeBuilder, replace_count, startswith, - unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii, - unicode_encode_utf8_forbid_surrogates, SurrogateError, endswith) + endswith) +from rpython.rlib.runicode import ( + unicode_encode_utf8_forbid_surrogates, SurrogateError) from rpython.rlib import rutf8, jit from pypy.interpreter import unicodehelper @@ -1851,4 +1852,4 @@ return unicode_encode_utf8_forbid_surrogates(value, len(value)) _repr_function = rutf8.make_utf8_escape_function( - pass_printable=True, unicode_output=True, quotes=True, prefix='') + pass_printable=True, quotes=True, prefix='') From pypy.commits at gmail.com Mon Jun 11 01:28:01 2018 From: pypy.commits at gmail.com (Matti Picus) Date: Sun, 10 Jun 2018 22:28:01 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: merge default into branch Message-ID: <5b1e0861.1c69fb81.86189.ce7a@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r94754:daeb185c5482 Date: 2018-06-10 22:26 -0700 http://bitbucket.org/pypy/pypy/changeset/daeb185c5482/ Log: merge default into branch diff too long, truncating to 2000 out of 5932 lines diff --git a/lib_pypy/grp.py b/lib_pypy/grp.py --- a/lib_pypy/grp.py +++ b/lib_pypy/grp.py @@ -4,6 +4,8 @@ from _pwdgrp_cffi import ffi, lib import _structseq +import thread +_lock = thread.allocate_lock() try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f @@ -33,32 +35,35 @@ @builtinify def getgrgid(gid): - res = lib.getgrgid(gid) - if not res: - # XXX maybe check error eventually - raise KeyError(gid) - return _group_from_gstruct(res) + with _lock: + res = lib.getgrgid(gid) + if not res: + # XXX maybe check error eventually + raise KeyError(gid) + return _group_from_gstruct(res) @builtinify def getgrnam(name): if not 
isinstance(name, basestring): raise TypeError("expected string") name = str(name) - res = lib.getgrnam(name) - if not res: - raise KeyError("'getgrnam(): name not found: %s'" % name) - return _group_from_gstruct(res) + with _lock: + res = lib.getgrnam(name) + if not res: + raise KeyError("'getgrnam(): name not found: %s'" % name) + return _group_from_gstruct(res) @builtinify def getgrall(): - lib.setgrent() lst = [] - while 1: - p = lib.getgrent() - if not p: - break - lst.append(_group_from_gstruct(p)) - lib.endgrent() + with _lock: + lib.setgrent() + while 1: + p = lib.getgrent() + if not p: + break + lst.append(_group_from_gstruct(p)) + lib.endgrent() return lst __all__ = ('struct_group', 'getgrgid', 'getgrnam', 'getgrall') diff --git a/lib_pypy/pwd.py b/lib_pypy/pwd.py --- a/lib_pypy/pwd.py +++ b/lib_pypy/pwd.py @@ -12,6 +12,8 @@ from _pwdgrp_cffi import ffi, lib import _structseq +import thread +_lock = thread.allocate_lock() try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f @@ -55,10 +57,11 @@ Return the password database entry for the given numeric user ID. See pwd.__doc__ for more on password database entries. """ - pw = lib.getpwuid(uid) - if not pw: - raise KeyError("getpwuid(): uid not found: %s" % uid) - return _mkpwent(pw) + with _lock: + pw = lib.getpwuid(uid) + if not pw: + raise KeyError("getpwuid(): uid not found: %s" % uid) + return _mkpwent(pw) @builtinify def getpwnam(name): @@ -71,10 +74,11 @@ if not isinstance(name, basestring): raise TypeError("expected string") name = str(name) - pw = lib.getpwnam(name) - if not pw: - raise KeyError("getpwname(): name not found: %s" % name) - return _mkpwent(pw) + with _lock: + pw = lib.getpwnam(name) + if not pw: + raise KeyError("getpwname(): name not found: %s" % name) + return _mkpwent(pw) @builtinify def getpwall(): @@ -84,13 +88,14 @@ See pwd.__doc__ for more on password database entries. 
""" users = [] - lib.setpwent() - while True: - pw = lib.getpwent() - if not pw: - break - users.append(_mkpwent(pw)) - lib.endpwent() + with _lock: + lib.setpwent() + while True: + pw = lib.getpwent() + if not pw: + break + users.append(_mkpwent(pw)) + lib.endpwent() return users __all__ = ('struct_passwd', 'getpwuid', 'getpwnam', 'getpwall') diff --git a/pypy/doc/install.rst b/pypy/doc/install.rst --- a/pypy/doc/install.rst +++ b/pypy/doc/install.rst @@ -20,7 +20,7 @@ OS and architecture. You may be able to use either use the `most recent release`_ or one of our `development nightly build`_. These builds depend on dynamically linked libraries that may not be available on your -OS. See the section about `Linux binaries` for more info and alternatives that +OS. See the section about `Linux binaries`_ for more info and alternatives that may work on your system. Please note that the nightly builds are not diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -7,9 +7,9 @@ .. branch: cppyy-packaging -Upgrade to backend 0.6.0, support exception handling from wrapped functions, -update enum handling, const correctness for data members and associated tests, -support anonymous enums, support for function pointer arguments +Upgrade to backend 1.1.0, improved handling of templated methods and +functions (in particular automatic deduction of types), improved pythonization +interface, and a range of compatibility fixes for Python3 .. branch: socket_default_timeout_blockingness @@ -28,9 +28,11 @@ The reverse-debugger branch has been merged. For more information, see https://bitbucket.org/pypy/revdb +.. branch: pyparser-improvements-3 + +Small refactorings in the Python parser. + .. branch: unicode-utf8-re .. 
branch: utf8-io Utf8 handling for unicode - - diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -29,29 +29,28 @@ ``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` or in ``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. -A current version of ``setuptools`` will be able to find it there. For -Windows 10, you must right-click the download, and under ``Properties`` -> -``Compatibility`` mark it as ``Run run this program in comatibility mode for`` -``Previous version...``. Also, you must download and install the ``.Net Framework 3.5``, +A current version of ``setuptools`` will be able to find it there. +Also, you must download and install the ``.Net Framework 3.5``, otherwise ``mt.exe`` will silently fail. Installation will begin automatically by running the mt.exe command by hand from a DOS window (that is how the author discovered the problem). .. _Microsoft Visual C++ Compiler for Python 2.7: https://www.microsoft.com/EN-US/DOWNLOAD/DETAILS.ASPX?ID=44266 -Installing "Build Tools for Visual Studio 2017" (for Python 3) +Installing "Build Tools for Visual Studio 2015" (for Python 3) -------------------------------------------------------------- -As documented in the CPython Wiki_, CPython now recommends Visual C++ version -14.0. A compact version of the compiler suite can be obtained from Microsoft_ -downloads, search the page for "Build Tools for Visual Studio 2017". +As documented in the CPython Wiki_, CPython recommends Visual C++ version +14.0 for python version 3.5. A compact version of the compiler suite can be +obtained from Microsoft_ downloads, search the page for "Microsoft Build Tools 2015". -You will also need to install the the `Windows SDK`_ in order to use the -`mt.exe` mainfest compiler. +You will need to reboot the computer for the installation to successfully install and +run the `mt.exe` mainfest compiler. 
The installation will set the +`VS140COMNTOOLS` environment variable, this is key to distutils/setuptools +finding the compiler .. _Wiki: https://wiki.python.org/moin/WindowsCompilers -.. _Microsoft: https://www.visualstudio.com/downloads -.. _`Windows SDK`: https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk +.. _Microsoft: https://www.visualstudio.com/vs/older-downloads/ Translating PyPy with Visual Studio ----------------------------------- @@ -99,6 +98,9 @@ Setting Up Visual Studio 9.0 for building SSL in Python3 -------------------------------------------------------- +**Note: this is old information, left for historical reference. We recommend +using Visual Studio 2015, which now seems to properly set this all up.** + On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after translation. However ``distutils`` does not support the Micorosft-provided Visual C compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The @@ -146,14 +148,14 @@ Installing external packages ---------------------------- -We uses a `repository` parallel to pypy to hold binary compiled versions of the +We uses a subrepository_ inside pypy to hold binary compiled versions of the build dependencies for windows. As part of the `rpython` setup stage, environment variables will be set to use these dependencies. The repository has a README file on how to replicate, and a branch for each supported platform. You may run the `get_externals.py` utility to checkout the proper branch for your platform and PyPy version. -.. _repository: https://bitbucket.org/pypy/external +.. 
_subrepository: https://bitbucket.org/pypy/external Using the mingw compiler ------------------------ diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py --- a/pypy/interpreter/astcompiler/test/test_astbuilder.py +++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py @@ -1096,7 +1096,7 @@ s = self.get_first_expr("'hi' ' implicitly' ' extra'") assert isinstance(s, ast.Str) assert space.eq_w(s.s, space.wrap("hi implicitly extra")) - sentence = u"Die Männer ärgen sich!" + sentence = u"Die Männer ärgern sich!" source = u"# coding: utf-7\nstuff = u'%s'" % (sentence,) info = pyparse.CompileInfo("", "exec") tree = self.parser.parse_source(source.encode("utf-7"), info) diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ -27,7 +27,7 @@ generator._resolve_block_targets(blocks) return generator, blocks -class TestCompiler: +class BaseTestCompiler: """These tests compile snippets of code and check them by running them with our own interpreter. 
These are thus not completely *unit* tests, but given that our interpreter is @@ -74,6 +74,9 @@ def error_test(self, source, exc_type): py.test.raises(exc_type, self.simple_test, source, None, None) + +class TestCompiler(BaseTestCompiler): + def test_issue_713(self): func = "def f(_=2): return (_ if _ else _) if False else _" yield self.st, func, "f()", 2 @@ -953,9 +956,11 @@ yield (self.st, "x=(lambda: (-0.0, 0.0), lambda: (0.0, -0.0))[1]()", 'repr(x)', '(0.0, -0.0)') +class TestCompilerRevDB(BaseTestCompiler): + spaceconfig = {"translation.reverse_debugger": True} + def test_revdb_metavar(self): from pypy.interpreter.reverse_debugging import dbstate, setup_revdb - self.space.config.translation.reverse_debugger = True self.space.reverse_debugging = True try: setup_revdb(self.space) diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -43,7 +43,7 @@ self.tok = self.tokens[index] def skip(self, n): - if self.tok[0] == n: + if self.tok.token_type == n: self.next() return True else: @@ -51,7 +51,7 @@ def skip_name(self, name): from pypy.interpreter.pyparser import pygram - if self.tok[0] == pygram.tokens.NAME and self.tok[1] == name: + if self.tok.token_type == pygram.tokens.NAME and self.tok.value == name: self.next() return True else: @@ -59,8 +59,8 @@ def next_feature_name(self): from pypy.interpreter.pyparser import pygram - if self.tok[0] == pygram.tokens.NAME: - name = self.tok[1] + if self.tok.token_type == pygram.tokens.NAME: + name = self.tok.value self.next() if self.skip_name("as"): self.skip(pygram.tokens.NAME) @@ -101,7 +101,7 @@ # somewhere inside the last __future__ import statement # (at the start would be fine too, but it's easier to grab a # random position inside) - last_position = (it.tok[2], it.tok[3]) + last_position = (it.tok.lineno, it.tok.column) result |= future_flags.get_compiler_feature(it.next_feature_name()) while 
it.skip(pygram.tokens.COMMA): result |= future_flags.get_compiler_feature(it.next_feature_name()) diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py --- a/pypy/interpreter/pyparser/parser.py +++ b/pypy/interpreter/pyparser/parser.py @@ -28,11 +28,24 @@ new.symbol_ids = self.symbol_ids new.symbols_names = self.symbol_names new.keyword_ids = self.keyword_ids + new.token_to_error_string = self.token_to_error_string new.dfas = self.dfas new.labels = self.labels new.token_ids = self.token_ids return new + + def classify(self, token): + """Find the label for a token.""" + if token.token_type == self.KEYWORD_TOKEN: + label_index = self.keyword_ids.get(token.value, -1) + if label_index != -1: + return label_index + label_index = self.token_ids.get(token.token_type, -1) + if label_index == -1: + raise ParseError("invalid token", token) + return label_index + def _freeze_(self): # Remove some attributes not used in parsing. try: @@ -65,6 +78,33 @@ b[pos] |= bit return str(b) + +class Token(object): + def __init__(self, token_type, value, lineno, column, line): + self.token_type = token_type + self.value = value + self.lineno = lineno + # 0-based offset + self.column = column + self.line = line + + def __repr__(self): + return "Token(%s, %s)" % (self.token_type, self.value) + + def __eq__(self, other): + # for tests + return ( + self.token_type == other.token_type and + self.value == other.value and + self.lineno == other.lineno and + self.column == other.column and + self.line == other.line + ) + + def __ne__(self, other): + return not self == other + + class Node(object): __slots__ = ("type", ) @@ -105,6 +145,11 @@ self.lineno = lineno self.column = column + @staticmethod + def fromtoken(token): + return Terminal( + token.token_type, token.value, token.lineno, token.column) + def __repr__(self): return "Terminal(type=%s, value=%r)" % (self.type, self.value) @@ -193,20 +238,14 @@ class ParseError(Exception): - def __init__(self, msg, 
token_type, value, lineno, column, line, - expected=-1, expected_str=None): + def __init__(self, msg, token, expected=-1, expected_str=None): self.msg = msg - self.token_type = token_type - self.value = value - self.lineno = lineno - # this is a 0-based index - self.column = column - self.line = line + self.token = token self.expected = expected self.expected_str = expected_str def __str__(self): - return "ParserError(%s, %r)" % (self.token_type, self.value) + return "ParserError(%s)" % (self.token, ) class StackEntry(object): @@ -249,8 +288,8 @@ self.root = None self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0) - def add_token(self, token_type, value, lineno, column, line): - label_index = self.classify(token_type, value, lineno, column, line) + def add_token(self, token): + label_index = self.grammar.classify(token) sym_id = 0 # for the annotator while True: dfa = self.stack.dfa @@ -261,7 +300,7 @@ sym_id = self.grammar.labels[i] if label_index == i: # We matched a non-terminal. - self.shift(next_state, token_type, value, lineno, column) + self.shift(next_state, token) state = states[next_state] # While the only possible action is to accept, pop nodes off # the stack. @@ -278,8 +317,7 @@ sub_node_dfa = self.grammar.dfas[sym_id - 256] # Check if this token can start a child node. if sub_node_dfa.could_match_token(label_index): - self.push(sub_node_dfa, next_state, sym_id, lineno, - column) + self.push(sub_node_dfa, next_state, sym_id) break else: # We failed to find any arcs to another state, so unless this @@ -287,8 +325,7 @@ if is_accepting: self.pop() if self.stack is None: - raise ParseError("too much input", token_type, value, - lineno, column, line) + raise ParseError("too much input", token) else: # If only one possible input would satisfy, attach it to the # error. 
@@ -299,28 +336,16 @@ else: expected = -1 expected_str = None - raise ParseError("bad input", token_type, value, lineno, - column, line, expected, expected_str) + raise ParseError("bad input", token, expected, expected_str) - def classify(self, token_type, value, lineno, column, line): - """Find the label for a token.""" - if token_type == self.grammar.KEYWORD_TOKEN: - label_index = self.grammar.keyword_ids.get(value, -1) - if label_index != -1: - return label_index - label_index = self.grammar.token_ids.get(token_type, -1) - if label_index == -1: - raise ParseError("invalid token", token_type, value, lineno, column, - line) - return label_index - def shift(self, next_state, token_type, value, lineno, column): + def shift(self, next_state, token): """Shift a non-terminal and prepare for the next state.""" - new_node = Terminal(token_type, value, lineno, column) + new_node = Terminal.fromtoken(token) self.stack.node_append_child(new_node) self.stack.state = next_state - def push(self, next_dfa, next_state, node_type, lineno, column): + def push(self, next_dfa, next_state, node_type): """Push a terminal and adjust the current state.""" self.stack.state = next_state self.stack = self.stack.push(next_dfa, 0) diff --git a/pypy/interpreter/pyparser/pygram.py b/pypy/interpreter/pyparser/pygram.py --- a/pypy/interpreter/pyparser/pygram.py +++ b/pypy/interpreter/pyparser/pygram.py @@ -23,6 +23,17 @@ python_grammar_no_print.keyword_ids = python_grammar_no_print.keyword_ids.copy() del python_grammar_no_print.keyword_ids["print"] +python_grammar_revdb = python_grammar.shared_copy() +python_grammar_no_print_revdb = python_grammar_no_print.shared_copy() +copied_token_ids = python_grammar.token_ids.copy() +python_grammar_revdb.token_ids = copied_token_ids +python_grammar_no_print_revdb.token_ids = copied_token_ids + +metavar_token_id = pytoken.python_tokens['REVDBMETAVAR'] +# the following line affects python_grammar_no_print too, since they share the +# dict +del 
python_grammar.token_ids[metavar_token_id] + class _Tokens(object): pass for tok_name, idx in pytoken.python_tokens.iteritems(): @@ -39,3 +50,16 @@ syms._rev_lookup = rev_lookup # for debugging del _get_python_grammar, _Tokens, tok_name, sym_name, idx + +def choose_grammar(print_function, revdb): + if print_function: + if revdb: + return python_grammar_no_print_revdb + else: + return python_grammar_no_print + else: + if revdb: + return python_grammar_revdb + else: + return python_grammar + diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py --- a/pypy/interpreter/pyparser/pyparse.py +++ b/pypy/interpreter/pyparser/pyparse.py @@ -147,38 +147,37 @@ flags &= ~consts.PyCF_DONT_IMPLY_DEDENT self.prepare(_targets[compile_info.mode]) - tp = 0 try: try: # Note: we no longer pass the CO_FUTURE_* to the tokenizer, # which is expected to work independently of them. It's # certainly the case for all futures in Python <= 2.7. tokens = pytokenizer.generate_tokens(source_lines, flags) - - newflags, last_future_import = ( - future.add_future_flags(self.future_flags, tokens)) - compile_info.last_future_import = last_future_import - compile_info.flags |= newflags - - if compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION: - self.grammar = pygram.python_grammar_no_print - else: - self.grammar = pygram.python_grammar - - for tp, value, lineno, column, line in tokens: - if self.add_token(tp, value, lineno, column, line): - break except error.TokenError as e: e.filename = compile_info.filename raise except error.TokenIndentationError as e: e.filename = compile_info.filename raise + + newflags, last_future_import = ( + future.add_future_flags(self.future_flags, tokens)) + compile_info.last_future_import = last_future_import + compile_info.flags |= newflags + + self.grammar = pygram.choose_grammar( + print_function=compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION, + revdb=self.space.config.translation.reverse_debugger) + + try: + for token in 
tokens: + if self.add_token(token): + break except parser.ParseError as e: # Catch parse errors, pretty them up and reraise them as a # SyntaxError. new_err = error.IndentationError - if tp == pygram.tokens.INDENT: + if token.token_type == pygram.tokens.INDENT: msg = "unexpected indent" elif e.expected == pygram.tokens.INDENT: msg = "expected an indented block" @@ -190,7 +189,7 @@ # parser.ParseError(...).column is 0-based, but the offsets in the # exceptions in the error module are 1-based, hence the '+ 1' - raise new_err(msg, e.lineno, e.column + 1, e.line, + raise new_err(msg, e.token.lineno, e.token.column + 1, e.token.line, compile_info.filename) else: tree = self.root diff --git a/pypy/interpreter/pyparser/pytokenize.py b/pypy/interpreter/pyparser/pytokenize.py --- a/pypy/interpreter/pyparser/pytokenize.py +++ b/pypy/interpreter/pyparser/pytokenize.py @@ -1,9 +1,6 @@ # ______________________________________________________________________ """Module pytokenize -THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED -TO BE ANNOTABLE (Mainly made lists homogeneous) - This is a modified version of Ka-Ping Yee's tokenize module found in the Python standard library. @@ -12,7 +9,6 @@ expressions have been replaced with hand built DFA's using the basil.util.automata module. 
-$Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $ """ # ______________________________________________________________________ @@ -65,22 +61,3 @@ single_quoted[t] = t tabsize = 8 - -# PYPY MODIFICATION: removed TokenError class as it's not needed here - -# PYPY MODIFICATION: removed StopTokenizing class as it's not needed here - -# PYPY MODIFICATION: removed printtoken() as it's not needed here - -# PYPY MODIFICATION: removed tokenize() as it's not needed here - -# PYPY MODIFICATION: removed tokenize_loop() as it's not needed here - -# PYPY MODIFICATION: removed generate_tokens() as it was copied / modified -# in pythonlexer.py - -# PYPY MODIFICATION: removed main() as it's not needed here - -# ______________________________________________________________________ -# End of pytokenize.py - diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py --- a/pypy/interpreter/pyparser/pytokenizer.py +++ b/pypy/interpreter/pyparser/pytokenizer.py @@ -1,4 +1,5 @@ from pypy.interpreter.pyparser import automata +from pypy.interpreter.pyparser.parser import Token from pypy.interpreter.pyparser.pygram import tokens from pypy.interpreter.pyparser.pytoken import python_opmap from pypy.interpreter.pyparser.error import TokenError, TokenIndentationError @@ -103,7 +104,7 @@ endmatch = endDFA.recognize(line) if endmatch >= 0: pos = end = endmatch - tok = (tokens.STRING, contstr + line[:end], strstart[0], + tok = Token(tokens.STRING, contstr + line[:end], strstart[0], strstart[1], line) token_list.append(tok) last_comment = '' @@ -111,7 +112,7 @@ contline = None elif (needcont and not line.endswith('\\\n') and not line.endswith('\\\r\n')): - tok = (tokens.ERRORTOKEN, contstr + line, strstart[0], + tok = Token(tokens.ERRORTOKEN, contstr + line, strstart[0], strstart[1], line) token_list.append(tok) last_comment = '' @@ -140,11 +141,11 @@ if column > indents[-1]: # count indents or dedents indents.append(column) - 
token_list.append((tokens.INDENT, line[:pos], lnum, 0, line)) + token_list.append(Token(tokens.INDENT, line[:pos], lnum, 0, line)) last_comment = '' while column < indents[-1]: indents.pop() - token_list.append((tokens.DEDENT, '', lnum, pos, line)) + token_list.append(Token(tokens.DEDENT, '', lnum, pos, line)) last_comment = '' if column != indents[-1]: err = "unindent does not match any outer indentation level" @@ -177,11 +178,11 @@ token, initial = line[start:end], line[start] if initial in numchars or \ (initial == '.' and token != '.'): # ordinary number - token_list.append((tokens.NUMBER, token, lnum, start, line)) + token_list.append(Token(tokens.NUMBER, token, lnum, start, line)) last_comment = '' elif initial in '\r\n': if not parenstack: - tok = (tokens.NEWLINE, last_comment, lnum, start, line) + tok = Token(tokens.NEWLINE, last_comment, lnum, start, line) token_list.append(tok) last_comment = '' elif initial == '#': @@ -193,7 +194,7 @@ if endmatch >= 0: # all on one line pos = endmatch token = line[start:pos] - tok = (tokens.STRING, token, lnum, start, line) + tok = Token(tokens.STRING, token, lnum, start, line) token_list.append(tok) last_comment = '' else: @@ -212,16 +213,16 @@ contline = line break else: # ordinary string - tok = (tokens.STRING, token, lnum, start, line) + tok = Token(tokens.STRING, token, lnum, start, line) token_list.append(tok) last_comment = '' elif initial in namechars: # ordinary name - token_list.append((tokens.NAME, token, lnum, start, line)) + token_list.append(Token(tokens.NAME, token, lnum, start, line)) last_comment = '' elif initial == '\\': # continued stmt continued = 1 elif initial == '$': - token_list.append((tokens.REVDBMETAVAR, token, + token_list.append(Token(tokens.REVDBMETAVAR, token, lnum, start, line)) last_comment = '' else: @@ -246,7 +247,7 @@ punct = python_opmap[token] else: punct = tokens.OP - token_list.append((punct, token, lnum, start, line)) + token_list.append(Token(punct, token, lnum, start, line)) 
last_comment = '' else: start = whiteSpaceDFA.recognize(line, pos) @@ -255,22 +256,22 @@ if start