From pypy.commits at gmail.com Sat Sep 1 07:53:54 2018 From: pypy.commits at gmail.com (rlamy) Date: Sat, 01 Sep 2018 04:53:54 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: (lucaz97, ronan) Handle underscores in Decimal constructor Message-ID: <5b8a7dd2.1c69fb81.85672.fd63@mx.google.com> Author: Ronan Lamy Branch: py3.6 Changeset: r95054:4f3876339fff Date: 2018-09-01 13:52 +0200 http://bitbucket.org/pypy/pypy/changeset/4f3876339fff/ Log: (lucaz97, ronan) Handle underscores in Decimal constructor diff --git a/lib_pypy/_decimal.py b/lib_pypy/_decimal.py --- a/lib_pypy/_decimal.py +++ b/lib_pypy/_decimal.py @@ -234,6 +234,7 @@ @classmethod def _from_str(cls, value, context, exact=True, strip=True): + value = value.replace("_", "") s = str.encode(value, 'ascii', '_decimal_encode') if b'\0' in s: s = b'' # empty string triggers ConversionSyntax. From pypy.commits at gmail.com Sat Sep 1 08:59:25 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 01 Sep 2018 05:59:25 -0700 (PDT) Subject: [pypy-commit] pypy default: Issue #2878 Message-ID: <5b8a8d2d.1c69fb81.30fb3.c6dd@mx.google.com> Author: Armin Rigo Branch: Changeset: r95055:28c9dafa5a46 Date: 2018-09-01 14:58 +0200 http://bitbucket.org/pypy/pypy/changeset/28c9dafa5a46/ Log: Issue #2878 Handle a case of keepalive that I imagined would never occur diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -393,9 +393,11 @@ _init_no_arg_ = __init__ def _ensure_objects(self): - if self._type_ not in 'zZP': - assert self._objects is None - return self._objects + # No '_objects' is the common case for primitives. Examples + # where there is an _objects is if _type in 'zZP', or if + # self comes from 'from_buffer(buf)'. See module/test_lib_pypy/ + # ctypes_test/test_buffers.py: test_from_buffer_keepalive. 
+ return getattr(self, '_objects', None) def _getvalue(self): return self._buffer[0] diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py @@ -39,6 +39,15 @@ assert b.value in (1684234849, # little endian 1633837924) # big endian + def test_from_buffer_keepalive(self): + # Issue #2878 + b1 = bytearray("ab") + array = (c_uint16 * 32)() + array[6] = c_uint16.from_buffer(b1) + # this is also what we get on CPython. I don't think it makes + # sense because the array contains just a copy of the number. + assert array._objects == {'6': b1} + try: c_wchar except NameError: From pypy.commits at gmail.com Sat Sep 1 09:40:46 2018 From: pypy.commits at gmail.com (rlamy) Date: Sat, 01 Sep 2018 06:40:46 -0700 (PDT) Subject: [pypy-commit] pypy default: (lucaz97, ronan) Leading underscores are never allowed (even on py3.6) + clean up test Message-ID: <5b8a96de.1c69fb81.16ea3.53b5@mx.google.com> Author: Ronan Lamy Branch: Changeset: r95056:f7ebab619eb2 Date: 2018-09-01 15:37 +0200 http://bitbucket.org/pypy/pypy/changeset/f7ebab619eb2/ Log: (lucaz97, ronan) Leading underscores are never allowed (even on py3.6) + clean up test diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -464,6 +464,10 @@ raise InvalidBaseError("%s() base must be >= 2 and <= 36" % fname) self.base = base + # Leading underscores are not allowed + if s.startswith('_'): + self.error() + if base == 16 and (s.startswith('0x') or s.startswith('0X')): s = s[2:] if base == 8 and (s.startswith('0o') or s.startswith('0O')): diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py --- a/rpython/rlib/test/test_rarithmetic.py +++ b/rpython/rlib/test/test_rarithmetic.py @@ -554,50 +554,52 @@ py.test.raises(ParseStringError, 
string_to_int, '+'+s, base) py.test.raises(ParseStringError, string_to_int, '-'+s, base) - def test_number_underscores(self): - VALID_UNDERSCORE_LITERALS = [ - '0_0_0', - '4_2', - '1_0000_0000', - '0b1001_0100', - '0xfff_ffff', - '0o5_7_7', - '0b_0', - '0x_f', - '0o_5', - ] - INVALID_UNDERSCORE_LITERALS = [ - # Trailing underscores: - '0_', - '42_', - '1.4j_', - '0x_', - '0b1_', - '0xf_', - '0o5_', - # Underscores in the base selector: - '0_b0', - '0_xf', - '0_o5', - # Old-style octal, still disallowed: - '09_99', - # Multiple consecutive underscores: - '4_______2', - '0b1001__0100', - '0xfff__ffff', - '0x___', - '0o5__77', - '1e1__0', - ] - for x in VALID_UNDERSCORE_LITERALS: - print x - y = string_to_int(x, base=0, allow_underscores=True, - no_implicit_octal=True) - assert y == int(x.replace('_', ''), base=0) - for x in INVALID_UNDERSCORE_LITERALS: - print x - py.test.raises(ParseStringError, string_to_int, x, base=0, - allow_underscores=True) + @py.test.mark.parametrize('s', [ + '0_0_0', + '4_2', + '1_0000_0000', + '0b1001_0100', + '0xfff_ffff', + '0o5_7_7', + '0b_0', + '0x_f', + '0o_5', + ]) + def test_valid_underscores(self, s): + result = string_to_int( + s, base=0, allow_underscores=True, no_implicit_octal=True) + assert result == int(s.replace('_', ''), base=0) + + @py.test.mark.parametrize('s', [ + # Leading underscores + '_100', + '_', + '_0b1001_0100', + # Trailing underscores: + '0_', + '42_', + '1.4j_', + '0x_', + '0b1_', + '0xf_', + '0o5_', + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + '09_99', + # Multiple consecutive underscores: + '4_______2', + '0b1001__0100', + '0xfff__ffff', + '0x___', + '0o5__77', + '1e1__0', + ]) + def test_invalid_underscores(self, s): + with py.test.raises(ParseStringError): + string_to_int(s, base=0, allow_underscores=True) def test_no_implicit_octal(self): TESTS = ['00', '000', '00_00', '02', '0377', '02_34'] From pypy.commits at gmail.com Sat Sep 1 09:40:48 
2018 From: pypy.commits at gmail.com (rlamy) Date: Sat, 01 Sep 2018 06:40:48 -0700 (PDT) Subject: [pypy-commit] pypy default: merge heads Message-ID: <5b8a96e0.1c69fb81.a9cce.f4aa@mx.google.com> Author: Ronan Lamy Branch: Changeset: r95057:b9cf6798af7e Date: 2018-09-01 15:39 +0200 http://bitbucket.org/pypy/pypy/changeset/b9cf6798af7e/ Log: merge heads diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -393,9 +393,11 @@ _init_no_arg_ = __init__ def _ensure_objects(self): - if self._type_ not in 'zZP': - assert self._objects is None - return self._objects + # No '_objects' is the common case for primitives. Examples + # where there is an _objects is if _type in 'zZP', or if + # self comes from 'from_buffer(buf)'. See module/test_lib_pypy/ + # ctypes_test/test_buffers.py: test_from_buffer_keepalive. + return getattr(self, '_objects', None) def _getvalue(self): return self._buffer[0] diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py @@ -39,6 +39,15 @@ assert b.value in (1684234849, # little endian 1633837924) # big endian + def test_from_buffer_keepalive(self): + # Issue #2878 + b1 = bytearray("ab") + array = (c_uint16 * 32)() + array[6] = c_uint16.from_buffer(b1) + # this is also what we get on CPython. I don't think it makes + # sense because the array contains just a copy of the number. 
+ assert array._objects == {'6': b1} + try: c_wchar except NameError: From pypy.commits at gmail.com Sat Sep 1 09:58:41 2018 From: pypy.commits at gmail.com (rlamy) Date: Sat, 01 Sep 2018 06:58:41 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5b8a9b11.1c69fb81.df6f2.eac6@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r95058:33d16cf29160 Date: 2018-09-01 15:56 +0200 http://bitbucket.org/pypy/pypy/changeset/33d16cf29160/ Log: hg merge default diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -189,6 +189,7 @@ self._buffer = self._ffiarray(self._length_, autofree=True) for i, arg in enumerate(args): self[i] = arg + _init_no_arg_ = __init__ def _fix_index(self, index): if index < 0: diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -110,7 +110,7 @@ raise ValueError( "Buffer size too small (%d instead of at least %d bytes)" % (buf.nbytes, offset + size)) - result = self() + result = self._newowninstance_() dest = result._buffer.buffer try: raw_addr = buf._pypy_raw_address() + offset @@ -121,6 +121,11 @@ memmove(dest, raw_addr, size) return result + def _newowninstance_(self): + result = self.__new__(self) + result._init_no_arg_() + return result + class CArgObject(object): """ simple wrapper around buffer, just for the case of freeing @@ -151,6 +156,7 @@ def __init__(self, *args, **kwds): raise TypeError("%s has no type" % (type(self),)) + _init_no_arg_ = __init__ def _ensure_objects(self): if '_objects' not in self.__dict__: diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -267,6 +267,7 @@ return raise TypeError("Unknown constructor %s" % (args,)) + _init_no_arg_ = __init__ def _wrap_callable(self, to_call, argtypes): def f(*args): @@ -557,7 +558,7 @@ keepalive, 
newarg, newargtype = self._conv_param(argtype, defval) else: import ctypes - val = argtype._type_() + val = argtype._type_._newowninstance_() keepalive = None newarg = ctypes.byref(val) newargtype = type(newarg) diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -67,8 +67,11 @@ self._buffer = ffiarray(1, autofree=True) if value is not None: self.contents = value + def _init_no_arg_(self): + self._buffer = ffiarray(1, autofree=True) self._ffiarray = ffiarray self.__init__ = __init__ + self._init_no_arg_ = _init_no_arg_ self._type_ = TP def _build_ffiargtype(self): @@ -136,27 +139,21 @@ if not (isinstance(tp, _CDataMeta) and tp._is_pointer_like()): raise TypeError("cast() argument 2 must be a pointer type, not %s" % (tp,)) + result = tp._newowninstance_() if isinstance(obj, int): - result = tp() result._buffer[0] = obj return result elif obj is None: - result = tp() return result elif isinstance(obj, Array): - ptr = tp.__new__(tp) - ptr._buffer = tp._ffiarray(1, autofree=True) - ptr._buffer[0] = obj._buffer - result = ptr + result._buffer[0] = obj._buffer elif isinstance(obj, bytes): - result = tp() result._buffer[0] = memoryview(obj)._pypy_raw_address() return result elif not (isinstance(obj, _CData) and type(obj)._is_pointer_like()): raise TypeError("cast() argument 1 must be a pointer, not %s" % (type(obj),)) else: - result = tp() result._buffer[0] = obj._buffer[0] # The casted objects '_objects' member: diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -378,11 +378,14 @@ self._buffer = self._ffiarray(1, autofree=True) if value is not DEFAULT_VALUE: self.value = value + _init_no_arg_ = __init__ def _ensure_objects(self): - if self._type_ not in 'zZP': - assert self._objects is None - return self._objects + # No '_objects' is the common case for primitives. 
Examples + # where there is an _objects is if _type in 'zZP', or if + # self comes from 'from_buffer(buf)'. See module/test_lib_pypy/ + # ctypes_test/test_buffers.py: test_from_buffer_keepalive. + return getattr(self, '_objects', None) def _getvalue(self): return self._buffer[0] diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -280,6 +280,7 @@ self.__setattr__(name, arg) for name, arg in kwds.items(): self.__setattr__(name, arg) + _init_no_arg_ = __init__ def _subarray(self, fieldtype, name): """Return a _rawffi array of length 1 whose address is the same as diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py @@ -34,6 +34,15 @@ assert b.value in (1684234849, # little endian 1633837924) # big endian + def test_from_buffer_keepalive(self): + # Issue #2878 + b1 = bytearray("ab") + array = (c_uint16 * 32)() + array[6] = c_uint16.from_buffer(b1) + # this is also what we get on CPython. I don't think it makes + # sense because the array contains just a copy of the number. 
+ assert array._objects == {'6': b1} + try: c_wchar except NameError: diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -498,6 +498,22 @@ assert dostruct(Native) == dostruct(Big) assert dostruct(Native) != dostruct(Little) + def test_from_buffer_copy(self): + from array import array + + class S(Structure): + _fields_ = [('i', c_int)] + def __init__(self, some, unused, arguments): + pass + a = array('i', [1234567]) + s1 = S.from_buffer(a) + s2 = S.from_buffer_copy(a) + assert s1.i == 1234567 + assert s2.i == 1234567 + a[0] = -7654321 + assert s1.i == -7654321 + assert s2.i == 1234567 + class TestPointerMember(BaseCTypesTestChecker): def test_1(self): diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -464,6 +464,10 @@ raise InvalidBaseError("%s() base must be >= 2 and <= 36" % fname) self.base = base + # Leading underscores are not allowed + if s.startswith('_'): + self.error() + if base == 16 and (s.startswith('0x') or s.startswith('0X')): s = s[2:] if base == 8 and (s.startswith('0o') or s.startswith('0O')): diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py --- a/rpython/rlib/test/test_rarithmetic.py +++ b/rpython/rlib/test/test_rarithmetic.py @@ -554,50 +554,52 @@ py.test.raises(ParseStringError, string_to_int, '+'+s, base) py.test.raises(ParseStringError, string_to_int, '-'+s, base) - def test_number_underscores(self): - VALID_UNDERSCORE_LITERALS = [ - '0_0_0', - '4_2', - '1_0000_0000', - '0b1001_0100', - '0xfff_ffff', - '0o5_7_7', - '0b_0', - '0x_f', - '0o_5', - ] - INVALID_UNDERSCORE_LITERALS = [ - # Trailing underscores: - '0_', - '42_', - '1.4j_', - '0x_', - '0b1_', - '0xf_', - '0o5_', - # Underscores in the base selector: - '0_b0', - 
'0_xf', - '0_o5', - # Old-style octal, still disallowed: - '09_99', - # Multiple consecutive underscores: - '4_______2', - '0b1001__0100', - '0xfff__ffff', - '0x___', - '0o5__77', - '1e1__0', - ] - for x in VALID_UNDERSCORE_LITERALS: - print x - y = string_to_int(x, base=0, allow_underscores=True, - no_implicit_octal=True) - assert y == int(x.replace('_', ''), base=0) - for x in INVALID_UNDERSCORE_LITERALS: - print x - py.test.raises(ParseStringError, string_to_int, x, base=0, - allow_underscores=True) + @py.test.mark.parametrize('s', [ + '0_0_0', + '4_2', + '1_0000_0000', + '0b1001_0100', + '0xfff_ffff', + '0o5_7_7', + '0b_0', + '0x_f', + '0o_5', + ]) + def test_valid_underscores(self, s): + result = string_to_int( + s, base=0, allow_underscores=True, no_implicit_octal=True) + assert result == int(s.replace('_', ''), base=0) + + @py.test.mark.parametrize('s', [ + # Leading underscores + '_100', + '_', + '_0b1001_0100', + # Trailing underscores: + '0_', + '42_', + '1.4j_', + '0x_', + '0b1_', + '0xf_', + '0o5_', + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + '09_99', + # Multiple consecutive underscores: + '4_______2', + '0b1001__0100', + '0xfff__ffff', + '0x___', + '0o5__77', + '1e1__0', + ]) + def test_invalid_underscores(self, s): + with py.test.raises(ParseStringError): + string_to_int(s, base=0, allow_underscores=True) def test_no_implicit_octal(self): TESTS = ['00', '000', '00_00', '02', '0377', '02_34'] From pypy.commits at gmail.com Sat Sep 1 09:58:43 2018 From: pypy.commits at gmail.com (rlamy) Date: Sat, 01 Sep 2018 06:58:43 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: hg merge py3.5 Message-ID: <5b8a9b13.1c69fb81.48b54.1c8e@mx.google.com> Author: Ronan Lamy Branch: py3.6 Changeset: r95059:3101fec75c3f Date: 2018-09-01 15:57 +0200 http://bitbucket.org/pypy/pypy/changeset/3101fec75c3f/ Log: hg merge py3.5 diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- 
a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -189,6 +189,7 @@ self._buffer = self._ffiarray(self._length_, autofree=True) for i, arg in enumerate(args): self[i] = arg + _init_no_arg_ = __init__ def _fix_index(self, index): if index < 0: diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -110,7 +110,7 @@ raise ValueError( "Buffer size too small (%d instead of at least %d bytes)" % (buf.nbytes, offset + size)) - result = self() + result = self._newowninstance_() dest = result._buffer.buffer try: raw_addr = buf._pypy_raw_address() + offset @@ -121,6 +121,11 @@ memmove(dest, raw_addr, size) return result + def _newowninstance_(self): + result = self.__new__(self) + result._init_no_arg_() + return result + class CArgObject(object): """ simple wrapper around buffer, just for the case of freeing @@ -151,6 +156,7 @@ def __init__(self, *args, **kwds): raise TypeError("%s has no type" % (type(self),)) + _init_no_arg_ = __init__ def _ensure_objects(self): if '_objects' not in self.__dict__: diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -267,6 +267,7 @@ return raise TypeError("Unknown constructor %s" % (args,)) + _init_no_arg_ = __init__ def _wrap_callable(self, to_call, argtypes): def f(*args): @@ -557,7 +558,7 @@ keepalive, newarg, newargtype = self._conv_param(argtype, defval) else: import ctypes - val = argtype._type_() + val = argtype._type_._newowninstance_() keepalive = None newarg = ctypes.byref(val) newargtype = type(newarg) diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -67,8 +67,11 @@ self._buffer = ffiarray(1, autofree=True) if value is not None: self.contents = value + def _init_no_arg_(self): + self._buffer = ffiarray(1, autofree=True) self._ffiarray = ffiarray 
self.__init__ = __init__ + self._init_no_arg_ = _init_no_arg_ self._type_ = TP def _build_ffiargtype(self): @@ -136,27 +139,21 @@ if not (isinstance(tp, _CDataMeta) and tp._is_pointer_like()): raise TypeError("cast() argument 2 must be a pointer type, not %s" % (tp,)) + result = tp._newowninstance_() if isinstance(obj, int): - result = tp() result._buffer[0] = obj return result elif obj is None: - result = tp() return result elif isinstance(obj, Array): - ptr = tp.__new__(tp) - ptr._buffer = tp._ffiarray(1, autofree=True) - ptr._buffer[0] = obj._buffer - result = ptr + result._buffer[0] = obj._buffer elif isinstance(obj, bytes): - result = tp() result._buffer[0] = memoryview(obj)._pypy_raw_address() return result elif not (isinstance(obj, _CData) and type(obj)._is_pointer_like()): raise TypeError("cast() argument 1 must be a pointer, not %s" % (type(obj),)) else: - result = tp() result._buffer[0] = obj._buffer[0] # The casted objects '_objects' member: diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -378,11 +378,14 @@ self._buffer = self._ffiarray(1, autofree=True) if value is not DEFAULT_VALUE: self.value = value + _init_no_arg_ = __init__ def _ensure_objects(self): - if self._type_ not in 'zZP': - assert self._objects is None - return self._objects + # No '_objects' is the common case for primitives. Examples + # where there is an _objects is if _type in 'zZP', or if + # self comes from 'from_buffer(buf)'. See module/test_lib_pypy/ + # ctypes_test/test_buffers.py: test_from_buffer_keepalive. 
+ return getattr(self, '_objects', None) def _getvalue(self): return self._buffer[0] diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -280,6 +280,7 @@ self.__setattr__(name, arg) for name, arg in kwds.items(): self.__setattr__(name, arg) + _init_no_arg_ = __init__ def _subarray(self, fieldtype, name): """Return a _rawffi array of length 1 whose address is the same as diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py @@ -34,6 +34,15 @@ assert b.value in (1684234849, # little endian 1633837924) # big endian + def test_from_buffer_keepalive(self): + # Issue #2878 + b1 = bytearray("ab") + array = (c_uint16 * 32)() + array[6] = c_uint16.from_buffer(b1) + # this is also what we get on CPython. I don't think it makes + # sense because the array contains just a copy of the number. 
+ assert array._objects == {'6': b1} + try: c_wchar except NameError: diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -498,6 +498,22 @@ assert dostruct(Native) == dostruct(Big) assert dostruct(Native) != dostruct(Little) + def test_from_buffer_copy(self): + from array import array + + class S(Structure): + _fields_ = [('i', c_int)] + def __init__(self, some, unused, arguments): + pass + a = array('i', [1234567]) + s1 = S.from_buffer(a) + s2 = S.from_buffer_copy(a) + assert s1.i == 1234567 + assert s2.i == 1234567 + a[0] = -7654321 + assert s1.i == -7654321 + assert s2.i == 1234567 + class TestPointerMember(BaseCTypesTestChecker): def test_1(self): diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -464,6 +464,10 @@ raise InvalidBaseError("%s() base must be >= 2 and <= 36" % fname) self.base = base + # Leading underscores are not allowed + if s.startswith('_'): + self.error() + if base == 16 and (s.startswith('0x') or s.startswith('0X')): s = s[2:] if base == 8 and (s.startswith('0o') or s.startswith('0O')): diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py --- a/rpython/rlib/test/test_rarithmetic.py +++ b/rpython/rlib/test/test_rarithmetic.py @@ -554,50 +554,52 @@ py.test.raises(ParseStringError, string_to_int, '+'+s, base) py.test.raises(ParseStringError, string_to_int, '-'+s, base) - def test_number_underscores(self): - VALID_UNDERSCORE_LITERALS = [ - '0_0_0', - '4_2', - '1_0000_0000', - '0b1001_0100', - '0xfff_ffff', - '0o5_7_7', - '0b_0', - '0x_f', - '0o_5', - ] - INVALID_UNDERSCORE_LITERALS = [ - # Trailing underscores: - '0_', - '42_', - '1.4j_', - '0x_', - '0b1_', - '0xf_', - '0o5_', - # Underscores in the base selector: - '0_b0', - 
'0_xf', - '0_o5', - # Old-style octal, still disallowed: - '09_99', - # Multiple consecutive underscores: - '4_______2', - '0b1001__0100', - '0xfff__ffff', - '0x___', - '0o5__77', - '1e1__0', - ] - for x in VALID_UNDERSCORE_LITERALS: - print x - y = string_to_int(x, base=0, allow_underscores=True, - no_implicit_octal=True) - assert y == int(x.replace('_', ''), base=0) - for x in INVALID_UNDERSCORE_LITERALS: - print x - py.test.raises(ParseStringError, string_to_int, x, base=0, - allow_underscores=True) + @py.test.mark.parametrize('s', [ + '0_0_0', + '4_2', + '1_0000_0000', + '0b1001_0100', + '0xfff_ffff', + '0o5_7_7', + '0b_0', + '0x_f', + '0o_5', + ]) + def test_valid_underscores(self, s): + result = string_to_int( + s, base=0, allow_underscores=True, no_implicit_octal=True) + assert result == int(s.replace('_', ''), base=0) + + @py.test.mark.parametrize('s', [ + # Leading underscores + '_100', + '_', + '_0b1001_0100', + # Trailing underscores: + '0_', + '42_', + '1.4j_', + '0x_', + '0b1_', + '0xf_', + '0o5_', + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + '09_99', + # Multiple consecutive underscores: + '4_______2', + '0b1001__0100', + '0xfff__ffff', + '0x___', + '0o5__77', + '1e1__0', + ]) + def test_invalid_underscores(self, s): + with py.test.raises(ParseStringError): + string_to_int(s, base=0, allow_underscores=True) def test_no_implicit_octal(self): TESTS = ['00', '000', '00_00', '02', '0377', '02_34'] From pypy.commits at gmail.com Sat Sep 1 10:34:52 2018 From: pypy.commits at gmail.com (rlamy) Date: Sat, 01 Sep 2018 07:34:52 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: Fix unicodedata.lookup() Message-ID: <5b8aa38c.1c69fb81.b0a82.d888@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8-py3 Changeset: r95060:cf8669f6fe69 Date: 2018-09-01 16:34 +0200 http://bitbucket.org/pypy/pypy/changeset/cf8669f6fe69/ Log: Fix unicodedata.lookup() diff --git 
a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -82,9 +82,9 @@ sequence = self._lookup_named_sequence(code) if sequence is not None: # named sequences only contain UCS2 codes, no surrogates &co. - return space.newutf8(unichr_as_utf8(r_uint(code)), 1) + return space.newutf8(sequence.encode('utf-8'), len(sequence)) - + return space.newutf8(unichr_as_utf8(r_uint(code)), 1) def name(self, space, w_unichr, w_default=None): code = unichr_to_code_w(space, w_unichr) From pypy.commits at gmail.com Sat Sep 1 10:59:27 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 01 Sep 2018 07:59:27 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: uni.encode('utf8') -> runicode.unicode_encode_utf_8(uni, len(uni), 'strict') Message-ID: <5b8aa94f.1c69fb81.d04f.4241@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95061:ef8722afb037 Date: 2018-08-31 14:29 +0200 http://bitbucket.org/pypy/pypy/changeset/ef8722afb037/ Log: uni.encode('utf8') -> runicode.unicode_encode_utf_8(uni, len(uni), 'strict') diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -21,7 +21,7 @@ """Translate an error code to a unicode message string.""" from pypy.module._codecs.locale import str_decode_locale_surrogateescape uni = str_decode_locale_surrogateescape(os.strerror(errno)) - return uni.encode('utf8'), len(uni) + return runicode.unicode_encode_utf_8(uni, len(uni), 'strict') class OperationError(Exception): """Interpreter-level exception that signals an exception that should be @@ -647,7 +647,8 @@ msg = u'Windows Error %d' % winerror w_errno = space.w_None w_winerror = space.newint(winerror) - w_msg = space.newtext(msg.encode('utf8'), len(msg)) + msg_utf8 = runicode.unicode_encode_utf_8(msg, len(msg), 'strict') + w_msg = space.newtext(msg_utf8, len(msg)) else: errno = e.errno 
if errno == EINTR: diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -317,7 +317,8 @@ errorhandler = decode_error_handler(space) res, size = str_decode_mbcs(s, slen, final=final, errors=errors, errorhandler=errorhandler) - return res.encode('utf8'), len(res) + res_utf8 = runicode.unicode_encode_utf_8(res, len(res), 'strict') + return res_utf8, len(res) def str_decode_utf8(s, errors, final, errorhandler, allow_surrogates=False): """ Same as checking for the valid utf8, but we know the utf8 is not diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -444,7 +444,9 @@ ch = 0 if ch == 0: raise OperationError(space.type(w_exc), w_exc) - return space.newtuple([space.newtext(unichr(ch).encode('utf8'), 1), + ch_utf8 = runicode.unicode_encode_utf_8(unichr(ch), 1, 'strict', + allow_surrogates=True) + return space.newtuple([space.newtext(ch_utf8, 1), space.newint(start + bytelength)]) else: raise oefmt(space.w_TypeError, @@ -483,7 +485,9 @@ if not consumed: # codec complained about ASCII byte. 
raise OperationError(space.type(w_exc), w_exc) - return space.newtuple([space.newtext(replace.encode('utf8'), len(replace)), + replace_utf8 = runicode.unicode_encode_utf_8(replace, len(replace), + 'strict', allow_surrogates=True) + return space.newtuple([space.newtext(replace_utf8, len(replace)), space.newint(start + consumed)]) else: raise oefmt(space.w_TypeError, diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -42,7 +42,8 @@ return space.newbytes(ctx._string[start:end]) elif isinstance(ctx, rsre_core.UnicodeMatchContext): uni = ctx._unicodestr[start:end] - return space.newtext(uni.encode('utf8'), len(uni)) + uni_utf8 = runicode.unicode_encode_utf_8(uni, len(uni), 'strict') + return space.newtext(uni_utf8, len(uni)) else: # unreachable raise SystemError diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -84,7 +84,8 @@ s = rffi.wcharpsize2unicode(get_wbuffer(py_obj), get_wsize(py_obj)) w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type)) w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type) - w_obj.__init__(s.encode('utf8'), len(s)) + s_utf8 = runicode.unicode_encode_utf_8(s, len(s), 'strict') + w_obj.__init__(s_utf8, len(s)) track_reference(space, py_obj, w_obj) return w_obj From pypy.commits at gmail.com Sat Sep 1 10:59:29 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 01 Sep 2018 07:59:29 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: fix test_ztranslation by creatng FakeObjSpace W_UnicodeObject Message-ID: <5b8aa951.1c69fb81.6d2a7.8a3e@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95062:b6ec3298a53a Date: 2018-08-31 14:31 +0200 http://bitbucket.org/pypy/pypy/changeset/b6ec3298a53a/ Log: fix test_ztranslation by creatng FakeObjSpace W_UnicodeObject diff --git 
a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1774,7 +1774,8 @@ def convert_arg_to_w_unicode(self, w_obj, strict=None): # XXX why convert_to_w_unicode does something slightly different? from pypy.objspace.std.unicodeobject import W_UnicodeObject - assert not hasattr(self, 'is_fake_objspace') + # for z_translation tests + if hasattr(self, 'is_fake_objspace'): return self.newtext("foobar") return W_UnicodeObject.convert_arg_to_w_unicode(self, w_obj, strict) def utf8_len_w(self, w_obj): @@ -1785,6 +1786,8 @@ # Like utf8_w(), but only works if w_obj is really of type # 'unicode'. On Python 3 this is the same as utf8_w(). from pypy.objspace.std.unicodeobject import W_UnicodeObject + # for z_translation tests + if hasattr(self, 'is_fake_objspace'): return self.newtext("foobar") if not isinstance(w_obj, W_UnicodeObject): raise oefmt(self.w_TypeError, "argument must be a unicode") return self.utf8_w(w_obj) diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -60,9 +60,6 @@ return '', endingpos raise ValueError -def convert_arg_to_w_unicode(space, w_arg, strict=None): - return space.convert_arg_to_w_unicode(w_arg) - # ____________________________________________________________ def fsdecode(space, w_string): diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -249,7 +249,7 @@ if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): w_obj = space.getattr(w_exc, space.newtext('object')) space.realutf8_w(w_obj) # weeoes - w_obj = unicodehelper.convert_arg_to_w_unicode(space, w_obj) + w_obj = space.convert_arg_to_w_unicode(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, 
space.newtext('end')) end = space.int_w(w_end) @@ -308,7 +308,7 @@ if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): w_obj = space.getattr(w_exc, space.newtext('object')) space.realutf8_w(w_obj) # for errors - w_obj = unicodehelper.convert_arg_to_w_unicode(space, w_obj) + w_obj = space.convert_arg_to_w_unicode(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -646,7 +646,7 @@ @unwrap_spec(errors='text_or_none') def wrap_encoder(space, w_arg, errors="strict"): # w_arg is a W_Unicode or W_Bytes? - w_arg = unicodehelper.convert_arg_to_w_unicode(space, w_arg, rname) + w_arg = space.convert_arg_to_w_unicode(w_arg, errors) if errors is None: errors = 'strict' state = space.fromcache(CodecState) @@ -912,7 +912,7 @@ mapping = Charmap_Encode(space, w_mapping) state = space.fromcache(CodecState) - w_uni = unicodehelper.convert_arg_to_w_unicode(space, w_unicode) + w_uni = space.convert_arg_to_w_unicode(w_unicode) result = unicodehelper.utf8_encode_charmap( space.utf8_w(w_uni), errors, state.encode_error_handler, mapping) return space.newtuple([space.newbytes(result), space.newint(w_uni._len())]) diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -68,6 +68,15 @@ def append(self, w_other): pass +class W_UnicodeOjbect(W_MyObject): + _length = 21 + _utf8 = 'foobar' + def _index_to_byte(self, at): + return NonConstant(42) + def _len(self): + return self._length + + class W_MyType(W_MyObject): name = "foobar" flag_map_or_seq = '?' 
@@ -220,7 +229,7 @@ @specialize.argtype(1) def newtext(self, x, lgt=-1): - return w_some_obj() + return W_UnicodeOjbect() newtext_or_none = newtext newfilename = newtext From pypy.commits at gmail.com Sat Sep 1 10:59:31 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 01 Sep 2018 07:59:31 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: sometimes allow surrogates in uni.encode('*escape*') Message-ID: <5b8aa953.1c69fb81.87c15.c5db@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95063:07f0e7708c40 Date: 2018-08-31 14:32 +0200 http://bitbucket.org/pypy/pypy/changeset/07f0e7708c40/ Log: sometimes allow surrogates in uni.encode('*escape*') diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -191,8 +191,8 @@ def descr_decode(self, space, w_encoding=None, w_errors=None): from pypy.objspace.std.unicodeobject import ( _get_encoding_and_errors, decode_object) - encoding, errors = _get_encoding_and_errors(space, w_encoding, - w_errors) + encoding, errors, allow_surrogates = _get_encoding_and_errors(space, + w_encoding, w_errors) if errors is None: errors = 'strict' if encoding is None: diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -184,13 +184,13 @@ if w_object is None: w_value = W_UnicodeObject.EMPTY else: - encoding, errors = _get_encoding_and_errors(space, w_encoding, - w_errors) + encoding, errors, allow_surrogates = _get_encoding_and_errors(space, + w_encoding, w_errors) if encoding is None and errors is None: w_value = unicode_from_object(space, w_object) else: - w_value = unicode_from_encoded_object(space, w_object, - encoding, errors) + w_value = unicode_from_encoded_object(space, w_object, encoding, + errors) if space.is_w(w_unicodetype, space.w_unicode): return w_value @@ -513,9 +513,10 
@@ return space.w_True def descr_encode(self, space, w_encoding=None, w_errors=None): - encoding, errors = _get_encoding_and_errors(space, w_encoding, - w_errors) - return encode_object(space, self, encoding, errors, allow_surrogates=False) + encoding, errors, allow_surrogates = _get_encoding_and_errors(space, + w_encoding, w_errors) + return encode_object(space, self, encoding, errors, + allow_surrogates=allow_surrogates) @unwrap_spec(tabsize=int) def descr_expandtabs(self, space, tabsize=8): @@ -1184,7 +1185,10 @@ def _get_encoding_and_errors(space, w_encoding, w_errors): encoding = None if w_encoding is None else space.text_w(w_encoding) errors = None if w_errors is None else space.text_w(w_errors) - return encoding, errors + allow_surrogates = False + if encoding and 'escape' in encoding: + allow_surrogates = True + return encoding, errors, allow_surrogates def encode_object(space, w_object, encoding, errors, allow_surrogates=False): From pypy.commits at gmail.com Sat Sep 1 10:59:33 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 01 Sep 2018 07:59:33 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: do c07dc57b964d differently (output '?' for bad chr) Message-ID: <5b8aa955.1c69fb81.d04f.424e@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95064:58568f219c61 Date: 2018-08-31 17:35 +0200 http://bitbucket.org/pypy/pypy/changeset/58568f219c61/ Log: do c07dc57b964d differently (output '?' 
for bad chr) diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -289,9 +289,6 @@ def build(self, space, r, stop): builder = Utf8StringBuilder(stop * 3) for i in range(stop): - code = r_uint(r[i]) - if code > r_uint(0x10FFFF): - raise oefmt(space.w_ValueError, "code > 0x10FFFF") builder.append_code(r[i]) return space.newutf8(builder.build(), stop) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -50,6 +50,7 @@ chr((0x80 | ((code >> 12) & 0x3f))) + chr((0x80 | ((code >> 6) & 0x3f))) + chr((0x80 | (code & 0x3f)))) + return '?' raise ValueError @try_inline From pypy.commits at gmail.com Sat Sep 1 10:59:35 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 01 Sep 2018 07:59:35 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: fix ef8722afb037 Message-ID: <5b8aa957.1c69fb81.4f350.6e5a@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95065:34e4b4d3147b Date: 2018-08-31 22:12 +0200 http://bitbucket.org/pypy/pypy/changeset/34e4b4d3147b/ Log: fix ef8722afb037 diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -21,7 +21,7 @@ """Translate an error code to a unicode message string.""" from pypy.module._codecs.locale import str_decode_locale_surrogateescape uni = str_decode_locale_surrogateescape(os.strerror(errno)) - return runicode.unicode_encode_utf_8(uni, len(uni), 'strict') + return runicode.unicode_encode_utf_8(uni, len(uni), 'strict'), len(uni) class OperationError(Exception): """Interpreter-level exception that signals an exception that should be diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -8,6 +8,7 @@ from rpython.rlib.rarithmetic import intmask 
from rpython.rlib import jit from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib.runicode import unicode_encode_utf_8 # ____________________________________________________________ # @@ -42,7 +43,7 @@ return space.newbytes(ctx._string[start:end]) elif isinstance(ctx, rsre_core.UnicodeMatchContext): uni = ctx._unicodestr[start:end] - uni_utf8 = runicode.unicode_encode_utf_8(uni, len(uni), 'strict') + uni_utf8 = unicode_encode_utf_8(uni, len(uni), 'strict') return space.newtext(uni_utf8, len(uni)) else: # unreachable From pypy.commits at gmail.com Sat Sep 1 10:59:37 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 01 Sep 2018 07:59:37 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: add failing test - id clash causes pickling errors Message-ID: <5b8aa959.1c69fb81.b57a7.cd1a@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95066:efa8c0bedfc6 Date: 2018-09-01 16:50 +0200 http://bitbucket.org/pypy/pypy/changeset/efa8c0bedfc6/ Log: add failing test - id clash causes pickling errors diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -1026,3 +1026,9 @@ pass assert type(Sub1(X())) is Sub1 assert Sub1(X()) == b'foo' + + def test_id(self): + a = b'abcabc' + id_b = id(str(a, 'latin1')) + id_a = id(a) + assert id_a != id_b From pypy.commits at gmail.com Sat Sep 1 10:59:39 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 01 Sep 2018 07:59:39 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: merge heads Message-ID: <5b8aa95b.1c69fb81.86623.94fc@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95067:9282b2668b8f Date: 2018-09-01 16:50 +0200 http://bitbucket.org/pypy/pypy/changeset/9282b2668b8f/ Log: merge heads diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- 
a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -82,9 +82,9 @@ sequence = self._lookup_named_sequence(code) if sequence is not None: # named sequences only contain UCS2 codes, no surrogates &co. - return space.newutf8(unichr_as_utf8(r_uint(code)), 1) + return space.newutf8(sequence.encode('utf-8'), len(sequence)) - + return space.newutf8(unichr_as_utf8(r_uint(code)), 1) def name(self, space, w_unichr, w_default=None): code = unichr_to_code_w(space, w_unichr) From pypy.commits at gmail.com Sat Sep 1 10:59:41 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 01 Sep 2018 07:59:41 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: merge 3.5 into branch Message-ID: <5b8aa95d.1c69fb81.e8b71.6e02@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95068:f8e98c69e510 Date: 2018-09-01 16:53 +0200 http://bitbucket.org/pypy/pypy/changeset/f8e98c69e510/ Log: merge 3.5 into branch diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -189,6 +189,7 @@ self._buffer = self._ffiarray(self._length_, autofree=True) for i, arg in enumerate(args): self[i] = arg + _init_no_arg_ = __init__ def _fix_index(self, index): if index < 0: diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -110,7 +110,7 @@ raise ValueError( "Buffer size too small (%d instead of at least %d bytes)" % (buf.nbytes, offset + size)) - result = self() + result = self._newowninstance_() dest = result._buffer.buffer try: raw_addr = buf._pypy_raw_address() + offset @@ -121,6 +121,11 @@ memmove(dest, raw_addr, size) return result + def _newowninstance_(self): + result = self.__new__(self) + result._init_no_arg_() + return result + class CArgObject(object): """ simple wrapper around buffer, just for the case of freeing @@ -151,6 +156,7 @@ def __init__(self, *args, **kwds): raise 
TypeError("%s has no type" % (type(self),)) + _init_no_arg_ = __init__ def _ensure_objects(self): if '_objects' not in self.__dict__: diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -267,6 +267,7 @@ return raise TypeError("Unknown constructor %s" % (args,)) + _init_no_arg_ = __init__ def _wrap_callable(self, to_call, argtypes): def f(*args): @@ -557,7 +558,7 @@ keepalive, newarg, newargtype = self._conv_param(argtype, defval) else: import ctypes - val = argtype._type_() + val = argtype._type_._newowninstance_() keepalive = None newarg = ctypes.byref(val) newargtype = type(newarg) diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -67,8 +67,11 @@ self._buffer = ffiarray(1, autofree=True) if value is not None: self.contents = value + def _init_no_arg_(self): + self._buffer = ffiarray(1, autofree=True) self._ffiarray = ffiarray self.__init__ = __init__ + self._init_no_arg_ = _init_no_arg_ self._type_ = TP def _build_ffiargtype(self): @@ -136,27 +139,21 @@ if not (isinstance(tp, _CDataMeta) and tp._is_pointer_like()): raise TypeError("cast() argument 2 must be a pointer type, not %s" % (tp,)) + result = tp._newowninstance_() if isinstance(obj, int): - result = tp() result._buffer[0] = obj return result elif obj is None: - result = tp() return result elif isinstance(obj, Array): - ptr = tp.__new__(tp) - ptr._buffer = tp._ffiarray(1, autofree=True) - ptr._buffer[0] = obj._buffer - result = ptr + result._buffer[0] = obj._buffer elif isinstance(obj, bytes): - result = tp() result._buffer[0] = memoryview(obj)._pypy_raw_address() return result elif not (isinstance(obj, _CData) and type(obj)._is_pointer_like()): raise TypeError("cast() argument 1 must be a pointer, not %s" % (type(obj),)) else: - result = tp() result._buffer[0] = obj._buffer[0] # The casted objects '_objects' member: diff 
--git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -378,11 +378,14 @@ self._buffer = self._ffiarray(1, autofree=True) if value is not DEFAULT_VALUE: self.value = value + _init_no_arg_ = __init__ def _ensure_objects(self): - if self._type_ not in 'zZP': - assert self._objects is None - return self._objects + # No '_objects' is the common case for primitives. Examples + # where there is an _objects is if _type in 'zZP', or if + # self comes from 'from_buffer(buf)'. See module/test_lib_pypy/ + # ctypes_test/test_buffers.py: test_from_buffer_keepalive. + return getattr(self, '_objects', None) def _getvalue(self): return self._buffer[0] diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -280,6 +280,7 @@ self.__setattr__(name, arg) for name, arg in kwds.items(): self.__setattr__(name, arg) + _init_no_arg_ = __init__ def _subarray(self, fieldtype, name): """Return a _rawffi array of length 1 whose address is the same as diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py --- a/pypy/module/imp/test/test_import.py +++ b/pypy/module/imp/test/test_import.py @@ -302,13 +302,6 @@ assert ambig == sys.modules.get('ambig') assert hasattr(ambig,'imapackage') - def test_trailing_dot(self): - # bug-for-bug compatibility with CPython - import sys - __import__('pkg.pkg1.') - assert 'pkg.pkg1' in sys.modules - assert 'pkg.pkg1.' 
not in sys.modules - def test_from_a(self): import sys from a import imamodule diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_buffers.py @@ -34,6 +34,15 @@ assert b.value in (1684234849, # little endian 1633837924) # big endian + def test_from_buffer_keepalive(self): + # Issue #2878 + b1 = bytearray("ab") + array = (c_uint16 * 32)() + array[6] = c_uint16.from_buffer(b1) + # this is also what we get on CPython. I don't think it makes + # sense because the array contains just a copy of the number. + assert array._objects == {'6': b1} + try: c_wchar except NameError: diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py @@ -498,6 +498,22 @@ assert dostruct(Native) == dostruct(Big) assert dostruct(Native) != dostruct(Little) + def test_from_buffer_copy(self): + from array import array + + class S(Structure): + _fields_ = [('i', c_int)] + def __init__(self, some, unused, arguments): + pass + a = array('i', [1234567]) + s1 = S.from_buffer(a) + s2 = S.from_buffer_copy(a) + assert s1.i == 1234567 + assert s2.i == 1234567 + a[0] = -7654321 + assert s1.i == -7654321 + assert s2.i == 1234567 + class TestPointerMember(BaseCTypesTestChecker): def test_1(self): diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -529,6 +529,10 @@ raise InvalidBaseError("%s() base must be >= 2 and <= 36" % fname) self.base = base + # Leading underscores are not allowed + if s.startswith('_'): + self.error() + if base == 16 and (s.startswith('0x') or s.startswith('0X')): s = s[2:] if base == 8 and (s.startswith('0o') or 
s.startswith('0O')): diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py --- a/rpython/rlib/test/test_rarithmetic.py +++ b/rpython/rlib/test/test_rarithmetic.py @@ -554,50 +554,52 @@ py.test.raises(ParseStringError, string_to_int, '+'+s, base) py.test.raises(ParseStringError, string_to_int, '-'+s, base) - def test_number_underscores(self): - VALID_UNDERSCORE_LITERALS = [ - '0_0_0', - '4_2', - '1_0000_0000', - '0b1001_0100', - '0xfff_ffff', - '0o5_7_7', - '0b_0', - '0x_f', - '0o_5', - ] - INVALID_UNDERSCORE_LITERALS = [ - # Trailing underscores: - '0_', - '42_', - '1.4j_', - '0x_', - '0b1_', - '0xf_', - '0o5_', - # Underscores in the base selector: - '0_b0', - '0_xf', - '0_o5', - # Old-style octal, still disallowed: - '09_99', - # Multiple consecutive underscores: - '4_______2', - '0b1001__0100', - '0xfff__ffff', - '0x___', - '0o5__77', - '1e1__0', - ] - for x in VALID_UNDERSCORE_LITERALS: - print x - y = string_to_int(x, base=0, allow_underscores=True, - no_implicit_octal=True) - assert y == int(x.replace('_', ''), base=0) - for x in INVALID_UNDERSCORE_LITERALS: - print x - py.test.raises(ParseStringError, string_to_int, x, base=0, - allow_underscores=True) + @py.test.mark.parametrize('s', [ + '0_0_0', + '4_2', + '1_0000_0000', + '0b1001_0100', + '0xfff_ffff', + '0o5_7_7', + '0b_0', + '0x_f', + '0o_5', + ]) + def test_valid_underscores(self, s): + result = string_to_int( + s, base=0, allow_underscores=True, no_implicit_octal=True) + assert result == int(s.replace('_', ''), base=0) + + @py.test.mark.parametrize('s', [ + # Leading underscores + '_100', + '_', + '_0b1001_0100', + # Trailing underscores: + '0_', + '42_', + '1.4j_', + '0x_', + '0b1_', + '0xf_', + '0o5_', + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + '09_99', + # Multiple consecutive underscores: + '4_______2', + '0b1001__0100', + '0xfff__ffff', + '0x___', + '0o5__77', + '1e1__0', + ]) + def 
test_invalid_underscores(self, s): + with py.test.raises(ParseStringError): + string_to_int(s, base=0, allow_underscores=True) def test_no_implicit_octal(self): TESTS = ['00', '000', '00_00', '02', '0377', '02_34'] From pypy.commits at gmail.com Sun Sep 2 05:15:41 2018 From: pypy.commits at gmail.com (arigo) Date: Sun, 02 Sep 2018 02:15:41 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: Fix for id(unicode) Message-ID: <5b8baa3d.1c69fb81.4efc1.3e06@mx.google.com> Author: Armin Rigo Branch: unicode-utf8-py3 Changeset: r95069:f9566e8f8110 Date: 2018-09-02 11:14 +0200 http://bitbucket.org/pypy/pypy/changeset/f9566e8f8110/ Log: Fix for id(unicode) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -22,7 +22,7 @@ from pypy.objspace.std.sliceobject import (W_SliceObject, unwrap_start_stop, normalize_simple_slice) from pypy.objspace.std.stringmethods import StringMethods -from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT +from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT, IDTAG_ALT_UID __all__ = ['W_UnicodeObject', 'encode_object', 'decode_object', 'unicode_from_object', 'unicode_to_decimal_w'] @@ -68,7 +68,7 @@ return False s1 = space.utf8_w(self) s2 = space.utf8_w(w_other) - if len(s2) > 2: + if self._len() > 1: return s1 is s2 else: # strings of len <= 1 are unique-ified return s1 == s2 @@ -76,14 +76,16 @@ def immutable_unique_id(self, space): if self.user_overridden_class: return None - s = space.utf8_w(self) - if len(s) > 2: - uid = compute_unique_id(s) - else: # strings of len <= 1 are unique-ified - if len(s) == 1: - base = ~ord(s[0]) # negative base values - elif len(s) == 2: - base = ~((ord(s[1]) << 8) | ord(s[0])) + l = self._len() + if l > 1: + # return the uid plus 2, to make sure we don't get + # conflicts with W_BytesObject, whose id() might be + # identical + uid = compute_unique_id(self._utf8) + 
IDTAG_ALT_UID + else: # strings of len <= 1 are unique-ified + if l == 1: + base = rutf8.codepoint_at_pos(self._utf8, 0) + base = ~base # negative base values else: base = 257 # empty unicode string: base value 257 uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL diff --git a/pypy/objspace/std/util.py b/pypy/objspace/std/util.py --- a/pypy/objspace/std/util.py +++ b/pypy/objspace/std/util.py @@ -4,6 +4,7 @@ from pypy.interpreter import gateway IDTAG_SHIFT = 4 +IDTAG_ALT_UID = 2 # gives an alternate id() from the same real uid IDTAG_INT = 1 IDTAG_LONG = 3 From pypy.commits at gmail.com Sun Sep 2 05:16:01 2018 From: pypy.commits at gmail.com (arigo) Date: Sun, 02 Sep 2018 02:16:01 -0700 (PDT) Subject: [pypy-commit] pypy default: Issue #2876 Message-ID: <5b8baa51.1c69fb81.7a815.0ed5@mx.google.com> Author: Armin Rigo Branch: Changeset: r95070:88a9f1bbf1c8 Date: 2018-09-01 15:04 +0200 http://bitbucket.org/pypy/pypy/changeset/88a9f1bbf1c8/ Log: Issue #2876 Add select.PIPE_BUF. diff --git a/pypy/module/select/__init__.py b/pypy/module/select/__init__.py --- a/pypy/module/select/__init__.py +++ b/pypy/module/select/__init__.py @@ -3,6 +3,7 @@ import sys import os +from select import PIPE_BUF class Module(MixedModule): @@ -11,7 +12,8 @@ interpleveldefs = { 'select': 'interp_select.select', - 'error' : 'space.fromcache(interp_select.Cache).w_error' + 'error' : 'space.fromcache(interp_select.Cache).w_error', + 'PIPE_BUF' : 'space.wrap(%r)' % PIPE_BUF, } if os.name =='posix': diff --git a/pypy/module/select/test/test_select.py b/pypy/module/select/test/test_select.py --- a/pypy/module/select/test/test_select.py +++ b/pypy/module/select/test/test_select.py @@ -245,6 +245,10 @@ raises(OverflowError, pollster.modify, 1, -1) raises(OverflowError, pollster.modify, 1, 1 << 64) + def test_PIPE_BUF(self): + import select + assert isinstance(select.PIPE_BUF, int) + class AppTestSelectWithPipes(_AppTestSelect): "Use a pipe to get pairs of file descriptors" From pypy.commits at gmail.com Sun 
Sep 2 05:16:03 2018 From: pypy.commits at gmail.com (arigo) Date: Sun, 02 Sep 2018 02:16:03 -0700 (PDT) Subject: [pypy-commit] pypy default: merge heads Message-ID: <5b8baa53.1c69fb81.1367c.c97e@mx.google.com> Author: Armin Rigo Branch: Changeset: r95071:942ad6c1866e Date: 2018-09-02 11:15 +0200 http://bitbucket.org/pypy/pypy/changeset/942ad6c1866e/ Log: merge heads diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -464,6 +464,10 @@ raise InvalidBaseError("%s() base must be >= 2 and <= 36" % fname) self.base = base + # Leading underscores are not allowed + if s.startswith('_'): + self.error() + if base == 16 and (s.startswith('0x') or s.startswith('0X')): s = s[2:] if base == 8 and (s.startswith('0o') or s.startswith('0O')): diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py --- a/rpython/rlib/test/test_rarithmetic.py +++ b/rpython/rlib/test/test_rarithmetic.py @@ -554,50 +554,52 @@ py.test.raises(ParseStringError, string_to_int, '+'+s, base) py.test.raises(ParseStringError, string_to_int, '-'+s, base) - def test_number_underscores(self): - VALID_UNDERSCORE_LITERALS = [ - '0_0_0', - '4_2', - '1_0000_0000', - '0b1001_0100', - '0xfff_ffff', - '0o5_7_7', - '0b_0', - '0x_f', - '0o_5', - ] - INVALID_UNDERSCORE_LITERALS = [ - # Trailing underscores: - '0_', - '42_', - '1.4j_', - '0x_', - '0b1_', - '0xf_', - '0o5_', - # Underscores in the base selector: - '0_b0', - '0_xf', - '0_o5', - # Old-style octal, still disallowed: - '09_99', - # Multiple consecutive underscores: - '4_______2', - '0b1001__0100', - '0xfff__ffff', - '0x___', - '0o5__77', - '1e1__0', - ] - for x in VALID_UNDERSCORE_LITERALS: - print x - y = string_to_int(x, base=0, allow_underscores=True, - no_implicit_octal=True) - assert y == int(x.replace('_', ''), base=0) - for x in INVALID_UNDERSCORE_LITERALS: - print x - py.test.raises(ParseStringError, string_to_int, x, base=0, - 
allow_underscores=True) + @py.test.mark.parametrize('s', [ + '0_0_0', + '4_2', + '1_0000_0000', + '0b1001_0100', + '0xfff_ffff', + '0o5_7_7', + '0b_0', + '0x_f', + '0o_5', + ]) + def test_valid_underscores(self, s): + result = string_to_int( + s, base=0, allow_underscores=True, no_implicit_octal=True) + assert result == int(s.replace('_', ''), base=0) + + @py.test.mark.parametrize('s', [ + # Leading underscores + '_100', + '_', + '_0b1001_0100', + # Trailing underscores: + '0_', + '42_', + '1.4j_', + '0x_', + '0b1_', + '0xf_', + '0o5_', + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + '09_99', + # Multiple consecutive underscores: + '4_______2', + '0b1001__0100', + '0xfff__ffff', + '0x___', + '0o5__77', + '1e1__0', + ]) + def test_invalid_underscores(self, s): + with py.test.raises(ParseStringError): + string_to_int(s, base=0, allow_underscores=True) def test_no_implicit_octal(self): TESTS = ['00', '000', '00_00', '02', '0377', '02_34'] From pypy.commits at gmail.com Sun Sep 2 05:52:54 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 02 Sep 2018 02:52:54 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: fix tests Message-ID: <5b8bb2f6.1c69fb81.d41bb.4340@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95072:99ad3d85cb7a Date: 2018-09-01 23:46 +0200 http://bitbucket.org/pypy/pypy/changeset/99ad3d85cb7a/ Log: fix tests diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -97,7 +97,7 @@ monkeypatch.setattr(jit, 'isconstant', lambda x: True) space = self.space w_res = space.call_function(space.w_bytes, space.wrap([42])) - assert space.text_w(w_res) == '*' + assert space.bytes_w(w_res) == b'*' class AppTestBytesObject: diff --git a/pypy/objspace/std/test/test_stdobjspace.py b/pypy/objspace/std/test/test_stdobjspace.py --- 
a/pypy/objspace/std/test/test_stdobjspace.py +++ b/pypy/objspace/std/test/test_stdobjspace.py @@ -93,4 +93,4 @@ from pypy.objspace.std.unicodeobject import W_UnicodeObject w_x = self.space.wrap('foo\xF0') assert isinstance(w_x, W_UnicodeObject) - assert w_x._utf8 == 'foo\uxF0' + assert w_x._utf8 == 'foo\xF0' diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -110,8 +110,8 @@ space = self.space w_uni = space.wrap(u'abcd') assert space.text_w(w_uni) == 'abcd' - # TODO : how to handle this? w_uni = space.wrap(unichr(0xd921) + unichr(0xdddd)) + # XXXX Test is from py3.5, should this still fail? space.raises_w(space.w_UnicodeEncodeError, space.text_w, w_uni) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -463,7 +463,7 @@ try: builder.append_code(codepoint) except ValueError: - raise oefmt(space.w_TypeError, + raise oefmt(space.w_ValueError, "character mapping must be in range(0x110000)") return self.from_utf8builder(builder) From pypy.commits at gmail.com Sun Sep 2 05:52:56 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 02 Sep 2018 02:52:56 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: use encode_utf8, str_decode_utf8, and maybe handle surrogates in the latter Message-ID: <5b8bb2f8.1c69fb81.9e55d.839b@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95073:b040f44dc71b Date: 2018-09-02 10:18 +0200 http://bitbucket.org/pypy/pypy/changeset/b040f44dc71b/ Log: use encode_utf8, str_decode_utf8, and maybe handle surrogates in the latter diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -50,6 +50,23 @@ return u'', None, 0 return 
raise_unicode_exception_encode + at specialize.memo() +def encode_unicode_error_handler(space): + # Fast version of the "strict" errors handler. + def raise_unicode_exception_encode(errors, encoding, msg, uni, + startingpos, endingpos): + assert isinstance(uni, unicode) + u_len = len(uni) + utf8 = runicode.unicode_encode_utf8sp(uni, u_len) + raise OperationError(space.w_UnicodeEncodeError, + space.newtuple([space.newtext(encoding), + space.newtext(utf8, u_len), + space.newint(startingpos), + space.newint(endingpos), + space.newtext(msg)])) + return u'', None, 0 + return raise_unicode_exception_encode + def default_error_encode( errors, encoding, msg, u, startingpos, endingpos): """A default handler, for tests""" @@ -322,7 +339,6 @@ valid so we're trying to either raise or pack stuff with error handler. The key difference is that this is call_may_force """ - # XXX need to handle allow_surrogates slen = len(s) res = StringBuilder(slen) pos = 0 @@ -377,7 +393,7 @@ ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) - if rutf8._invalid_byte_2_of_3(ordch1, ordch2, True): + if rutf8._invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): r, pos = errorhandler(errors, "utf8", "invalid continuation byte", s, pos - 1, pos) res.append(r) @@ -994,7 +1010,7 @@ assert isinstance(uni, unicode) return runicode.unicode_encode_utf_8( uni, len(uni), "strict", - errorhandler=encode_error_handler(space), + errorhandler=encode_unicode_error_handler(space), allow_surrogates=allow_surrogates) def encode_utf8sp(space, uni): diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -7,6 +7,7 @@ find, rfind, count, endswith, replace, rsplit, split, startswith) from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import WrappedDefault, unwrap_spec +from pypy.interpreter.unicodehelper import str_decode_utf8 from pypy.objspace.std.sliceobject import 
W_SliceObject, unwrap_start_stop @@ -197,6 +198,12 @@ errors = 'strict' if encoding is None: encoding = 'utf8' + if encoding == 'utf8' or encoding == 'utf-8': + from pypy.module._codecs.interp_codecs import CodecState + state = space.fromcache(CodecState) + eh = state.decode_error_handler + s = space.charbuf_w(self) + ret, lgt, pos = str_decode_utf8(s, errors, True, eh) return decode_object(space, self, encoding, errors) @unwrap_spec(tabsize=int) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1898,12 +1898,8 @@ raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr) value = _rpy_unicode_to_decimal_w(space, w_unistr.utf8_w(space).decode('utf8')) # XXX this is the only place in the code that this funcion is called. - # It does not translate, since it uses a pypy-level error handler - # to throw the UnicodeEncodeError not the rpython default handler - #return unicodehelper.encode_utf8(space, value, - # allow_surrogates=allow_surrogates) - assert isinstance(value, unicode) - return value.encode('utf8') + return unicodehelper.encode_utf8(space, value, + allow_surrogates=allow_surrogates) def _rpy_unicode_to_decimal_w(space, unistr): # XXX rewrite this to accept a utf8 string and use a StringBuilder From pypy.commits at gmail.com Sun Sep 2 05:52:58 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 02 Sep 2018 02:52:58 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: expand test Message-ID: <5b8bb2fa.1c69fb81.da75d.fc09@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95074:34c6d0d3499f Date: 2018-09-02 10:51 +0200 http://bitbucket.org/pypy/pypy/changeset/34c6d0d3499f/ Log: expand test diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -1031,4 
+1031,5 @@ a = b'abcabc' id_b = id(str(a, 'latin1')) id_a = id(a) + assert a is not str(a, 'latin1') assert id_a != id_b From pypy.commits at gmail.com Sun Sep 2 05:53:01 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 02 Sep 2018 02:53:01 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: check class name for valid utf8 Message-ID: <5b8bb2fd.1c69fb81.df43.e12b@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95075:f7c1e87b6a3c Date: 2018-09-02 11:25 +0200 http://bitbucket.org/pypy/pypy/changeset/f7c1e87b6a3c/ Log: check class name for valid utf8 diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -14,6 +14,7 @@ from rpython.rlib.objectmodel import current_object_addr_as_int, compute_hash from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rarithmetic import intmask, r_uint +from rpython.rlib.rutf8 import CheckError, check_utf8 class MutableCell(W_Root): def unwrap_cell(self, space): @@ -177,6 +178,15 @@ overridetypedef=None, force_new_layout=False, is_heaptype=True): self.space = space + try: + check_utf8(name, False) + except CheckError as e: + raise OperationError(space.w_UnicodeEncodeError, + space.newtuple([space.newtext('utf8'), + space.newtext(name), + space.newint(e.pos), + space.newint(e.pos + 1), + space.newtext('surrogates not allowed')])) self.name = name self.qualname = None self.bases_w = bases_w From pypy.commits at gmail.com Sun Sep 2 05:53:03 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 02 Sep 2018 02:53:03 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: revert 58568f219c61 Message-ID: <5b8bb2ff.1c69fb81.58a85.6850@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95076:cc3f3f3b7285 Date: 2018-09-02 11:51 +0200 http://bitbucket.org/pypy/pypy/changeset/cc3f3f3b7285/ Log: revert 58568f219c61 diff --git a/rpython/rlib/rutf8.py 
b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -50,7 +50,6 @@ chr((0x80 | ((code >> 12) & 0x3f))) + chr((0x80 | ((code >> 6) & 0x3f))) + chr((0x80 | (code & 0x3f)))) - return '?' raise ValueError @try_inline From pypy.commits at gmail.com Sun Sep 2 10:29:19 2018 From: pypy.commits at gmail.com (antocuni) Date: Sun, 02 Sep 2018 07:29:19 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: start a blog post draft about cpyext Message-ID: <5b8bf3bf.1c69fb81.87c31.dad7@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5890:01e42155cfe8 Date: 2018-09-02 16:28 +0200 http://bitbucket.org/pypy/extradoc/changeset/01e42155cfe8/ Log: start a blog post draft about cpyext diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst new file mode 100644 --- /dev/null +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -0,0 +1,92 @@ +Inside cpyext: why emulating CPython C API is so hard +====================================================== + +cpyext is PyPy's subsistem which is responsible to provide a compatibility +layer to compile and run CPython C extensions inside PyPy. Often people asks +why it this particular extension doesn't work or it is very slow on PyPy, but +usually it is hard to answer without going into technical details: the goal of +this blog post is to explain some of these technical details, so that we can +simply link here instead of explaing again and again :). + +From a 10.000 foot view, cpyext is PyPy's version of `"Python.h"`: every time +you compile and extension which uses that header file, you are using cpyext: +this includes extension explicitly written in C (such as `numpy`) and +extensions which are generated from other compilers/preprocessors +(e.g. `Cython`). 
+ +At the time of writing, the current status is that most C extensions "just +work": generally speaking, you can simply `pip install` all of them, provided +they use the public, `official C API`_ instead of poking at private +implementation details. + +.. _`official C API`: https://docs.python.org/2/c-api/index.html + +Prologue: the PyPy GC +---------------------- + +To understand some of cpyext challenges, you need to have at least a rough +idea of how the PyPy GC works. + +Contrarily to the popular belief, the "Garbage Collector" is not only about +collecting garbage: instead, it is generally responsible of all memory +management, including allocation and deallocation. + +CPython uses a very simple memory management scheme: when you create an +object, you allocate a block of memory of the appropriate size on the heap: +depending on the details you might end up calling different allocators, but +for the sake of simplicity, you can think that this ends up being a call to +`malloc()`. Handles to objects have the C type `PyObject *`, which point to +the memory just allocated: this address never changes during the object +lifetime, and the C code can freely pass it around, store it inside +containers, retrieve it later, etc. + +Memory is managed using reference counting: when you create a new reference to +an object, or you discard a reference you own, you have to increment_ or +decrement_ reference counter accordingly. When the reference counter goes to +0, it means that the object is no longer used by anyone and can safely be +destroyed. Again, we can simplify and say that this results in a call to +`free()`, which finally releases the memory which was allocated by `malloc()`. + +.. _increment: https://docs.python.org/2/c-api/refcounting.html#c.Py_INCREF +.. 
_decrement: https://docs.python.org/2/c-api/refcounting.html#c.Py_DECREF + +The PyPy GC is completely different: it is designed assuming that a dynamic +language like Python behaves the following way: + + - you create, either directly or indirectly, lots of objects; + + - most of these objects are temporary and very short-lived: think e.g. of + doing `a + b + c`: you need to allocate an object to hold the temporary + result of `a + b`, but it dies very quickly because you no longer need it + when you do the final `+ c` part; + + - only small fraction of the objects survives and stay around for a while. + +So, the strategy is: make allocation as fast as possible; make deallocation of +short-lived objects as fast as possible; find a way to handle the remaining +small set of objects which actually survive long enough to be important. + +This is done using a **Generational GC**: the basic idea is the following: + + 1. we have a nursery, where we allocate "young objects" very fast; + + 2. when the nursery is full, we start what we call a "minor collection": we + do quick scan to determine the small set of objects which survived so + far; + + 3. we **move** these objects out of the nursery, and we place them in the + area of memory which contains the "old objects"; since the address of the + objects just changed, we fix all the references to them accordingly; + + 4. now the nursery contains only objects which died young: we can simply + discard all of them very quickly, reset the nursery and use the same area + of memory to allocate new objects from now. + +In practice, this scheme works very well and it is one of the reasons why PyPy +is much faster than CPython. However, careful readers have surely noticed +that this is a problem for `cpyext`: on one hand, we have PyPy objects which +can potentially move and change their underlying memory address; on the other +hand, we need a way to represent them as fixed-address `PyObject *` when we +pass them to C extensions. 
We surely need a way to handle that. + + From pypy.commits at gmail.com Mon Sep 3 04:55:19 2018 From: pypy.commits at gmail.com (antocuni) Date: Mon, 03 Sep 2018 01:55:19 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: Merged in camara/extradoc-1/camara/cpyextrst-minor-edit-1535911546815 (pull request #12) Message-ID: <5b8cf6f7.1c69fb81.eb09b.e029@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5892:4cca973a5781 Date: 2018-09-03 08:55 +0000 http://bitbucket.org/pypy/extradoc/changeset/4cca973a5781/ Log: Merged in camara/extradoc-1/camara/cpyextrst-minor- edit-1535911546815 (pull request #12) cpyext.rst minor edit diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -3,7 +3,7 @@ cpyext is PyPy's subsistem which is responsible to provide a compatibility layer to compile and run CPython C extensions inside PyPy. Often people asks -why it this particular extension doesn't work or it is very slow on PyPy, but +why a particular extension doesn't work or it is very slow on PyPy, but usually it is hard to answer without going into technical details: the goal of this blog post is to explain some of these technical details, so that we can simply link here instead of explaing again and again :). From pypy.commits at gmail.com Mon Sep 3 04:55:22 2018 From: pypy.commits at gmail.com (camara) Date: Mon, 03 Sep 2018 01:55:22 -0700 (PDT) Subject: [pypy-commit] extradoc camara/cpyextrst-minor-edit-1535911546815: cpyext.rst minor edit Message-ID: <5b8cf6fa.1c69fb81.e68a9.4e41@mx.google.com> Author: John M. 
Camara Branch: camara/cpyextrst-minor-edit-1535911546815 Changeset: r5891:521fe97856d7 Date: 2018-09-02 18:05 +0000 http://bitbucket.org/pypy/extradoc/changeset/521fe97856d7/ Log: cpyext.rst minor edit diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -3,7 +3,7 @@ cpyext is PyPy's subsistem which is responsible to provide a compatibility layer to compile and run CPython C extensions inside PyPy. Often people asks -why it this particular extension doesn't work or it is very slow on PyPy, but +why a particular extension doesn't work or it is very slow on PyPy, but usually it is hard to answer without going into technical details: the goal of this blog post is to explain some of these technical details, so that we can simply link here instead of explaing again and again :). From pypy.commits at gmail.com Mon Sep 3 04:59:27 2018 From: pypy.commits at gmail.com (arigo) Date: Mon, 03 Sep 2018 01:59:27 -0700 (PDT) Subject: [pypy-commit] cffi default: Issue #381 Message-ID: <5b8cf7ef.1c69fb81.90126.f329@mx.google.com> Author: Armin Rigo Branch: Changeset: r3155:226094f5b5e8 Date: 2018-09-03 10:59 +0200 http://bitbucket.org/cffi/cffi/changeset/226094f5b5e8/ Log: Issue #381 more FreeBSD special cases diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -151,6 +151,7 @@ if 'freebsd' in sys.platform: include_dirs.append('/usr/local/include') + library_dirs.append('/usr/local/lib') if 'darwin' in sys.platform: try: From pypy.commits at gmail.com Mon Sep 3 06:13:26 2018 From: pypy.commits at gmail.com (antocuni) Date: Mon, 03 Sep 2018 03:13:26 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: this is the correct ReST way to write literals Message-ID: <5b8d0946.1c69fb81.2873a.068e@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5893:5d37d9c4714b Date: 2018-09-02 18:31 +0200 
http://bitbucket.org/pypy/extradoc/changeset/5d37d9c4714b/ Log: this is the correct ReST way to write literals diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -8,14 +8,14 @@ this blog post is to explain some of these technical details, so that we can simply link here instead of explaing again and again :). -From a 10.000 foot view, cpyext is PyPy's version of `"Python.h"`: every time +From a 10.000 foot view, cpyext is PyPy's version of ``"Python.h"``: every time you compile and extension which uses that header file, you are using cpyext: -this includes extension explicitly written in C (such as `numpy`) and +this includes extension explicitly written in C (such as ``numpy``) and extensions which are generated from other compilers/preprocessors -(e.g. `Cython`). +(e.g. ``Cython``). At the time of writing, the current status is that most C extensions "just -work": generally speaking, you can simply `pip install` all of them, provided +work": generally speaking, you can simply ``pip install`` all of them, provided they use the public, `official C API`_ instead of poking at private implementation details. @@ -35,7 +35,7 @@ object, you allocate a block of memory of the appropriate size on the heap: depending on the details you might end up calling different allocators, but for the sake of simplicity, you can think that this ends up being a call to -`malloc()`. Handles to objects have the C type `PyObject *`, which point to +``malloc()``. Handles to objects have the C type ``PyObject *``, which point to the memory just allocated: this address never changes during the object lifetime, and the C code can freely pass it around, store it inside containers, retrieve it later, etc. @@ -45,7 +45,7 @@ decrement_ reference counter accordingly. 
When the reference counter goes to 0, it means that the object is no longer used by anyone and can safely be destroyed. Again, we can simplify and say that this results in a call to -`free()`, which finally releases the memory which was allocated by `malloc()`. +``free()``, which finally releases the memory which was allocated by ``malloc()``. .. _increment: https://docs.python.org/2/c-api/refcounting.html#c.Py_INCREF .. _decrement: https://docs.python.org/2/c-api/refcounting.html#c.Py_DECREF @@ -56,9 +56,9 @@ - you create, either directly or indirectly, lots of objects; - most of these objects are temporary and very short-lived: think e.g. of - doing `a + b + c`: you need to allocate an object to hold the temporary - result of `a + b`, but it dies very quickly because you no longer need it - when you do the final `+ c` part; + doing ``a + b + c``: you need to allocate an object to hold the temporary + result of ``a + b``, but it dies very quickly because you no longer need it + when you do the final ``+ c`` part; - only small fraction of the objects survives and stay around for a while. @@ -84,9 +84,7 @@ In practice, this scheme works very well and it is one of the reasons why PyPy is much faster than CPython. However, careful readers have surely noticed -that this is a problem for `cpyext`: on one hand, we have PyPy objects which +that this is a problem for ``cpyext``: on one hand, we have PyPy objects which can potentially move and change their underlying memory address; on the other -hand, we need a way to represent them as fixed-address `PyObject *` when we +hand, we need a way to represent them as fixed-address ``PyObject *`` when we pass them to C extensions. We surely need a way to handle that. 
- - From pypy.commits at gmail.com Mon Sep 3 06:13:28 2018 From: pypy.commits at gmail.com (antocuni) Date: Mon, 03 Sep 2018 03:13:28 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: add an overview of the C API, and move the cpython memory managment section there Message-ID: <5b8d0948.1c69fb81.7f049.ab68@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5894:ce800d3284bd Date: 2018-09-03 12:12 +0200 http://bitbucket.org/pypy/extradoc/changeset/ce800d3284bd/ Log: add an overview of the C API, and move the cpython memory managment section there diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -15,30 +15,29 @@ (e.g. ``Cython``). At the time of writing, the current status is that most C extensions "just -work": generally speaking, you can simply ``pip install`` all of them, provided -they use the public, `official C API`_ instead of poking at private -implementation details. +work": generally speaking, you can simply ``pip install`` all of them, +provided they use the public, `official C API`_ instead of poking at private +implementation details. However, the performance of cpyext are generally +poor, meaning that a Python program which makes heavy use of cpyext extensions +is likely to be slower on PyPy than on CPython. .. _`official C API`: https://docs.python.org/2/c-api/index.html -Prologue: the PyPy GC ----------------------- -To understand some of cpyext challenges, you need to have at least a rough -idea of how the PyPy GC works. +C API Overview +--------------- -Contrarily to the popular belief, the "Garbage Collector" is not only about -collecting garbage: instead, it is generally responsible of all memory -management, including allocation and deallocation. +At the C level, Python objects are represented as ``PyObject *``, +i.e. (mostly) opaque pointers to some common "base struct". 
CPython uses a very simple memory management scheme: when you create an object, you allocate a block of memory of the appropriate size on the heap: depending on the details you might end up calling different allocators, but for the sake of simplicity, you can think that this ends up being a call to -``malloc()``. Handles to objects have the C type ``PyObject *``, which point to -the memory just allocated: this address never changes during the object -lifetime, and the C code can freely pass it around, store it inside -containers, retrieve it later, etc. +``malloc()``. The resulting block of memory is initialized and casted to to +``PyObject *``: this address never changes during the object lifetime, and the +C code can freely pass it around, store it inside containers, retrieve it +later, etc. Memory is managed using reference counting: when you create a new reference to an object, or you discard a reference you own, you have to increment_ or @@ -50,8 +49,55 @@ .. _increment: https://docs.python.org/2/c-api/refcounting.html#c.Py_INCREF .. _decrement: https://docs.python.org/2/c-api/refcounting.html#c.Py_DECREF -The PyPy GC is completely different: it is designed assuming that a dynamic -language like Python behaves the following way: +Generally speaking, the only way to operate on ``PyObject *`` is to call the +appropriate API functions. For example, to convert a given object as a C +integer, you can use _`PyInt_AsLong()`; to add to objects together, you can +call _`PyNumber_Add()` + +.. _`PyInt_AsLong()`: https://docs.python.org/2/c-api/int.html?highlight=pyint_check#c.PyInt_AsLong +.. _`PyNumber_Add()`: https://docs.python.org/2/c-api/number.html#c.PyNumber_Add + +Internally, PyPy uses a similar approach: all Python objects are subclasses of +the RPython ``W_Root`` class, and they are operated by calling methods on the +``space`` singleton, which represents the interpreter. 
+ +At first, it looks very easy to write a compatibility layer: just make +``PyObject *`` an alias for ``W_Root``, and write simple RPython functions +(which will be translated to C by the RPython compiler) which call the +``space`` accordingly: + +.. sourcecode:: python + + def PyInt_AsLong(space, o): + return space.int_w(o) + + def PyNumber_Add(space, o1, o2): + return space.add(o1, o2) + + +Actually, the code above is not too far from the actual +implementation. However, there are tons of gory details which makes it much +harder than what it looks, and much slower unless you pay a lot of attention +to performance. + + +The PyPy GC +------------- + +To understand some of cpyext challenges, you need to have at least a rough +idea of how the PyPy GC works. + +XXX: maybe the following section is too detailed and not really necessary to +understand cpyext? We could simplify it by saying "PyPy uses a generational +GC, objects can move". + +Contrarily to the popular belief, the "Garbage Collector" is not only about +collecting garbage: instead, it is generally responsible of all memory +management, including allocation and deallocation. + +Whereas CPython uses a combination of malloc/free/refcounting to manage +memory, the PyPy GC uses a completely different approach. 
It is designed +assuming that a dynamic language like Python behaves the following way: - you create, either directly or indirectly, lots of objects; From pypy.commits at gmail.com Mon Sep 3 19:57:40 2018 From: pypy.commits at gmail.com (antocuni) Date: Mon, 03 Sep 2018 16:57:40 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: small tweaks Message-ID: <5b8dca74.1c69fb81.6d2a7.b736@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5895:d0dc59e05080 Date: 2018-09-04 01:36 +0200 http://bitbucket.org/pypy/extradoc/changeset/d0dc59e05080/ Log: small tweaks diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -3,7 +3,7 @@ cpyext is PyPy's subsistem which is responsible to provide a compatibility layer to compile and run CPython C extensions inside PyPy. Often people asks -why a particular extension doesn't work or it is very slow on PyPy, but +why it this particular extension doesn't work or it is very slow on PyPy, but usually it is hard to answer without going into technical details: the goal of this blog post is to explain some of these technical details, so that we can simply link here instead of explaing again and again :). @@ -27,11 +27,11 @@ C API Overview --------------- -At the C level, Python objects are represented as ``PyObject *``, +In CPython, at the C level, Python objects are represented as ``PyObject *``, i.e. (mostly) opaque pointers to some common "base struct". CPython uses a very simple memory management scheme: when you create an -object, you allocate a block of memory of the appropriate size on the heap: +object, you allocate a block of memory of the appropriate size on the heap; depending on the details you might end up calling different allocators, but for the sake of simplicity, you can think that this ends up being a call to ``malloc()``. 
The resulting block of memory is initialized and casted to to @@ -50,9 +50,9 @@ .. _decrement: https://docs.python.org/2/c-api/refcounting.html#c.Py_DECREF Generally speaking, the only way to operate on ``PyObject *`` is to call the -appropriate API functions. For example, to convert a given object as a C -integer, you can use _`PyInt_AsLong()`; to add to objects together, you can -call _`PyNumber_Add()` +appropriate API functions. For example, to convert a given ``PyObject *`` to a C +integer, you can use _`PyInt_AsLong()`; to add two objects together, you can +call _`PyNumber_Add()`. .. _`PyInt_AsLong()`: https://docs.python.org/2/c-api/int.html?highlight=pyint_check#c.PyInt_AsLong .. _`PyNumber_Add()`: https://docs.python.org/2/c-api/number.html#c.PyNumber_Add @@ -76,7 +76,7 @@ Actually, the code above is not too far from the actual -implementation. However, there are tons of gory details which makes it much +implementation. However, there are tons of gory details which make it much harder than what it looks, and much slower unless you pay a lot of attention to performance. From pypy.commits at gmail.com Mon Sep 3 19:57:42 2018 From: pypy.commits at gmail.com (antocuni) Date: Mon, 03 Sep 2018 16:57:42 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: one more section Message-ID: <5b8dca76.1c69fb81.64711.f51d@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5896:02a6dcd8289f Date: 2018-09-04 01:57 +0200 http://bitbucket.org/pypy/extradoc/changeset/02a6dcd8289f/ Log: one more section diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -134,3 +134,51 @@ can potentially move and change their underlying memory address; on the other hand, we need a way to represent them as fixed-address ``PyObject *`` when we pass them to C extensions. We surely need a way to handle that. 
+ + +`PyObject *` in PyPy +--------------------- + +Another challenge is that sometimes, ``PyObject *`` structs are not completely +opaque: there are parts of the public API which expose to the user specific +fields of some concrete C struct, for example the definition of PyTypeObject_: +since the low-level layout of PyPy ``W_Root`` objects is completely different +from the one used by CPython, we cannot simply pass RPython objects to C; we +need a way to handle the difference. + +.. _PyTypeObject: https://docs.python.org/2/c-api/typeobj.html + +So, we have two issues so far: objects which can move, and incompatible +low-level layouts. ``cpyext`` solves both by decoupling the RPython and the C +representations: we have two "views" of the same entity, depending on whether +we are in the PyPy world (the moving ``W_Root`` subclass) or in the C world +(the non-movable ``PyObject *``). + +``PyObject *`` are created lazily, only when they are actually needed: the +vast majority of PyPy objects are never passed to any C extension, so we don't +pay any penalty in that case; however, the first time we pass a ``W_Root`` to +C, we allocate and initialize its ``PyObject *`` counterpart. + +The same idea applies also to objects which are created in C, e.g. by calling +`PyObject_New`_: at first, only the ``PyObject *`` exists and it is +exclusively managed by reference counting: as soon as we pass it to the PyPy +world (e.g. as a return value of a function call), we create its ``W_Root`` +counterpart, which is managed by the GC as usual. + +.. _`PyObject_New`: https://docs.python.org/2/c-api/allocation.html#c.PyObject_New + +Here we start to see why calling cpyext modules is more costly in PyPy than in +CPython: we need to pay some penalty for all the conversions between +``W_Root`` and ``PyObject *``.
+ +Moreover, the first time we pass a ``W_Root`` to C we also need to allocate +the memory for the ``PyObject *`` using a slowish "CPython-style" memory +allocator: in practice, for all the objects which are passed to C we pay more +or less the same costs as CPython, thus effectively "undoing" the speedup +guaranteed by PyPy's Generational GC under normal circumstances. + + +Maintaining the link between ``W_Root`` and ``PyObject *`` +----------------------------------------------------------- + +WRITE ME From pypy.commits at gmail.com Tue Sep 4 06:24:20 2018 From: pypy.commits at gmail.com (antocuni) Date: Tue, 04 Sep 2018 03:24:20 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: write this section; I am not very satisfied about it, but better to write down some words that stare at the blank page; feel free to improve :) Message-ID: <5b8e5d54.1c69fb81.19f2a.cecb@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5897:cd51a2e3fc4d Date: 2018-09-04 12:23 +0200 http://bitbucket.org/pypy/extradoc/changeset/cd51a2e3fc4d/ Log: write this section; I am not very satisfied about it, but better to write down some words that stare at the blank page; feel free to improve :) diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -181,4 +181,43 @@ Maintaining the link between ``W_Root`` and ``PyObject *`` ----------------------------------------------------------- -WRITE ME +So, we need a way to convert between ``W_Root`` and ``PyObject *`` and +vice-versa; also, we need to ensure that the lifetimes of the two entities +are in sync. In particular: + + 1. as long as the ``W_Root`` is kept alive by the GC, we want the + ``PyObject *`` to live even if its refcount drops to 0; + + 2. as long as the ``PyObject *`` has a refcount greater than 0, we want to + make sure that the GC does not collect the ``W_Root``.
+ +The ``PyObject *`` ==> ``W_Root`` link is maintained by the special field +`ob_pypy_link`_ which is added to all ``PyObject *``: on a 64-bit machine this +means that all ``PyObject *`` have 8 bytes of overhead, but then the +conversion is very quick, just reading the field. + +For the other direction, we generally don't want to do the same: the +assumption is that the vast majority of ``W_Root`` objects will never be +passed to C, and adding an overhead of 8 bytes to all of them is a +waste. Instead, in the general case the link is maintained by using a +dictionary, where ``W_Root`` are the keys and ``PyObject *`` the values. + +However, for a `few selected`_ ``W_Root`` subclasses we **do** maintain a +direct link using the special ``_cpy_ref`` field to improve performance. In +particular, we use it for ``W_TypeObject`` (which is big anyway, so an 8-byte +overhead is negligible) and ``W_NoneObject``: ``None`` is passed around very +often, so we want to ensure that the conversion to ``PyObject *`` is very +fast. Moreover it's a singleton, so the 8-byte overhead is negligible as +well. + +This means that in theory, passing an arbitrary Python object to C is +potentially costly, because it involves doing a dictionary lookup. I assume +that this cost will eventually show up in the profiler: however, at the time +of writing there are other parts of cpyext which are even more costly (as we +will show later), so the cost of the dict lookup is never evident in the +profiler. + + +.. _`ob_pypy_link`: https://bitbucket.org/pypy/pypy/src/942ad6c1866e30d8094d1dae56a9b8f492554201/pypy/module/cpyext/parse/cpyext_object.h#lines-5 + +..
_`few selected`: https://bitbucket.org/pypy/pypy/src/942ad6c1866e30d8094d1dae56a9b8f492554201/pypy/module/cpyext/pyobject.py#lines-66 From pypy.commits at gmail.com Tue Sep 4 08:24:35 2018 From: pypy.commits at gmail.com (arigo) Date: Tue, 04 Sep 2018 05:24:35 -0700 (PDT) Subject: [pypy-commit] cffi default: Issue #382 Message-ID: <5b8e7983.1c69fb81.82d02.77e5@mx.google.com> Author: Armin Rigo Branch: Changeset: r3156:ef09637b2314 Date: 2018-09-04 14:24 +0200 http://bitbucket.org/cffi/cffi/changeset/ef09637b2314/ Log: Issue #382 Change the test to a non-floating-point example, where ignoring the return value should work even on x87. diff --git a/testing/cffi0/test_function.py b/testing/cffi0/test_function.py --- a/testing/cffi0/test_function.py +++ b/testing/cffi0/test_function.py @@ -45,14 +45,15 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def test_sin_no_return_value(self): + def test_getenv_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void sin(double x); + void getenv(char *); """) - m = ffi.dlopen(lib_m) - x = m.sin(1.23) + needs_dlopen_none() + m = ffi.dlopen(None) + x = m.getenv("FOO") assert x is None def test_dlopen_filename(self): From pypy.commits at gmail.com Tue Sep 4 15:12:05 2018 From: pypy.commits at gmail.com (arigo) Date: Tue, 04 Sep 2018 12:12:05 -0700 (PDT) Subject: [pypy-commit] cffi default: Issue #382 Message-ID: <5b8ed905.1c69fb81.eea60.8825@mx.google.com> Author: Armin Rigo Branch: Changeset: r3157:7a76a3815340 Date: 2018-09-04 21:11 +0200 http://bitbucket.org/cffi/cffi/changeset/7a76a3815340/ Log: Issue #382 Second fix attempt, thanks Adam diff --git a/testing/cffi0/test_function.py b/testing/cffi0/test_function.py --- a/testing/cffi0/test_function.py +++ b/testing/cffi0/test_function.py @@ -45,15 +45,14 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def 
test_getenv_no_return_value(self): + def test_lround_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void getenv(char *); + void lround(double x); """) - needs_dlopen_none() - m = ffi.dlopen(None) - x = m.getenv("FOO") + m = ffi.dlopen(lib_m) + x = m.lround(1.23) assert x is None def test_dlopen_filename(self): From pypy.commits at gmail.com Wed Sep 5 03:44:46 2018 From: pypy.commits at gmail.com (arigo) Date: Wed, 05 Sep 2018 00:44:46 -0700 (PDT) Subject: [pypy-commit] pypy default: Define select.PIPE_BUF only when it exists on CPython Message-ID: <5b8f896e.1c69fb81.c950a.444f@mx.google.com> Author: Armin Rigo Branch: Changeset: r95077:99c1daa566cf Date: 2018-09-05 09:43 +0200 http://bitbucket.org/pypy/pypy/changeset/99c1daa566cf/ Log: Define select.PIPE_BUF only when it exists on CPython diff --git a/pypy/module/select/__init__.py b/pypy/module/select/__init__.py --- a/pypy/module/select/__init__.py +++ b/pypy/module/select/__init__.py @@ -3,7 +3,7 @@ import sys import os -from select import PIPE_BUF +import select class Module(MixedModule): @@ -13,9 +13,11 @@ interpleveldefs = { 'select': 'interp_select.select', 'error' : 'space.fromcache(interp_select.Cache).w_error', - 'PIPE_BUF' : 'space.wrap(%r)' % PIPE_BUF, } + if hasattr(select, 'PIPE_BUF'): + interpleveldefs['PIPE_BUF'] = 'space.wrap(%r)' % select.PIPE_BUF + if os.name =='posix': interpleveldefs['poll'] = 'interp_select.poll' diff --git a/pypy/module/select/test/test_select.py b/pypy/module/select/test/test_select.py --- a/pypy/module/select/test/test_select.py +++ b/pypy/module/select/test/test_select.py @@ -245,10 +245,6 @@ raises(OverflowError, pollster.modify, 1, -1) raises(OverflowError, pollster.modify, 1, 1 << 64) - def test_PIPE_BUF(self): - import select - assert isinstance(select.PIPE_BUF, int) - class AppTestSelectWithPipes(_AppTestSelect): "Use a pipe to get pairs of file descriptors" @@ -322,6 +318,11 @@ # ^^^ CPython 
gives 100, PyPy gives 1. I think both are OK as # long as there is no crash. + def test_PIPE_BUF(self): + # no PIPE_BUF on Windows; this test class is skipped on Windows. + import select + assert isinstance(select.PIPE_BUF, int) + class AppTestSelectWithSockets(_AppTestSelect): """Same tests with connected sockets. From pypy.commits at gmail.com Wed Sep 5 08:10:27 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 05 Sep 2018 05:10:27 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: make csv bytes error more compatible Message-ID: <5b8fc7b3.1c69fb81.434de.c656@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95078:abeffc967e2e Date: 2018-09-02 17:01 +0200 http://bitbucket.org/pypy/pypy/changeset/abeffc967e2e/ Log: make csv bytes error more compatible diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py --- a/pypy/module/_csv/interp_reader.py +++ b/pypy/module/_csv/interp_reader.py @@ -73,6 +73,9 @@ break raise self.line_num += 1 + if space.isinstance_w(w_line, space.w_bytes): + raise self.error(u"iterator should return strings, not bytes " + u"(did you open the file in text mode?") line = space.unicode_w(w_line) for c in line: if c == u'\0': diff --git a/pypy/module/_csv/test/test_reader.py b/pypy/module/_csv/test/test_reader.py --- a/pypy/module/_csv/test/test_reader.py +++ b/pypy/module/_csv/test/test_reader.py @@ -33,7 +33,7 @@ def test_cannot_read_bytes(self): import _csv reader = _csv.reader([b'foo']) - raises((TypeError, _csv.Error), next, reader) + raises(_csv.Error, next, reader) def test_read_oddinputs(self): self._read_test([], []) From pypy.commits at gmail.com Wed Sep 5 08:10:30 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 05 Sep 2018 05:10:30 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: fixes for failures in array, _pypyjson Message-ID: <5b8fc7b6.1c69fb81.7e54e.994c@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95079:68f116b03eb4 Date: 2018-09-02 17:11 
+0200 http://bitbucket.org/pypy/pypy/changeset/68f116b03eb4/ Log: fixes for failures in array, _pypyjson diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -74,10 +74,6 @@ break return i - @specialize.arg(1) - def _raise(self, msg, *args): - raise oefmt(self.space.w_ValueError, msg, *args) - def decode_any(self, i): i = self.skip_whitespace(i) ch = self.ll_chars[i] @@ -330,10 +326,10 @@ i = self.decode_escape_sequence(i, builder) elif ch < '\x20': if ch == '\0': - self._raise("Unterminated string starting at char %d", + raise DecoderError("Unterminated string starting at", start - 1) else: - self._raise("Invalid control character at char %d", i-1) + raise DecoderError("Invalid control character at", i-1) else: builder.append(ch) @@ -368,7 +364,7 @@ val = self.decode_surrogate_pair(i, val) i += 6 except ValueError: - self._raise("Invalid \uXXXX escape (char %d)", i-1) + raise DecoderError("Invalid \uXXXX escape (char %d)", i-1) return # help the annotator to know that we'll never go beyond # this point # diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -1,4 +1,5 @@ from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rutf8 import Utf8StringIterator HEX = '0123456789abcdef' @@ -25,8 +26,7 @@ sb = StringBuilder(len(u) + 20) - for i in range(len(u)): - c = ord(u[i]) + for c in Utf8StringIterator(u): if c <= ord('~'): if c == ord('"') or c == ord('\\'): sb.append('\\') diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -429,7 +429,7 @@ if len(s) % self.itemsize != 0: raise oefmt(space.w_ValueError, "bytes length not a multiple of item size") - 
self.check_valid_unicode(space, s) # empty for non-u arrays + #self.check_valid_unicode(space, s) # empty for non-u arrays oldlen = self.len new = len(s) / self.itemsize if not new: @@ -757,7 +757,7 @@ return space.newtext("array('%s')" % self.typecode) elif self.typecode == "u": r = space.repr(self.descr_tounicode(space)) - s = b"array('b', %s)" % space.utf8_w(r) + s = "array('%s', %s)" % (self.typecode, space.text_w(r)) return space.newtext(s) else: r = space.repr(self.descr_tolist(space)) From pypy.commits at gmail.com Wed Sep 5 08:10:32 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 05 Sep 2018 05:10:32 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: merge py3.5 into branch Message-ID: <5b8fc7b8.1c69fb81.b49bf.a29e@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95080:a0b258c2d6e0 Date: 2018-09-02 17:14 +0200 http://bitbucket.org/pypy/pypy/changeset/a0b258c2d6e0/ Log: merge py3.5 into branch diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py --- a/pypy/module/_csv/interp_reader.py +++ b/pypy/module/_csv/interp_reader.py @@ -73,6 +73,9 @@ break raise self.line_num += 1 + if space.isinstance_w(w_line, space.w_bytes): + raise self.error(u"iterator should return strings, not bytes " + u"(did you open the file in text mode?") line = space.realunicode_w(w_line) for c in line: if c == b'\0': diff --git a/pypy/module/_csv/test/test_reader.py b/pypy/module/_csv/test/test_reader.py --- a/pypy/module/_csv/test/test_reader.py +++ b/pypy/module/_csv/test/test_reader.py @@ -33,7 +33,7 @@ def test_cannot_read_bytes(self): import _csv reader = _csv.reader([b'foo']) - raises((TypeError, _csv.Error), next, reader) + raises(_csv.Error, next, reader) def test_read_oddinputs(self): self._read_test([], []) From pypy.commits at gmail.com Wed Sep 5 08:10:34 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 05 Sep 2018 05:10:34 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: use utf8 iterator 
in _csv.reader Message-ID: <5b8fc7ba.1c69fb81.dfd6f.a794@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95081:5fdece607d46 Date: 2018-09-02 17:28 +0200 http://bitbucket.org/pypy/pypy/changeset/5fdece607d46/ Log: use utf8 iterator in _csv.reader diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py --- a/pypy/module/_csv/interp_reader.py +++ b/pypy/module/_csv/interp_reader.py @@ -1,4 +1,5 @@ from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib.rutf8 import Utf8StringIterator from rpython.rlib import objectmodel from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError @@ -76,9 +77,11 @@ if space.isinstance_w(w_line, space.w_bytes): raise self.error(u"iterator should return strings, not bytes " u"(did you open the file in text mode?") - line = space.realunicode_w(w_line) - for c in line: - if c == b'\0': + line = space.utf8_w(w_line) + for c in Utf8StringIterator(line): + # XXX rewrite this to use c (as int) not unichr(c) + c = unichr(c) + if c == '\0': raise self.error(u"line contains NULL byte") if state == START_RECORD: From pypy.commits at gmail.com Wed Sep 5 08:10:36 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 05 Sep 2018 05:10:36 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: use unicodehelper methods instead of rffi calls Message-ID: <5b8fc7bc.1c69fb81.967f9.b711@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95082:7dd0a62dbf67 Date: 2018-09-05 15:09 +0300 http://bitbucket.org/pypy/pypy/changeset/7dd0a62dbf67/ Log: use unicodehelper methods instead of rffi calls diff --git a/pypy/module/cpyext/test1/test_unicodeobject.py b/pypy/module/cpyext/test1/test_unicodeobject.py --- a/pypy/module/cpyext/test1/test_unicodeobject.py +++ b/pypy/module/cpyext/test1/test_unicodeobject.py @@ -659,9 +659,9 @@ b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') b_errors = rffi.str2charp('strict') - 
assert space.utf8_w(PyUnicode_Decode( - space, b_text, 4, b_encoding, b_errors)).decode() == u'caf\xe9' - assert (space.utf8_w( + assert space.text_w(PyUnicode_Decode( + space, b_text, 4, b_encoding, b_errors)).decode('utf8') == u'caf\xe9' + assert (space.text_w( PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9'.encode("utf-8")) @@ -681,7 +681,7 @@ def test_decode_null_encoding(self, space): null_charp = lltype.nullptr(rffi.CCHARP.TO) u_text = u'abcdefg' - s_text = space.text_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) + s_text = space.bytes_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) b_text = rffi.str2charp(s_text) assert (space.utf8_w(PyUnicode_Decode( space, b_text, len(s_text), null_charp, null_charp)) == diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -1,13 +1,13 @@ from rpython.rtyper.lltypesystem import rffi, lltype -from rpython.rlib.runicode import unicode_encode_latin_1, unicode_encode_utf_16_helper from rpython.rlib.rarithmetic import widen -from rpython.rlib import rstring, runicode +from rpython.rlib import rstring, runicode, rutf8 from rpython.tool.sourcetools import func_renamer from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.unicodehelper import ( wcharpsize2utf8, str_decode_utf_16_helper, str_decode_utf_32_helper, - unicode_encode_decimal) + unicode_encode_decimal, utf8_encode_utf_16_helper, BYTEORDER, + utf8_encode_utf_32_helper) from pypy.module.unicodedata import unicodedb from pypy.module.cpyext.api import ( CANNOT_FAIL, Py_ssize_t, build_type_checkers, cpython_api, @@ -71,7 +71,7 @@ def unicode_attach(space, py_obj, w_obj, w_userdata=None): "Fills a newly allocated PyUnicodeObject with a unicode string" - value = space.utf8_w(w_obj).decode('utf8') + value = space.utf8_w(w_obj) set_wsize(py_obj, 
len(value)) set_wbuffer(py_obj, lltype.nullptr(rffi.CWCHARP.TO)) _readify(space, py_obj, value) @@ -271,20 +271,19 @@ assert isinstance(w_obj, unicodeobject.W_UnicodeObject) py_obj = as_pyobj(space, w_obj) assert get_kind(py_obj) == WCHAR_KIND - return _readify(space, py_obj, space.utf8_w(w_obj).decode('utf8')) + return _readify(space, py_obj, space.utf8_w(w_obj)) def _readify(space, py_obj, value): maxchar = 0 - for c in value: - if ord(c) > maxchar: - maxchar = ord(c) + for c in rutf8.Utf8StringIterator(value): + if c > maxchar: + maxchar = c if maxchar > MAX_UNICODE: raise oefmt(space.w_ValueError, "Character U+%d is not in range [U+0000; U+10ffff]", maxchar) if maxchar < 256: - ucs1_data = rffi.str2charp(unicode_encode_latin_1( - value, len(value), errors='strict')) + ucs1_data = rffi.str2charp(value) set_data(py_obj, cts.cast('void*', ucs1_data)) set_kind(py_obj, _1BYTE_KIND) set_len(py_obj, get_wsize(py_obj)) @@ -298,9 +297,9 @@ set_utf8_len(py_obj, 0) elif maxchar < 65536: # XXX: assumes that sizeof(wchar_t) == 4 - ucs2_str = unicode_encode_utf_16_helper( - value, len(value), errors='strict', - byteorder=runicode.BYTEORDER) + ucs2_str = utf8_encode_utf_16_helper( + value, 'strict', + byteorder=BYTEORDER) ucs2_data = cts.cast('Py_UCS2 *', rffi.str2charp(ucs2_str)) set_data(py_obj, cts.cast('void*', ucs2_data)) set_len(py_obj, get_wsize(py_obj)) @@ -309,10 +308,14 @@ set_utf8_len(py_obj, 0) else: # XXX: assumes that sizeof(wchar_t) == 4 + ucs4_str = utf8_encode_utf_32_helper( + value, 'strict', + byteorder=BYTEORDER) if not get_wbuffer(py_obj): # Copy unicode buffer - set_wbuffer(py_obj, rffi.unicode2wcharp(value)) - set_wsize(py_obj, len(value)) + wchar = cts.cast('wchar_t*', rffi.str2charp(ucs4_str)) + set_wbuffer(py_obj, wchar) + set_wsize(py_obj, len(ucs4_str) // 4) ucs4_data = get_wbuffer(py_obj) set_data(py_obj, cts.cast('void*', ucs4_data)) set_len(py_obj, get_wsize(py_obj)) @@ -493,9 +496,10 @@ the codec.""" if not encoding: # This tracks CPython 2.7, 
in CPython 3.4 'utf-8' is hardcoded instead - encoding = PyUnicode_GetDefaultEncoding(space) + w_encoding = space.newtext('utf-8') + else: + w_encoding = space.newtext(rffi.charp2str(encoding)) w_str = space.newbytes(rffi.charpsize2str(s, size)) - w_encoding = space.newtext(rffi.charp2str(encoding)) if errors: w_errors = space.newtext(rffi.charp2str(errors)) else: From pypy.commits at gmail.com Wed Sep 5 15:22:30 2018 From: pypy.commits at gmail.com (arigo) Date: Wed, 05 Sep 2018 12:22:30 -0700 (PDT) Subject: [pypy-commit] pypy default: I remember now why we moved away from using CPython's definition of Message-ID: <5b902cf6.1c69fb81.83087.7e74@mx.google.com> Author: Armin Rigo Branch: Changeset: r95083:b9bbd6c09333 Date: 2018-09-05 21:20 +0200 http://bitbucket.org/pypy/pypy/changeset/b9bbd6c09333/ Log: I remember now why we moved away from using CPython's definition of constants. Now translation (and own tests) pass again under PyPy. diff --git a/pypy/module/select/__init__.py b/pypy/module/select/__init__.py --- a/pypy/module/select/__init__.py +++ b/pypy/module/select/__init__.py @@ -3,7 +3,7 @@ import sys import os -import select +from rpython.rlib import _rsocket_rffi as _c class Module(MixedModule): @@ -15,9 +15,6 @@ 'error' : 'space.fromcache(interp_select.Cache).w_error', } - if hasattr(select, 'PIPE_BUF'): - interpleveldefs['PIPE_BUF'] = 'space.wrap(%r)' % select.PIPE_BUF - if os.name =='posix': interpleveldefs['poll'] = 'interp_select.poll' @@ -35,6 +32,10 @@ for symbol in symbol_map: interpleveldefs[symbol] = "space.wrap(interp_kqueue.%s)" % symbol + if _c.PIPE_BUF is not None: + interpleveldefs['PIPE_BUF'] = 'space.wrap(%r)' % _c.PIPE_BUF + + def buildloaders(cls): from rpython.rlib import rpoll for name in rpoll.eventnames: diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ b/rpython/rlib/_rsocket_rffi.py @@ -33,6 +33,7 @@ 'arpa/inet.h', 'stdint.h', 'errno.h', + 'limits.h', ) if 
_HAS_AF_PACKET: includes += ('netpacket/packet.h', @@ -113,6 +114,7 @@ F_GETFL = platform.DefinedConstantInteger('F_GETFL') F_SETFL = platform.DefinedConstantInteger('F_SETFL') FIONBIO = platform.DefinedConstantInteger('FIONBIO') + PIPE_BUF = platform.DefinedConstantInteger('PIPE_BUF') INVALID_SOCKET = platform.DefinedConstantInteger('INVALID_SOCKET') INET_ADDRSTRLEN = platform.DefinedConstantInteger('INET_ADDRSTRLEN') @@ -1081,6 +1083,7 @@ EWOULDBLOCK = cConfig.EWOULDBLOCK or cConfig.WSAEWOULDBLOCK EAFNOSUPPORT = cConfig.EAFNOSUPPORT or cConfig.WSAEAFNOSUPPORT EISCONN = cConfig.EISCONN or cConfig.WSAEISCONN +PIPE_BUF = cConfig.PIPE_BUF # may be None linux = cConfig.linux WIN32 = cConfig.WIN32 From pypy.commits at gmail.com Wed Sep 5 19:56:36 2018 From: pypy.commits at gmail.com (antocuni) Date: Wed, 05 Sep 2018 16:56:36 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: write one more section, and add the skeleton for more to come Message-ID: <5b906d34.1c69fb81.967f9.75e1@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5898:d8b0a3d64722 Date: 2018-09-06 01:56 +0200 http://bitbucket.org/pypy/extradoc/changeset/d8b0a3d64722/ Log: write one more section, and add the skeleton for more to come diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -221,3 +221,93 @@ .. _`ob_pypy_link`: https://bitbucket.org/pypy/pypy/src/942ad6c1866e30d8094d1dae56a9b8f492554201/pypy/module/cpyext/parse/cpyext_object.h#lines-5 .. _`few selected`: https://bitbucket.org/pypy/pypy/src/942ad6c1866e30d8094d1dae56a9b8f492554201/pypy/module/cpyext/pyobject.py#lines-66 + + +Crossing the border between RPython and C +------------------------------------------ + +There are two other things we need to care about whenever we cross the border +between RPython and C, and vice-versa: exception handling and the GIL. 
+ +In the C API, exceptions are raised by calling `PyErr_SetString()`_ (or one of +`many other functions`_ which have a similar effect), which basically works by +creating an exception value and storing it in some global variable; then, the +function signals that an exception has occurred by returning an error value, +usually ``NULL``. + +On the other hand, in the PyPy interpreter they are propagated by raising the +RPython-level OperationError_ exception, which wraps the actual app-level +exception values: to harmonize the two worlds, whenever we return from C to +RPython, we need to check whether a C API exception was raised and turn it +into an ``OperationError`` if needed. + +About the GIL, we won't dig into details of `how it is handled in cpyext`_: +for the purpose of this post, it is enough to know that whenever we enter the +C land, we store the current thread id into a global variable which is +accessible also from C; conversely, whenever we go back from RPython to C, we +restore this value to 0. + +Similarly, we need to the inverse operation whenever you need to cross the +border between C and RPython, e.g. by calling a Python callback from C code. + +All this complexity is automatically handled by the RPython function +`generic_cpy_call`_: if you look at the code you see that it takes care of 4 +things: + + 1. handling the GIL as explained above + + 2. handling exceptions, if they are raised + + 3. converting arguments from ``W_Root`` to ``PyObject *`` + + 4. converting the return value from ``PyObject *`` to ``W_Root`` + + +So, we can see that calling C from RPython introduces some overhead: how much +is it? + +Assuming that the conversion between ``W_Root`` and ``PyObject *`` has a +reasonable cost (as explained by the previous section), the overhead +introduced by a single border-cross is still acceptable, especially if the +callee is doing some non-negligible amount of work.
+ +However this is not always the case; there are basically three problems that +make (or used to make) cpyext super slow: + + 1. paying the border-crossing cost for trivial operations which are called + very often, such as ``Py_INCREF`` + + 2. crossing the border back and forth many times, even if it's not strictly + needed + + 3. paying an excessive cost for argument and return value conversions + + +The next sections are going to explain in more detail each of these problems. + +.. _`PyErr_SetString()`: https://docs.python.org/2/c-api/exceptions.html#c.PyErr_SetString +.. _`many other functions`: https://docs.python.org/2/c-api/exceptions.html#exception-handling +.. _OperationError: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/interpreter/error.py#lines-20 +.. _`how it is handled in cpyext`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-205 +.. _`generic_cpy_call`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-1757 + + +Avoiding unnecessary roundtrips +-------------------------------- + +XXX basically, this section explains what we did in the cpyext-avoid-roundtrips branch, and what we still need to do + + +Conversion costs +----------------- + +XXX this is one of the biggest unsolved problems so far; explain or link to +this: + +https://bitbucket.org/pypy/extradoc/src/cd51a2e3fc4dac278074997c7dc198caee819769/planning/cpyext.txt#lines-27 + + +Borrowed references +-------------------- + +XXX explain why borrowed references are a problem for us; possibly link to: https://pythoncapi.readthedocs.io/bad_api.html#borrowed-references From pypy.commits at gmail.com Thu Sep 6 20:21:15 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 06 Sep 2018 17:21:15 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: typo Message-ID: <5b91c47b.1c69fb81.43d1e.e3e1@mx.google.com> Author: Antonio Cuni Branch: 
extradoc Changeset: r5899:bf33b8d381b3 Date: 2018-09-07 01:02 +0200 http://bitbucket.org/pypy/extradoc/changeset/bf33b8d381b3/ Log: typo diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -247,7 +247,7 @@ accessible also from C; conversely, whenever we go back from RPython to C, we restore this value to 0. -Similarly, we need to the inverse operation whenever you need to cross the +Similarly, we need to do the inverse operations whenever you need to cross the border between C and RPython, e.g. by calling a Python callback from C code. All this complexity is automatically handled by the RPython function From pypy.commits at gmail.com Thu Sep 6 20:21:18 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 06 Sep 2018 17:21:18 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: better to use PyObject* without a space: it renders better in an HTML page Message-ID: <5b91c47e.1c69fb81.f70b4.4a10@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5900:b403731a96c4 Date: 2018-09-07 01:03 +0200 http://bitbucket.org/pypy/extradoc/changeset/b403731a96c4/ Log: better to use PyObject* without a space: it renders better in an HTML page diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -27,7 +27,7 @@ C API Overview --------------- -In CPython, at the C level, Python objects are represented as ``PyObject *``, +In CPython, at the C level, Python objects are represented as ``PyObject*``, i.e. (mostly) opaque pointers to some common "base struct". CPython uses a very simple memory management scheme: when you create an @@ -35,7 +35,7 @@ depending on the details you might end up calling different allocators, but for the sake of simplicity, you can think that this ends up being a call to ``malloc()``. 
The resulting block of memory is initialized and casted to to -``PyObject *``: this address never changes during the object lifetime, and the +``PyObject*``: this address never changes during the object lifetime, and the C code can freely pass it around, store it inside containers, retrieve it later, etc. @@ -49,8 +49,8 @@ .. _increment: https://docs.python.org/2/c-api/refcounting.html#c.Py_INCREF .. _decrement: https://docs.python.org/2/c-api/refcounting.html#c.Py_DECREF -Generally speaking, the only way to operate on ``PyObject *`` is to call the -appropriate API functions. For example, to convert a given ``PyObject *`` to a C +Generally speaking, the only way to operate on ``PyObject*`` is to call the +appropriate API functions. For example, to convert a given ``PyObject*`` to a C integer, you can use _`PyInt_AsLong()`; to add two objects together, you can call _`PyNumber_Add()`. @@ -62,7 +62,7 @@ ``space`` singleton, which represents the interpreter. At first, it looks very easy to write a compatibility layer: just make -``PyObject *`` an alias for ``W_Root``, and write simple RPython functions +``PyObject*`` an alias for ``W_Root``, and write simple RPython functions (which will be translated to C by the RPython compiler) which call the ``space`` accordingly: @@ -132,14 +132,14 @@ is much faster than CPython. However, careful readers have surely noticed that this is a problem for ``cpyext``: on one hand, we have PyPy objects which can potentially move and change their underlying memory address; on the other -hand, we need a way to represent them as fixed-address ``PyObject *`` when we +hand, we need a way to represent them as fixed-address ``PyObject*`` when we pass them to C extensions. We surely need a way to handle that. 
-`PyObject *` in PyPy +`PyObject*` in PyPy --------------------- -Another challenge is that sometimes, ``PyObject *`` structs are not completely +Another challenge is that sometimes, ``PyObject*`` structs are not completely opaque: there are parts of the public API which expose to the user specific fields of some concrete C struct, for example the definition of PyTypeObject_: since the low-level layout of PyPy ``W_Root`` objects is completely different @@ -152,15 +152,15 @@ low-level layouts. ``cpyext`` solves both by decoupling the RPython and the C representations: we have two "views" of the same entity, depending on whether we are in the PyPy world (the moving ``W_Root`` subclass) or in the C world -(the non-movable ``PyObject *``). +(the non-movable ``PyObject*``). -``PyObject *`` are created lazily, only when they are actually needed: the +``PyObject*`` are created lazily, only when they are actually needed: the vast majority of PyPy objects are never passed to any C extension, so we don't pay any penalty in that case; however, the first time we pass a ``W_Root`` to -C, we allocate and initialize its ``PyObject *`` counterpart. +C, we allocate and initialize its ``PyObject*`` counterpart. The same idea applies also to objects which are created in C, e.g. by calling -_`PyObject_New`: at first, only the ``PyObject *`` exists and it is +_`PyObject_New`: at first, only the ``PyObject*`` exists and it is exclusively managed by reference counting: as soon as we pass it to the PyPy world (e.g. as a return value of a function call), we create its ``W_Root`` counterpart, which is managed by the GC as usual. @@ -169,44 +169,44 @@ Here we start to see why calling cpyext modules is more costly in PyPy than in CPython: we need to pay some penalty for all the conversions between -``W_Root`` and ``PyObject *``. +``W_Root`` and ``PyObject*``. 
Moreover, the first time we pass a ``W_Root`` to C we also need to allocate -the memory for the ``PyObject *`` using a slowish "CPython-style" memory +the memory for the ``PyObject*`` using a slowish "CPython-style" memory allocator: in practice, for all the objects which are passed to C we pay more or less the same costs as CPython, thus effectively "undoing" the speedup guaranteed by PyPy's Generational GC under normal circumstances. -Maintaining the link between ``W_Root`` and ``PyObject *`` +Maintaining the link between ``W_Root`` and ``PyObject*`` ----------------------------------------------------------- -So, we need a way to convert between ``W_Root`` and ``PyObject *`` and +So, we need a way to convert between ``W_Root`` and ``PyObject*`` and vice-versa; also, we need to to ensure that the lifetime of the two entities are in sync. In particular: 1. as long as the ``W_Root`` is kept alive by the GC, we want the - ``PyObject *`` to live even if its refcount drops to 0; + ``PyObject*`` to live even if its refcount drops to 0; - 2. as long as the ``PyObject *`` has a refcount greater than 0, we want to + 2. as long as the ``PyObject*`` has a refcount greater than 0, we want to make sure that the GC does not collect the ``W_Root``. -The ``PyObject *`` ==> ``W_Root`` link is maintained by the special field -_`ob_pypy_link` which is added to all ``PyObject *``: on a 64 bit machine this -means that all ``PyObject *`` have 8 bytes of overhead, but then the +The ``PyObject*`` ==> ``W_Root`` link is maintained by the special field +_`ob_pypy_link` which is added to all ``PyObject*``: on a 64 bit machine this +means that all ``PyObject*`` have 8 bytes of overhead, but then the conversion is very quick, just reading the field. For the other direction, we generally don't want to do the same: the assumption is that the vast majority of ``W_Root`` objects will never be passed to C, and adding an overhead of 8 bytes to all of them is a waste. 
Instead, in the general case the link is maintained by using a -dictionary, where ``W_Root`` are the keys and ``PyObject *`` the values. +dictionary, where ``W_Root`` are the keys and ``PyObject*`` the values. However, for a _`few selected` ``W_Root`` subclasses we **do** maintain a direct link using the special ``_cpy_ref`` field to improve performance. In particular, we use it for ``W_TypeObject`` (which is big anyway, so a 8 bytes overhead is negligible) and ``W_NoneObject``: ``None`` is passed around very -often, so we want to ensure that the conversion to ``PyObject *`` is very +often, so we want to ensure that the conversion to ``PyObject*`` is very fast. Moreover it's a singleton, so the 8 bytes overhead is negligible as well. @@ -258,15 +258,15 @@ 2. handling exceptions, if they are raised - 3. converting arguments from ``W_Root`` to ``PyObject *`` + 3. converting arguments from ``W_Root`` to ``PyObject*`` - 4. converting the return value from ``PyObject *`` to ``W_Root`` + 4. converting the return value from ``PyObject*`` to ``W_Root`` So, we can see that calling C from RPython introduce some overhead: how much is it? -Assuming that the conversion between ``W_Root`` and ``PyObject *`` has a +Assuming that the conversion between ``W_Root`` and ``PyObject*`` has a reasonable cost (as explained by the previous section), the overhead introduced by a single border-cross is still accettable, especially if the callee is doing some non-negligible amount of work. 
From pypy.commits at gmail.com Thu Sep 6 20:21:20 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 06 Sep 2018 17:21:20 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: write another section, essentially explaing what we did in cpyext-avoid-roundtrips Message-ID: <5b91c480.1c69fb81.4a6db.cd89@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5901:a4b382cdada7 Date: 2018-09-07 02:18 +0200 http://bitbucket.org/pypy/extradoc/changeset/a4b382cdada7/ Log: write another section, essentially explaing what we did in cpyext- avoid-roundtrips diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -295,7 +295,101 @@ Avoiding unnecessary roundtrips -------------------------------- -XXX basically, this section explains what we did in the cpyext-avoid-roundtrips branch, and what we still need to do +Prior to the `2017 Cape Town Sprint`_, cpyext was horribly slow, and we were +well aware of it: the main reason was that we never really paid too much +attention to performances: as explained by this blog post, emulating all the +CPython quirks is basically a nightmare, so better to concentrate on +correctness first. + +However, we didn't really know **why** it was so slow: we had theories and +assumptions, usually pointing at the cost of conversions between ``W_Root`` +and ``PyObject*``, but we never actually measured it. + +So, I decided to write a set of `cpyext microbenchmarks`_ to measure the +performance of various operation. The result was somewhat surprising: the +theory suggests that when you do a cpyext C call, you should pay the +border-crossing costs only once, but what the profiler told us was that we +were paying the cost of ``generic_cpy_call`` several times what we expected. + +After a bit of investigation, we discovered this was ultimately caused by our +"correctness-first" approach. 
For simplicity of development and testing, when +we started cpyext we wrote everything in RPython: thus, every single API call +made from C (like the omnipresent `PyArg_ParseTuple`_, `PyInt_AsLong`_, etc.) +had to cross back the C-to-RPython border: this was especially daunting for +very simple and frequent operations like ``Py_INCREF`` and ``Py_DECREF``, +which CPython implements as a single assembly instruction! + +Another source of slowness was the implementation of ``PyTypeObject`` slots: +at the C level, these are function pointers which the interpreter calls to do +certain operations, e.g. `tp_new`_ to allocate a new instance of that type. + +As usual, we have some magic to implement slots in RPython; in particular, +`_make_wrapper`_ does the opposite of ``generic_cpy_call``: it takes an +RPython function and wraps it into a C function which can be safely called +from C, handling the GIL, exceptions and argument conversions automatically. + +This was very handy during the development of cpyext, but it might result in +some bad nonsense; consider what happens when you call the following C +function: + +.. sourcecode:: C + + static PyObject* foo(PyObject* self, PyObject* args) + { + PyObject* result = PyInt_FromLong(1234); + return result; + } + + 1. you are in RPython and do a cpyext call: **RPython-to-C**; + + 2. ``foo`` calls ``PyInt_FromLong(1234)``, which is implemented in RPython: + **C-to-RPython**; + + 3. the implementation of ``PyInt_FromLong`` indirectly calls + ``PyIntType.tp_new``, which is a C function pointer: **RPython-to-C**; + + 4. however, ``tp_new`` is just a wrapper around an RPython function, created + by ``_make_wrapper``: **C-to-RPython**; + + 5. finally, we create our RPython ``W_IntObject(1234)``; at some point + during the **RPython-to-C** crossing, its ``PyObject*`` equivalent is + created; + + 6. 
after many layers of wrappers, we are again in ``foo``: after we do + ``return result``, during the **C-to-RPython** step we convert it from + ``PyObject*`` to ``W_IntObject(1234)``. + +Phew! After we realized this, it was not so surprising that cpyext was very +slow :). And this was a simplified example, since we are not passing any +``PyObject*`` to the API call: if we did, we would need to convert it back and +forth at every step. Actually, I am not even sure that what I described was +the exact sequence of steps which used to happen, but you get the general +idea. + +The solution is simple: rewrite as much as we can in C instead of RPython, so +to avoid unnecessary roundtrips: this was the topic of most of the Cape Town +sprint and resulted in the ``cpyext-avoid-roundtrip`` branch, which was eventually +merged_. + +Of course, it is not possible to move **everything** to C: there are still +operations which need to be implemented in RPython. For example, think of +``PyList_Append``: the logic to append an item to a list is complex and +involves list strategies, so we cannot replicate it in C. However, we +discovered that a large subset of the C API can benefit from this. + +Moreover, the C API is **huge**: the biggest achievement of the branch was to +discover and invent this new way of writing cpyext code, but we still need to +convert many of the functions. The results we got from this optimization are +impressive, as we will detail later. + + +.. _`2017 Cape Town Sprint`: https://morepypy.blogspot.com/2017/10/cape-of-good-hope-for-pypy-hello-from.html +.. _`cpyext microbenchmarks`: https://github.com/antocuni/cpyext-benchmarks +.. _`PyArg_ParseTuple`: https://docs.python.org/2/c-api/arg.html#c.PyArg_ParseTuple +.. _`PyInt_AsLong`: https://docs.python.org/2/c-api/int.html#c.PyInt_AsLong +.. _`tp_new`: https://docs.python.org/2/c-api/typeobj.html#c.PyTypeObject.tp_new +..
`_make_wrapper`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-362 +.. _merged: https://bitbucket.org/pypy/pypy/commits/7b550e9b3cee Conversion costs @@ -307,7 +401,9 @@ https://bitbucket.org/pypy/extradoc/src/cd51a2e3fc4dac278074997c7dc198caee819769/planning/cpyext.txt#lines-27 -Borrowed references +C API quirks -------------------- XXX explain why borrowed references are a problem for us; possibly link to: https://pythoncapi.readthedocs.io/bad_api.html#borrowed-references + +the calling convention is inefficient: why do I have to allocate a PyTuple* of PyObject*, just to unwrap them immediately? From pypy.commits at gmail.com Thu Sep 6 20:21:22 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 06 Sep 2018 17:21:22 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: rephrase Message-ID: <5b91c482.1c69fb81.6c5e9.11e8@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5902:6ef125946995 Date: 2018-09-07 02:21 +0200 http://bitbucket.org/pypy/extradoc/changeset/6ef125946995/ Log: rephrase diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -379,9 +379,12 @@ Moreover, the C API is **huge**: the biggest achievement of the branch was to discover and invent this new way of writing cpyext code, but we still need to -convert many of the functions. The results we got from this optimization are -impressive, as we will detail later. +convert many of the functions. Also, sometimes the rewrite is not automatic +or straightforward: cpyext is a delicate piece of software, so it happens often +that you end up debugging a segfault in gdb. +However, the most important remark is that the performance improvements we got +from this optimization are impressive, as we will detail later. .. 
_`2017 Cape Town Sprint`: https://morepypy.blogspot.com/2017/10/cape-of-good-hope-for-pypy-hello-from.html .. _`cpyext microbenchmarks`: https://github.com/antocuni/cpyext-benchmarks From pypy.commits at gmail.com Sun Sep 9 05:56:00 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 09 Sep 2018 02:56:00 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: blindly try to fix win32 translation Message-ID: <5b94ee30.1c69fb81.7e5d1.861b@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95084:3c45a9eb3419 Date: 2018-09-09 12:54 +0300 http://bitbucket.org/pypy/pypy/changeset/3c45a9eb3419/ Log: blindly try to fix win32 translation diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -10,11 +10,6 @@ _WIN32 = sys.platform == 'win32' _MACOSX = sys.platform == 'darwin' -if _WIN32: - from rpython.rlib.runicode import str_decode_mbcs, unicode_encode_mbcs -else: - # Workaround translator's confusion - str_decode_mbcs = unicode_encode_mbcs = lambda *args, **kwargs: None @specialize.memo() def decode_error_handler(space): @@ -317,7 +312,7 @@ pos = rutf8._pos_at_index(s, newindex) return result.build() -if sys.platform == 'win32': +if _WIN32: def utf8_encode_mbcs(s, errors, errorhandler): s = s.decode('utf-8') if errorhandler is None: @@ -325,12 +320,12 @@ res = unicode_encode_mbcs(s, slen, errors, errorhandler) return res - def str_decode_mbcs(s, errors, final, errorhandler): + def str_decode_mbcs(s, errors, final, errorhandler, force_ignore=True): slen = len(s) if errorhandler is None: errorhandler = decode_error_handler(space) - res, size = str_decode_mbcs(s, slen, final=final, errors=errors, - errorhandler=errorhandler) + res, size = runicode.str_decode_mbcs(s, slen, final=final, errors=errors, + errorhandler=errorhandler, force_ignore=force_ignore) res_utf8 = runicode.unicode_encode_utf_8(res, len(res), 'strict') return res_utf8, 
len(res) diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -703,31 +703,9 @@ ]: make_decoder_wrapper(decoder) -if hasattr(runicode, 'str_decode_mbcs'): - # mbcs functions are not regular, because we have to pass - # "force_ignore/replace=False" - @unwrap_spec(uni=unicode, errors='text_or_none') - def mbcs_encode(space, uni, errors="strict"): - if errors is None: - errors = 'strict' - state = space.fromcache(CodecState) - result = runicode.unicode_encode_mbcs( - uni, len(uni), errors, state.encode_error_handler, - force_replace=False) - return space.newtuple([space.newbytes(result), space.newint(len(uni))]) - - @unwrap_spec(string='bufferstr', errors='text_or_none', - w_final=WrappedDefault(False)) - def mbcs_decode(space, string, errors="strict", w_final=None): - if errors is None: - errors = 'strict' - final = space.is_true(w_final) - state = space.fromcache(CodecState) - result, length, pos = runicode.str_decode_mbcs( - string, len(string), errors, - final, state.decode_error_handler, - force_ignore=False) - return space.newtuple([space.newtext(result, length), space.newint(pos)]) +if hasattr(unicodehelper, 'str_decode_mbcs'): + make_encoder_wrapper('mbcs_encode') + make_decoder_wrapper('mbcs_decode') # utf-8 functions are not regular, because we have to pass # "allow_surrogates=False" From pypy.commits at gmail.com Sun Sep 9 06:11:35 2018 From: pypy.commits at gmail.com (arigo) Date: Sun, 09 Sep 2018 03:11:35 -0700 (PDT) Subject: [pypy-commit] cffi default: "python setup.py sdist" should not list the generated ABI module. Message-ID: <5b94f1d7.1c69fb81.d25af.7aa9@mx.google.com> Author: Armin Rigo Branch: Changeset: r3158:603d417606ae Date: 2018-09-09 12:11 +0200 http://bitbucket.org/cffi/cffi/changeset/603d417606ae/ Log: "python setup.py sdist" should not list the generated ABI module. 
See ``"python3 setup.py sdist" wrongly packages auto-generated file (out-of-line ABI-mode)`` on the mailing list. diff --git a/cffi/setuptools_ext.py b/cffi/setuptools_ext.py --- a/cffi/setuptools_ext.py +++ b/cffi/setuptools_ext.py @@ -162,6 +162,17 @@ module_path = module_name.split('.') module_path[-1] += '.py' generate_mod(os.path.join(self.build_lib, *module_path)) + def get_source_files(self): + # This is called from 'setup.py sdist' only. Exclude + # the generate .py module in this case. + saved_py_modules = self.py_modules + try: + if saved_py_modules: + self.py_modules = [m for m in saved_py_modules + if m != module_name] + return base_class.get_source_files(self) + finally: + self.py_modules = saved_py_modules dist.cmdclass['build_py'] = build_py_make_mod # distutils and setuptools have no notion I could find of a @@ -171,6 +182,7 @@ # the module. So we add it here, which gives a few apparently # harmless warnings about not finding the file outside the # build directory. + # Then we need to hack more in get_source_files(); see above. 
if dist.py_modules is None: dist.py_modules = [] dist.py_modules.append(module_name) From pypy.commits at gmail.com Sun Sep 9 16:26:23 2018 From: pypy.commits at gmail.com (arigo) Date: Sun, 09 Sep 2018 13:26:23 -0700 (PDT) Subject: [pypy-commit] pypy default: Typo (indent mistake) Message-ID: <5b9581ef.1c69fb81.853c9.6cfc@mx.google.com> Author: Armin Rigo Branch: Changeset: r95085:32f623879d94 Date: 2018-09-09 22:25 +0200 http://bitbucket.org/pypy/pypy/changeset/32f623879d94/ Log: Typo (indent mistake) diff --git a/rpython/doc/examples.rst b/rpython/doc/examples.rst --- a/rpython/doc/examples.rst +++ b/rpython/doc/examples.rst @@ -19,7 +19,7 @@ * Typhon, 'A virtual machine for Monte', in active development, https://github.com/monte-language/typhon * Tulip, an untyped functional language, in language design mode, maintained, - https://github.com/tulip-lang/tulip/ + https://github.com/tulip-lang/tulip/ * Pycket, a Racket implementation, proof of concept, small language core working, a lot of primitives are missing. Slow development https://github.com/samth/pycket From pypy.commits at gmail.com Sun Sep 9 19:50:19 2018 From: pypy.commits at gmail.com (antocuni) Date: Sun, 09 Sep 2018 16:50:19 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: write the section about conversion costs Message-ID: <5b95b1bb.1c69fb81.7e54e.c393@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5903:53fb8572d9f1 Date: 2018-09-10 01:50 +0200 http://bitbucket.org/pypy/extradoc/changeset/53fb8572d9f1/ Log: write the section about conversion costs diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -340,7 +340,7 @@ return result; } - 1. you are in RPython and do a cpyext call: **RPython-to-C**; + 1. you are in RPython and do a cpyext call to ``foo``: **RPython-to-C**; 2. 
``foo`` calls ``PyInt_FromLong(1234)``, which is implemented in RPython: **C-to-RPython**; @@ -398,10 +398,74 @@ Conversion costs ----------------- -XXX this is one of the biggest unsolved problems so far; explain or link to -this: +The other potential big source of slowdown is the conversion of arguments +between ``W_Root`` and ``PyObject*``. -https://bitbucket.org/pypy/extradoc/src/cd51a2e3fc4dac278074997c7dc198caee819769/planning/cpyext.txt#lines-27 +As explained earlier, the first time you pass a ``W_Root`` to C, you need to +allocate it's ``PyObject*`` counterpart. Suppose to have a ``foo`` function +defined in C, which takes a single int argument: + +.. sourcecode:: python + + for i in range(N): + foo(i) + +To run this code, you need to create a different ``PyObject*`` for each value +of ``i``: if implemented naively, it means calling ``N`` times ``malloc()`` +and ``free()``, which kills performance. + +CPython has the very same problem, which is solved by using a `free list`_ to +`allocate ints`_. So, what we did was to simply `steal the code`_ from CPython +and do the exact same thing: this was also done in the +``cpyext-avoid-roundtrip`` branch, and the benchmarks show that it worked +perfectly. + +Every type which is converted often to ``PyObject*`` must have a very fast +allocator: at the moment of writing, PyPy uses free lists only for ints and +tuples_: one of the next steps on our TODO list is certainly to use this +technique with more types, like ``float``. + +Conversely, we also need to optimize the converstion from ``PyObject*`` to +``W_Root``: this happens when an object is originally allocated in C and +returned to Python. Consider for example the following code: + +.. sourcecode:: python + + import numpy as np + myarray = np.random.random(N) + for i in range(len(arr)): + myarray[i] + +At every iteration, we get an item out of the array: the return type is a an +instance of ``numpy.float64`` (a numpy scalar), i.e. 
a ``PyObject*``: this is +something which is implemented by numpy entirely in C, so completely +transparent to cpyext: we don't have any control on how it is allocated, +managed, etc., and we can assume that allocation costs are the same as on +CPython. + +However, as soon as we return these ``PyObject*`` to Python, we need to allocate +its ``W_Root`` equivalent: if you do it in a small loop like in the example +above, you end up allocating all these ``W_Root`` inside the nursery, which is +a good thing since allocation is super fast (see the section above about the +PyPy GC). + +However, we also need to keep track of the ``W_Root`` to ``PyObject*`` link: +currently, we do this by putting all of them in a dictionary, but it is very +inefficient, especially because most of these objects die young and thus it +is wasted work to do that for them. Currently, this is one of the biggest +unresolved problems in cpyext, and it is what causes the two microbenchmarks +``allocate_int`` and ``allocate_tuple`` to be very slow. + +We are well aware of the problem, and we have a plan for how to fix it; the +explanation is too technical for the scope of this blog post as it requires a +deep knowledge of the GC internals to be understood, but the details are +here_. + +.. _`free list`: https://en.wikipedia.org/wiki/Free_list +.. _`allocate ints`: https://github.com/python/cpython/blob/2.7/Objects/intobject.c#L16 +.. _`steal the code`: https://bitbucket.org/pypy/pypy/commits/e5c7b7f85187 +.. _tuples: https://bitbucket.org/pypy/pypy/commits/ccf12107e805 +.. 
_here: https://bitbucket.org/pypy/extradoc/src/cd51a2e3fc4dac278074997c7dc198caee819769/planning/cpyext.txt#lines-27 C API quirks From pypy.commits at gmail.com Mon Sep 10 04:08:22 2018 From: pypy.commits at gmail.com (arigo) Date: Mon, 10 Sep 2018 01:08:22 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Test for issue #2884 Message-ID: <5b962676.1c69fb81.263d.2973@mx.google.com> Author: Armin Rigo Branch: py3.6 Changeset: r95086:6be2fb2a8b03 Date: 2018-09-10 10:07 +0200 http://bitbucket.org/pypy/pypy/changeset/6be2fb2a8b03/ Log: Test for issue #2884 diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ -1526,4 +1526,8 @@ assert counts[ops.BUILD_MAP] == 1 # the empty dict assert counts[ops.BUILD_CONST_KEY_MAP] == 2 - + def test_annotation_issue2884(self): + source = """def f(): + a: list = [j for j in range(10)] + """ + generate_function_code(source, self.space) From pypy.commits at gmail.com Mon Sep 10 04:18:37 2018 From: pypy.commits at gmail.com (arigo) Date: Mon, 10 Sep 2018 01:18:37 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Fix for 6be2fb2a8b03 (issue2884) Message-ID: <5b9628dd.1c69fb81.78c43.c8df@mx.google.com> Author: Armin Rigo Branch: py3.6 Changeset: r95087:dd78db026ae0 Date: 2018-09-10 10:16 +0200 http://bitbucket.org/pypy/pypy/changeset/dd78db026ae0/ Log: Fix for 6be2fb2a8b03 (issue2884) diff --git a/pypy/interpreter/astcompiler/symtable.py b/pypy/interpreter/astcompiler/symtable.py --- a/pypy/interpreter/astcompiler/symtable.py +++ b/pypy/interpreter/astcompiler/symtable.py @@ -436,6 +436,8 @@ self.note_symbol(name, scope) else: target.walkabout(self) + if assign.value is not None: + assign.value.walkabout(self) def visit_ClassDef(self, clsdef): self.note_symbol(clsdef.name, SYM_ASSIGNED) From pypy.commits at gmail.com Tue Sep 11 08:44:36 2018 From: pypy.commits at 
gmail.com (arigo) Date: Tue, 11 Sep 2018 05:44:36 -0700 (PDT) Subject: [pypy-commit] pypy default: issue2885 fix Message-ID: <5b97b8b4.1c69fb81.12e9f.6e09@mx.google.com> Author: Armin Rigo Branch: Changeset: r95088:6a861cb724d1 Date: 2018-09-11 14:43 +0200 http://bitbucket.org/pypy/pypy/changeset/6a861cb724d1/ Log: issue2885 fix diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -323,7 +323,8 @@ ------------- * Hash randomization (``-R``) `is ignored in PyPy`_. In CPython - before 3.4 it has `little point`_. + before 3.4 it has `little point`_. Both CPython >= 3.4 and PyPy3 + implement the randomized SipHash algorithm and ignore ``-R``. * You can't store non-string keys in type objects. For example:: From pypy.commits at gmail.com Tue Sep 11 09:27:57 2018 From: pypy.commits at gmail.com (arigo) Date: Tue, 11 Sep 2018 06:27:57 -0700 (PDT) Subject: [pypy-commit] pypy default: Add more tests inspired from issue #2866. Message-ID: <5b97c2dd.1c69fb81.16970.f3f6@mx.google.com> Author: Armin Rigo Branch: Changeset: r95089:baf31e0bca1e Date: 2018-09-11 15:26 +0200 http://bitbucket.org/pypy/pypy/changeset/baf31e0bca1e/ Log: Add more tests inspired from issue #2866. Fix them by simplifying a bit descr_new(). 
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1146,3 +1146,26 @@ raises(TypeError, "unicode('', encoding=None)") raises(TypeError, 'u"".encode("utf-8", None)') + def test_unicode_constructor_misc(self): + x = u'foo' + x += u'bar' + assert unicode(x) is x + # + class U(unicode): + def __unicode__(self): + return u'BOK' + u = U(x) + assert unicode(u) == u'BOK' + # + class U2(unicode): + pass + z = U2(u'foobaz') + assert type(unicode(z)) is unicode + assert unicode(z) == u'foobaz' + # + e = raises(TypeError, unicode, u'text', 'supposedly_the_encoding') + assert str(e.value) == 'decoding Unicode is not supported' + e = raises(TypeError, unicode, u, 'supposedly_the_encoding') + assert str(e.value) == 'decoding Unicode is not supported' + e = raises(TypeError, unicode, z, 'supposedly_the_encoding') + assert str(e.value) == 'decoding Unicode is not supported' diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -201,27 +201,17 @@ encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors) - # convoluted logic for the case when unicode subclass has a __unicode__ - # method, we need to call this method - is_precisely_unicode = space.is_w(space.type(w_obj), space.w_unicode) - if (is_precisely_unicode or - (space.isinstance_w(w_obj, space.w_unicode) and - space.findattr(w_obj, space.newtext('__unicode__')) is None)): - if encoding is not None or errors is not None: + if encoding is None and errors is None: + # this is very quick if w_obj is already a w_unicode + w_value = unicode_from_object(space, w_obj) + else: + if space.isinstance_w(w_obj, space.w_unicode): raise oefmt(space.w_TypeError, "decoding Unicode is not supported") - if (is_precisely_unicode and - space.is_w(w_unicodetype, 
space.w_unicode)): - return w_obj - w_value = w_obj - else: - if encoding is None and errors is None: - w_value = unicode_from_object(space, w_obj) - else: - w_value = unicode_from_encoded_object(space, w_obj, - encoding, errors) - if space.is_w(w_unicodetype, space.w_unicode): - return w_value + w_value = unicode_from_encoded_object(space, w_obj, + encoding, errors) + if space.is_w(w_unicodetype, space.w_unicode): + return w_value assert isinstance(w_value, W_UnicodeObject) w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype) From pypy.commits at gmail.com Tue Sep 11 09:31:26 2018 From: pypy.commits at gmail.com (arigo) Date: Tue, 11 Sep 2018 06:31:26 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5b97c3ae.1c69fb81.caec4.97a1@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r95090:ae9f77ce8df2 Date: 2018-09-11 15:30 +0200 http://bitbucket.org/pypy/pypy/changeset/ae9f77ce8df2/ Log: hg merge default diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -330,7 +330,8 @@ ------------- * Hash randomization (``-R``) `is ignored in PyPy`_. In CPython - before 3.4 it has `little point`_. + before 3.4 it has `little point`_. Both CPython >= 3.4 and PyPy3 + implement the randomized SipHash algorithm and ignore ``-R``. * You can't store non-string keys in type objects. 
For example:: diff --git a/pypy/module/select/__init__.py b/pypy/module/select/__init__.py --- a/pypy/module/select/__init__.py +++ b/pypy/module/select/__init__.py @@ -3,6 +3,7 @@ import sys import os +from rpython.rlib import _rsocket_rffi as _c class Module(MixedModule): @@ -31,6 +32,10 @@ for symbol in symbol_map: interpleveldefs[symbol] = "space.wrap(interp_kqueue.%s)" % symbol + if _c.PIPE_BUF is not None: + interpleveldefs['PIPE_BUF'] = 'space.wrap(%r)' % _c.PIPE_BUF + + def buildloaders(cls): from rpython.rlib import rpoll for name in rpoll.eventnames: diff --git a/pypy/module/select/test/test_select.py b/pypy/module/select/test/test_select.py --- a/pypy/module/select/test/test_select.py +++ b/pypy/module/select/test/test_select.py @@ -319,6 +319,11 @@ # ^^^ CPython gives 100, PyPy gives 1. I think both are OK as # long as there is no crash. + def test_PIPE_BUF(self): + # no PIPE_BUF on Windows; this test class is skipped on Windows. + import select + assert isinstance(select.PIPE_BUF, int) + class AppTestSelectWithSockets(_AppTestSelect): """Same tests with connected sockets. 
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1088,3 +1088,26 @@ assert u'A\u03a3\u0345'.lower() == u'a\u03c2\u0345' assert u'\u03a3\u0345 '.lower() == u'\u03c3\u0345 ' + def test_unicode_constructor_misc(self): + x = u'foo' + x += u'bar' + assert str(x) is x + # + class U(str): + def __str__(self): + return u'BOK' + u = U(x) + assert str(u) == u'BOK' + # + class U2(str): + pass + z = U2(u'foobaz') + assert type(str(z)) is str + assert str(z) == u'foobaz' + # + e = raises(TypeError, str, u'text', 'supposedly_the_encoding') + assert str(e.value) == 'decoding str is not supported' + e = raises(TypeError, str, u, 'supposedly_the_encoding') + assert str(e.value) == 'decoding str is not supported' + e = raises(TypeError, str, z, 'supposedly_the_encoding') + assert str(e.value) == 'decoding str is not supported' diff --git a/rpython/doc/examples.rst b/rpython/doc/examples.rst --- a/rpython/doc/examples.rst +++ b/rpython/doc/examples.rst @@ -19,7 +19,7 @@ * Typhon, 'A virtual machine for Monte', in active development, https://github.com/monte-language/typhon * Tulip, an untyped functional language, in language design mode, maintained, - https://github.com/tulip-lang/tulip/ + https://github.com/tulip-lang/tulip/ * Pycket, a Racket implementation, proof of concept, small language core working, a lot of primitives are missing. 
Slow development https://github.com/samth/pycket diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ b/rpython/rlib/_rsocket_rffi.py @@ -33,6 +33,7 @@ 'arpa/inet.h', 'stdint.h', 'errno.h', + 'limits.h', ) if _HAS_AF_PACKET: includes += ('netpacket/packet.h', @@ -113,6 +114,7 @@ F_GETFL = platform.DefinedConstantInteger('F_GETFL') F_SETFL = platform.DefinedConstantInteger('F_SETFL') FIONBIO = platform.DefinedConstantInteger('FIONBIO') + PIPE_BUF = platform.DefinedConstantInteger('PIPE_BUF') INVALID_SOCKET = platform.DefinedConstantInteger('INVALID_SOCKET') INET_ADDRSTRLEN = platform.DefinedConstantInteger('INET_ADDRSTRLEN') @@ -1085,6 +1087,7 @@ WSAEWOULDBLOCK = cConfig.WSAEWOULDBLOCK or cConfig.EWOULDBLOCK WSAEAFNOSUPPORT = cConfig.WSAEAFNOSUPPORT or cConfig.EAFNOSUPPORT EISCONN = cConfig.EISCONN or cConfig.WSAEISCONN +PIPE_BUF = cConfig.PIPE_BUF # may be None linux = cConfig.linux WIN32 = cConfig.WIN32 From pypy.commits at gmail.com Tue Sep 11 09:37:39 2018 From: pypy.commits at gmail.com (arigo) Date: Tue, 11 Sep 2018 06:37:39 -0700 (PDT) Subject: [pypy-commit] pypy default: two completely corner cases where we differ from CPython Message-ID: <5b97c523.1c69fb81.c950a.353b@mx.google.com> Author: Armin Rigo Branch: Changeset: r95091:6e1de6dbe769 Date: 2018-09-11 15:36 +0200 http://bitbucket.org/pypy/pypy/changeset/6e1de6dbe769/ Log: two completely corner cases where we differ from CPython diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1163,7 +1163,12 @@ assert type(unicode(z)) is unicode assert unicode(z) == u'foobaz' # - e = raises(TypeError, unicode, u'text', 'supposedly_the_encoding') + # two completely corner cases where we differ from CPython: + #assert unicode(encoding='supposedly_the_encoding') == u'' + #assert 
unicode(errors='supposedly_the_error') == u'' + e = raises(TypeError, unicode, u'', 'supposedly_the_encoding') + assert str(e.value) == 'decoding Unicode is not supported' + e = raises(TypeError, unicode, u'', errors='supposedly_the_error') assert str(e.value) == 'decoding Unicode is not supported' e = raises(TypeError, unicode, u, 'supposedly_the_encoding') assert str(e.value) == 'decoding Unicode is not supported' From pypy.commits at gmail.com Tue Sep 11 09:43:19 2018 From: pypy.commits at gmail.com (arigo) Date: Tue, 11 Sep 2018 06:43:19 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5b97c677.1c69fb81.f2695.dd88@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r95092:cbe861385419 Date: 2018-09-11 15:38 +0200 http://bitbucket.org/pypy/pypy/changeset/cbe861385419/ Log: hg merge default diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1105,7 +1105,12 @@ assert type(str(z)) is str assert str(z) == u'foobaz' # - e = raises(TypeError, str, u'text', 'supposedly_the_encoding') + # two completely corner cases where we differ from CPython: + #assert unicode(encoding='supposedly_the_encoding') == u'' + #assert unicode(errors='supposedly_the_error') == u'' + e = raises(TypeError, str, u'', 'supposedly_the_encoding') + assert str(e.value) == 'decoding str is not supported' + e = raises(TypeError, str, u'', errors='supposedly_the_error') assert str(e.value) == 'decoding str is not supported' e = raises(TypeError, str, u, 'supposedly_the_encoding') assert str(e.value) == 'decoding str is not supported' From pypy.commits at gmail.com Tue Sep 11 09:43:23 2018 From: pypy.commits at gmail.com (arigo) Date: Tue, 11 Sep 2018 06:43:23 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Issue #2866 Message-ID: <5b97c67b.1c69fb81.bfe07.0016@mx.google.com> Author: Armin Rigo Branch: py3.5 
Changeset: r95093:321fc60db035 Date: 2018-09-11 15:42 +0200 http://bitbucket.org/pypy/pypy/changeset/321fc60db035/ Log: Issue #2866 Fix by copying the (now-fixed) logic from default. diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -397,14 +397,14 @@ assert str(123) == '123' assert str(object=123) == '123' assert str([2, 3]) == '[2, 3]' - assert str(errors='strict') == '' + #assert str(errors='strict') == '' --- obscure case, disabled for now class U(str): pass assert str(U()).__class__ is str assert U().__str__().__class__ is str assert U('test') == 'test' assert U('test').__class__ is U - assert U(errors='strict') == U('') + #assert U(errors='strict') == U('') --- obscure case, disabled for now def test_call_unicode_2(self): class X(object): diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -224,15 +224,20 @@ def descr_new(space, w_unicodetype, w_object=None, w_encoding=None, w_errors=None): if w_object is None: - w_value = W_UnicodeObject.EMPTY + w_object = W_UnicodeObject.EMPTY + w_obj = w_object + + encoding, errors = _get_encoding_and_errors(space, w_encoding, + w_errors) + if encoding is None and errors is None: + # this is very quick if w_obj is already a w_unicode + w_value = unicode_from_object(space, w_obj) else: - encoding, errors = _get_encoding_and_errors(space, w_encoding, - w_errors) - if encoding is None and errors is None: - w_value = unicode_from_object(space, w_object) - else: - w_value = unicode_from_encoded_object(space, w_object, - encoding, errors) + if space.isinstance_w(w_obj, space.w_unicode): + raise oefmt(space.w_TypeError, + "decoding str is not supported") + w_value = unicode_from_encoded_object(space, w_obj, + encoding, errors) if space.is_w(w_unicodetype, 
space.w_unicode): return w_value From pypy.commits at gmail.com Tue Sep 11 16:32:59 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 13:32:59 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: align function call with signature Message-ID: <5b98267b.1c69fb81.a7bcc.2d4f@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95094:744c4148953f Date: 2018-09-09 13:17 +0300 http://bitbucket.org/pypy/pypy/changeset/744c4148953f/ Log: align function call with signature diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -79,7 +79,7 @@ state = space.fromcache(interp_codecs.CodecState) if _WIN32: bytes = space.bytes_w(w_string) - uni = str_decode_mbcs(bytes, len(bytes), 'strict', + uni = str_decode_mbcs(bytes, 'strict', errorhandler=decode_error_handler(space), force_ignore=False)[0] elif _MACOSX: @@ -324,7 +324,7 @@ slen = len(s) if errorhandler is None: errorhandler = decode_error_handler(space) - res, size = runicode.str_decode_mbcs(s, slen, final=final, errors=errors, + res, size = runicode.str_decode_mbcs(s, slen, errors, final=final, errorhandler=errorhandler, force_ignore=force_ignore) res_utf8 = runicode.unicode_encode_utf_8(res, len(res), 'strict') return res_utf8, len(res) From pypy.commits at gmail.com Tue Sep 11 16:33:04 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 13:33:04 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: merge default into branch Message-ID: <5b982680.1c69fb81.1e0b1.ea15@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95095:4ca367f8c139 Date: 2018-09-11 20:56 +0300 http://bitbucket.org/pypy/pypy/changeset/4ca367f8c139/ Log: merge default into branch diff too long, truncating to 2000 out of 6123 lines diff --git a/lib-python/2.7/hashlib.py b/lib-python/2.7/hashlib.py --- a/lib-python/2.7/hashlib.py +++ b/lib-python/2.7/hashlib.py @@ -136,9 
+136,14 @@ __get_hash = __get_openssl_constructor algorithms_available = algorithms_available.union( _hashlib.openssl_md_meth_names) -except ImportError: +except ImportError as e: new = __py_new __get_hash = __get_builtin_constructor + # added by PyPy + import warnings + warnings.warn("The _hashlib module is not available, falling back " + "to a much slower implementation (%s)" % str(e), + RuntimeWarning) for __func_name in __always_supported: # try them all, some may not work due to the OpenSSL diff --git a/lib-python/2.7/types.py b/lib-python/2.7/types.py --- a/lib-python/2.7/types.py +++ b/lib-python/2.7/types.py @@ -83,9 +83,19 @@ DictProxyType = type(TypeType.__dict__) NotImplementedType = type(NotImplemented) -# For Jython, the following two types are identical +# +# On CPython, FunctionType.__code__ is a 'getset_descriptor', but +# FunctionType.__globals__ is a 'member_descriptor', just like app-level +# slots. On PyPy, all descriptors of built-in types are +# 'getset_descriptor', but the app-level slots are 'member_descriptor' +# as well. (On Jython the situation might still be different.) +# +# Note that MemberDescriptorType was equal to GetSetDescriptorType in +# PyPy <= 6.0. 
+# GetSetDescriptorType = type(FunctionType.func_code) -MemberDescriptorType = type(FunctionType.func_globals) +class _C(object): __slots__ = 's' +MemberDescriptorType = type(_C.s) del sys, _f, _g, _C, _x # Not for export diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -183,6 +183,7 @@ self._buffer = self._ffiarray(self._length_, autofree=True) for i, arg in enumerate(args): self[i] = arg + _init_no_arg_ = __init__ def _fix_index(self, index): if index < 0: diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -120,7 +120,7 @@ raise ValueError( "Buffer size too small (%d instead of at least %d bytes)" % (len(buf) + offset, size + offset)) - result = self() + result = self._newowninstance_() dest = result._buffer.buffer try: raw_addr = buf._pypy_raw_address() @@ -131,6 +131,11 @@ memmove(dest, raw_addr, size) return result + def _newowninstance_(self): + result = self.__new__(self) + result._init_no_arg_() + return result + class CArgObject(object): """ simple wrapper around buffer, just for the case of freeing @@ -162,6 +167,7 @@ def __init__(self, *args, **kwds): raise TypeError("%s has no type" % (type(self),)) + _init_no_arg_ = __init__ def _ensure_objects(self): if '_objects' not in self.__dict__: diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -268,6 +268,7 @@ return raise TypeError("Unknown constructor %s" % (args,)) + _init_no_arg_ = __init__ def _wrap_callable(self, to_call, argtypes): def f(*args): @@ -557,7 +558,7 @@ keepalive, newarg, newargtype = self._conv_param(argtype, defval) else: import ctypes - val = argtype._type_() + val = argtype._type_._newowninstance_() keepalive = None newarg = ctypes.byref(val) newargtype = type(newarg) diff --git a/lib_pypy/_ctypes/pointer.py 
b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -67,8 +67,11 @@ self._buffer = ffiarray(1, autofree=True) if value is not None: self.contents = value + def _init_no_arg_(self): + self._buffer = ffiarray(1, autofree=True) self._ffiarray = ffiarray self.__init__ = __init__ + self._init_no_arg_ = _init_no_arg_ self._type_ = TP def _build_ffiargtype(self): @@ -137,27 +140,21 @@ if not (isinstance(tp, _CDataMeta) and tp._is_pointer_like()): raise TypeError("cast() argument 2 must be a pointer type, not %s" % (tp,)) + result = tp._newowninstance_() if isinstance(obj, (int, long)): - result = tp() result._buffer[0] = obj return result elif obj is None: - result = tp() return result elif isinstance(obj, Array): - ptr = tp.__new__(tp) - ptr._buffer = tp._ffiarray(1, autofree=True) - ptr._buffer[0] = obj._buffer - result = ptr + result._buffer[0] = obj._buffer elif isinstance(obj, bytes): - result = tp() result._buffer[0] = buffer(obj)._pypy_raw_address() return result elif not (isinstance(obj, _CData) and type(obj)._is_pointer_like()): raise TypeError("cast() argument 1 must be a pointer, not %s" % (type(obj),)) else: - result = tp() result._buffer[0] = obj._buffer[0] # The casted objects '_objects' member: diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -390,11 +390,14 @@ self._buffer = self._ffiarray(1, autofree=True) if value is not DEFAULT_VALUE: self.value = value + _init_no_arg_ = __init__ def _ensure_objects(self): - if self._type_ not in 'zZP': - assert self._objects is None - return self._objects + # No '_objects' is the common case for primitives. Examples + # where there is an _objects is if _type in 'zZP', or if + # self comes from 'from_buffer(buf)'. See module/test_lib_pypy/ + # ctypes_test/test_buffers.py: test_from_buffer_keepalive. 
+ return getattr(self, '_objects', None) def _getvalue(self): return self._buffer[0] diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -281,6 +281,7 @@ self.__setattr__(name, arg) for name, arg in kwds.items(): self.__setattr__(name, arg) + _init_no_arg_ = __init__ def _subarray(self, fieldtype, name): """Return a _rawffi array of length 1 whose address is the same as diff --git a/lib_pypy/cffi/_cffi_errors.h b/lib_pypy/cffi/_cffi_errors.h --- a/lib_pypy/cffi/_cffi_errors.h +++ b/lib_pypy/cffi/_cffi_errors.h @@ -50,7 +50,9 @@ "import sys\n" "class FileLike:\n" " def write(self, x):\n" - " of.write(x)\n" + " try:\n" + " of.write(x)\n" + " except: pass\n" " self.buf += x\n" "fl = FileLike()\n" "fl.buf = ''\n" diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -8,20 +8,43 @@ the same works for the other two macros. Py_DEBUG implies them, but not the other way around. - Issue #350 is still open: on Windows, the code here causes it to link - with PYTHON36.DLL (for example) instead of PYTHON3.DLL. A fix was - attempted in 164e526a5515 and 14ce6985e1c3, but reverted: virtualenv - does not make PYTHON3.DLL available, and so the "correctly" compiled - version would not run inside a virtualenv. We will re-apply the fix - after virtualenv has been fixed for some time. For explanation, see - issue #355. For a workaround if you want PYTHON3.DLL and don't worry - about virtualenv, see issue #350. See also 'py_limited_api' in - setuptools_ext.py. + The implementation is messy (issue #350): on Windows, with _MSC_VER, + we have to define Py_LIMITED_API even before including pyconfig.h. + In that case, we guess what pyconfig.h will do to the macros above, + and check our guess after the #include. + + Note that on Windows, with CPython 3.x, you need virtualenv version + >= 16.0.0. 
Older versions don't copy PYTHON3.DLL. As a workaround + you can remove the definition of Py_LIMITED_API here. + + See also 'py_limited_api' in cffi/setuptools_ext.py. */ #if !defined(_CFFI_USE_EMBEDDING) && !defined(Py_LIMITED_API) -# include -# if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) -# define Py_LIMITED_API +# ifdef _MSC_VER +# if !defined(_DEBUG) && !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) +# define Py_LIMITED_API +# endif +# include + /* sanity-check: Py_LIMITED_API will cause crashes if any of these + are also defined. Normally, the Python file PC/pyconfig.h does not + cause any of these to be defined, with the exception that _DEBUG + causes Py_DEBUG. Double-check that. */ +# ifdef Py_LIMITED_API +# if defined(Py_DEBUG) +# error "pyconfig.h unexpectedly defines Py_DEBUG, but Py_LIMITED_API is set" +# endif +# if defined(Py_TRACE_REFS) +# error "pyconfig.h unexpectedly defines Py_TRACE_REFS, but Py_LIMITED_API is set" +# endif +# if defined(Py_REF_DEBUG) +# error "pyconfig.h unexpectedly defines Py_REF_DEBUG, but Py_LIMITED_API is set" +# endif +# endif +# else +# include +# if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) +# define Py_LIMITED_API +# endif # endif #endif diff --git a/lib_pypy/cffi/backend_ctypes.py b/lib_pypy/cffi/backend_ctypes.py --- a/lib_pypy/cffi/backend_ctypes.py +++ b/lib_pypy/cffi/backend_ctypes.py @@ -636,6 +636,10 @@ if isinstance(init, bytes): init = [init[i:i+1] for i in range(len(init))] else: + if isinstance(init, CTypesGenericArray): + if (len(init) != len(blob) or + not isinstance(init, CTypesArray)): + raise TypeError("length/type mismatch: %s" % (init,)) init = tuple(init) if len(init) > len(blob): raise IndexError("too many initializers") diff --git a/lib_pypy/cffi/setuptools_ext.py b/lib_pypy/cffi/setuptools_ext.py --- a/lib_pypy/cffi/setuptools_ext.py +++ b/lib_pypy/cffi/setuptools_ext.py @@ -81,13 +81,8 @@ it doesn't so far, 
creating troubles. That's why we check for "not hasattr(sys, 'gettotalrefcount')" (the 2.7 compatible equivalent of 'd' not in sys.abiflags). (http://bugs.python.org/issue28401) - - On Windows, it's better not to use py_limited_api until issue #355 - can be resolved (by having virtualenv copy PYTHON3.DLL). See also - the start of _cffi_include.h. """ - if ('py_limited_api' not in kwds and not hasattr(sys, 'gettotalrefcount') - and sys.platform != 'win32'): + if 'py_limited_api' not in kwds and not hasattr(sys, 'gettotalrefcount'): import setuptools try: setuptools_major_version = int(setuptools.__version__.partition('.')[0]) diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -39,14 +39,10 @@ "_csv", "_cppyy", "_pypyjson", "_jitlog" ]) -from rpython.jit.backend import detect_cpu -try: - if detect_cpu.autodetect().startswith('x86'): - if not sys.platform.startswith('openbsd'): - working_modules.add('_vmprof') - working_modules.add('faulthandler') -except detect_cpu.ProcessorAutodetectError: - pass +import rpython.rlib.rvmprof.cintf +if rpython.rlib.rvmprof.cintf.IS_SUPPORTED: + working_modules.add('_vmprof') + working_modules.add('faulthandler') translation_modules = default_modules.copy() translation_modules.update([ @@ -318,3 +314,4 @@ parser = to_optparse(config) #, useoptions=["translation.*"]) option, args = parser.parse_args() print config + print working_modules diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -323,7 +323,8 @@ ------------- * Hash randomization (``-R``) `is ignored in PyPy`_. In CPython - before 3.4 it has `little point`_. + before 3.4 it has `little point`_. Both CPython >= 3.4 and PyPy3 + implement the randomized SipHash algorithm and ignore ``-R``. * You can't store non-string keys in type objects. 
For example:: diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -7,9 +7,13 @@ .. branch: cppyy-packaging -Upgrade to backend 1.1.0, improved handling of templated methods and +Main items: vastly better template resolution and improved performance. In +detail: upgrade to backend 1.4, improved handling of templated methods and functions (in particular automatic deduction of types), improved pythonization -interface, and a range of compatibility fixes for Python3 +interface, range of compatibility fixes for Python3, free functions now take +fast libffi path when possible, moves for strings (incl. from Python str), +easier/faster handling of std::vector by numpy, improved and faster object +identity preservation .. branch: socket_default_timeout_blockingness diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -1628,7 +1628,7 @@ else: skip_leading_underscores = False for name in all: - if skip_leading_underscores and name[0]=='_': + if skip_leading_underscores and name and name[0] == '_': continue into_locals[name] = getattr(module, name) ''', filename=__file__) diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py --- a/pypy/module/__builtin__/functional.py +++ b/pypy/module/__builtin__/functional.py @@ -209,8 +209,12 @@ @specialize.arg(2) def min_max(space, args, implementation_of): - if not jit.we_are_jitted() or len(args.arguments_w) != 1 and \ - jit.loop_unrolling_heuristic(args.arguments_w, len(args.arguments_w)): + # the 'normal' version includes a JIT merge point, which will make a + # new loop (from the interpreter or from another JIT loop). If we + # give exactly two arguments to the call to max(), or a JIT virtual + # list of arguments, then we pick the 'unroll' version with no JIT + # merge point. 
+ if jit.isvirtual(args.arguments_w) or len(args.arguments_w) == 2: return min_max_unroll(space, args, implementation_of) else: return min_max_normal(space, args, implementation_of) diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -104,7 +104,15 @@ return self.ctptr def convert_from_object(self, cdata, w_ob): - self.convert_array_from_object(cdata, w_ob) + if isinstance(w_ob, cdataobj.W_CData) and w_ob.ctype is self: + length = w_ob.get_array_length() + with w_ob as source: + source = rffi.cast(rffi.VOIDP, source) + target = rffi.cast(rffi.VOIDP, cdata) + size = rffi.cast(rffi.SIZE_T, self.ctitem.size * length) + rffi.c_memcpy(target, source, size) + else: + self.convert_array_from_object(cdata, w_ob) def convert_to_object(self, cdata): if self.length < 0: diff --git a/pypy/module/_cffi_backend/ctypeobj.py b/pypy/module/_cffi_backend/ctypeobj.py --- a/pypy/module/_cffi_backend/ctypeobj.py +++ b/pypy/module/_cffi_backend/ctypeobj.py @@ -51,9 +51,12 @@ def unpack_list_of_float_items(self, ptr, length): return None - def pack_list_of_items(self, cdata, w_ob): + def pack_list_of_items(self, cdata, w_ob, expected_length): return False + def _within_bounds(self, actual_length, expected_length): + return expected_length < 0 or actual_length <= expected_length + def newp(self, w_init, allocator): space = self.space raise oefmt(space.w_TypeError, @@ -102,6 +105,11 @@ # ctype 'A' must be a pointer to same type, not cdata # 'B'", but with A=B, then give instead a different error # message to try to clear up the confusion + if self is w_got.ctype: + raise oefmt(space.w_SystemError, + "initializer for ctype '%s' is correct, but we get " + "an internal mismatch--please report a bug", + self.name) return oefmt(space.w_TypeError, "initializer for ctype '%s' appears indeed to " "be '%s', but the types are different (check " diff --git 
a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -287,9 +287,10 @@ return res return None - def pack_list_of_items(self, cdata, w_ob): + def pack_list_of_items(self, cdata, w_ob, expected_length): int_list = self.space.listview_int(w_ob) - if int_list is not None: + if (int_list is not None and + self._within_bounds(len(int_list), expected_length)): if self.size == rffi.sizeof(rffi.LONG): # fastest path from rpython.rlib.rrawarray import copy_list_to_raw_array cdata = rffi.cast(rffi.LONGP, cdata) @@ -300,7 +301,8 @@ if overflowed != 0: self._overflow(self.space.newint(overflowed)) return True - return W_CTypePrimitive.pack_list_of_items(self, cdata, w_ob) + return W_CTypePrimitive.pack_list_of_items(self, cdata, w_ob, + expected_length) class W_CTypePrimitiveUnsigned(W_CTypePrimitive): @@ -370,15 +372,17 @@ return res return None - def pack_list_of_items(self, cdata, w_ob): + def pack_list_of_items(self, cdata, w_ob, expected_length): int_list = self.space.listview_int(w_ob) - if int_list is not None: + if (int_list is not None and + self._within_bounds(len(int_list), expected_length)): overflowed = misc.pack_list_to_raw_array_bounds_unsigned( int_list, cdata, self.size, self.vrangemax) if overflowed != 0: self._overflow(self.space.newint(overflowed)) return True - return W_CTypePrimitive.pack_list_of_items(self, cdata, w_ob) + return W_CTypePrimitive.pack_list_of_items(self, cdata, w_ob, + expected_length) class W_CTypePrimitiveBool(W_CTypePrimitiveUnsigned): @@ -466,9 +470,10 @@ return res return None - def pack_list_of_items(self, cdata, w_ob): + def pack_list_of_items(self, cdata, w_ob, expected_length): float_list = self.space.listview_float(w_ob) - if float_list is not None: + if (float_list is not None and + self._within_bounds(len(float_list), expected_length)): if self.size == rffi.sizeof(rffi.DOUBLE): # fastest path from 
rpython.rlib.rrawarray import copy_list_to_raw_array cdata = rffi.cast(rffi.DOUBLEP, cdata) @@ -478,7 +483,8 @@ misc.pack_float_list_to_raw_array(float_list, cdata, rffi.FLOAT, rffi.FLOATP) return True - return W_CTypePrimitive.pack_list_of_items(self, cdata, w_ob) + return W_CTypePrimitive.pack_list_of_items(self, cdata, w_ob, + expected_length) def unpack_ptr(self, w_ctypeptr, ptr, length): result = self.unpack_list_of_float_items(ptr, length) @@ -548,13 +554,15 @@ # 'list(array-of-longdouble)' returns a list of cdata objects, # not a list of floats. - def pack_list_of_items(self, cdata, w_ob): + def pack_list_of_items(self, cdata, w_ob, expected_length): float_list = self.space.listview_float(w_ob) - if float_list is not None: + if (float_list is not None and + self._within_bounds(len(float_list), expected_length)): misc.pack_float_list_to_raw_array(float_list, cdata, rffi.LONGDOUBLE, rffi.LONGDOUBLEP) return True - return W_CTypePrimitive.pack_list_of_items(self, cdata, w_ob) + return W_CTypePrimitive.pack_list_of_items(self, cdata, w_ob, + expected_length) @jit.dont_look_inside def nonzero(self, cdata): diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -56,7 +56,7 @@ def _convert_array_from_listview(self, cdata, lst_w): space = self.space - if self.length >= 0 and len(lst_w) > self.length: + if not self._within_bounds(len(lst_w), self.length): raise oefmt(space.w_IndexError, "too many initializers for '%s' (got %d)", self.name, len(lst_w)) @@ -69,8 +69,8 @@ space = self.space if (space.isinstance_w(w_ob, space.w_list) or space.isinstance_w(w_ob, space.w_tuple)): - if self.ctitem.pack_list_of_items(cdata, w_ob): # fast path - pass + if self.ctitem.pack_list_of_items(cdata, w_ob, self.length): + pass # fast path else: self._convert_array_from_listview(cdata, space.listview(w_ob)) elif self.accept_str: diff --git 
a/pypy/module/_cffi_backend/errorbox.py b/pypy/module/_cffi_backend/errorbox.py --- a/pypy/module/_cffi_backend/errorbox.py +++ b/pypy/module/_cffi_backend/errorbox.py @@ -69,7 +69,10 @@ import sys class FileLike: def write(self, x): - of.write(x) + try: + of.write(x) + except: + pass self.buf += x fl = FileLike() fl.buf = '' diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -1862,7 +1862,7 @@ def test_newp_copying(): """Test that we can do newp(, ) for most - types, with the exception of arrays, like in C. + types, including same-type arrays. """ BInt = new_primitive_type("int") p = newp(new_pointer_type(BInt), cast(BInt, 42)) @@ -1891,8 +1891,9 @@ a1 = newp(BArray, [1, 2, 3, 4]) py.test.raises(TypeError, newp, BArray, a1) BArray6 = new_array_type(new_pointer_type(BInt), 6) - a1 = newp(BArray6, None) - py.test.raises(TypeError, newp, BArray6, a1) + a1 = newp(BArray6, [10, 20, 30]) + a2 = newp(BArray6, a1) + assert list(a2) == [10, 20, 30, 0, 0, 0] # s1 = newp(BStructPtr, [42]) s2 = newp(BStructPtr, s1[0]) @@ -3935,8 +3936,8 @@ def test_char_pointer_conversion(): import warnings - assert __version__.startswith(("1.8", "1.9", "1.10", "1.11", "1.12")), ( - "consider turning the warning into an error") + assert __version__.startswith("1."), ( + "the warning will be an error if we ever release cffi 2.x") BCharP = new_pointer_type(new_primitive_type("char")) BIntP = new_pointer_type(new_primitive_type("int")) BVoidP = new_pointer_type(new_void_type()) diff --git a/pypy/module/_cffi_backend/test/test_fastpath.py b/pypy/module/_cffi_backend/test/test_fastpath.py --- a/pypy/module/_cffi_backend/test/test_fastpath.py +++ b/pypy/module/_cffi_backend/test/test_fastpath.py @@ -267,3 +267,17 @@ assert lst == [1.25, -2.5, 3.75] if not self.runappdirect: assert self.get_count() == 1 + + def 
test_too_many_initializers(self): + import _cffi_backend + ffi = _cffi_backend.FFI() + raises(IndexError, ffi.new, "int[4]", [10, 20, 30, 40, 50]) + raises(IndexError, ffi.new, "int[4]", tuple(range(999))) + raises(IndexError, ffi.new, "unsigned int[4]", [10, 20, 30, 40, 50]) + raises(IndexError, ffi.new, "float[4]", [10, 20, 30, 40, 50]) + raises(IndexError, ffi.new, "long double[4]", [10, 20, 30, 40, 50]) + raises(IndexError, ffi.new, "char[4]", [10, 20, 30, 40, 50]) + raises(IndexError, ffi.new, "wchar_t[4]", [10, 20, 30, 40, 50]) + raises(IndexError, ffi.new, "_Bool[4]", [10, 20, 30, 40, 50]) + raises(IndexError, ffi.new, "int[4][4]", [[3,4,5,6]] * 5) + raises(IndexError, ffi.new, "int[4][4]", [[3,4,5,6,7]] * 4) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -563,3 +563,13 @@ assert len(z) == 2 assert ffi.cast("int *", z)[0] == 0x12345 assert list(z) == [u'\U00012345', u'\x00'] # maybe a 2-unichars str + + def test_ffi_array_as_init(self): + import _cffi_backend as _cffi1_backend + ffi = _cffi1_backend.FFI() + p = ffi.new("int[4]", [10, 20, 30, 400]) + q = ffi.new("int[4]", p) + assert list(q) == [10, 20, 30, 400] + raises(TypeError, ffi.new, "int[3]", p) + raises(TypeError, ffi.new, "int[5]", p) + raises(TypeError, ffi.new, "int16_t[4]", p) diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -1,13 +1,18 @@ import os + from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib.rarithmetic import intmask from rpython.rlib import jit, jit_libffi, libffi, rdynload, objectmodel from rpython.rlib.rarithmetic import r_singlefloat from rpython.tool import leakfinder -from pypy.interpreter.gateway import interp2app -from pypy.interpreter.error import 
oefmt +from pypy.interpreter.error import OperationError, oefmt +from pypy.interpreter.argument import Arguments +from pypy.interpreter.gateway import interp2app, interpindirect2app +from pypy.interpreter.typedef import TypeDef +from pypy.objspace.std.iterobject import W_AbstractSeqIterObject +from pypy.module._rawffi.array import W_ArrayInstance from pypy.module._cffi_backend import ctypefunc, ctypeprim, cdataobj, misc from pypy.module._cffi_backend import newtype from pypy.module._cppyy import ffitypes @@ -23,10 +28,11 @@ class _Arg: # poor man's union _immutable_ = True - def __init__(self, tc, h = 0, l = -1, s = '', p = rffi.cast(rffi.VOIDP, 0)): + def __init__(self, tc, h = 0, l = -1, d = -1., s = '', p = rffi.cast(rffi.VOIDP, 0)): self.tc = tc self._handle = h self._long = l + self._double = d self._string = s self._voidp = p @@ -40,6 +46,11 @@ def __init__(self, val): _Arg.__init__(self, 'l', l = val) +class _ArgD(_Arg): + _immutable_ = True + def __init__(self, val): + _Arg.__init__(self, 'd', d = val) + class _ArgS(_Arg): _immutable_ = True def __init__(self, val): @@ -89,6 +100,9 @@ assert obj._voidp != rffi.cast(rffi.VOIDP, 0) data = rffi.cast(rffi.VOIDPP, data) data[0] = obj._voidp + elif obj.tc == 'd': + assert isinstance(argtype, ctypeprim.W_CTypePrimitiveFloat) + misc.write_raw_float_data(data, rffi.cast(rffi.DOUBLE, obj._double), argtype.size) else: # only other use is string assert obj.tc == 's' n = len(obj._string) @@ -182,6 +196,7 @@ 'call_f' : ([c_method, c_object, c_int, c_voidp], c_float), 'call_d' : ([c_method, c_object, c_int, c_voidp], c_double), 'call_ld' : ([c_method, c_object, c_int, c_voidp], c_ldouble), + 'call_nld' : ([c_method, c_object, c_int, c_voidp], c_double), 'call_r' : ([c_method, c_object, c_int, c_voidp], c_voidp), # call_s actually takes an size_t* as last parameter, but this will do @@ -236,6 +251,8 @@ 'method_prototype' : ([c_scope, c_method, c_int], c_ccharp), 'is_const_method' : ([c_method], c_int), + 
'get_num_templated_methods': ([c_scope], c_int), + 'get_templated_method_name': ([c_scope, c_index], c_ccharp), 'exists_method_template' : ([c_scope, c_ccharp], c_int), 'method_is_template' : ([c_scope, c_index], c_int), 'get_method_template' : ([c_scope, c_ccharp, c_ccharp], c_method), @@ -272,9 +289,11 @@ 'stdstring2charp' : ([c_object, c_voidp], c_ccharp), 'stdstring2stdstring' : ([c_object], c_object), - 'stdvector_valuetype' : ([c_ccharp], c_ccharp), - 'stdvector_valuesize' : ([c_ccharp], c_size_t), + 'longdouble2double' : ([c_voidp], c_double), + 'double2longdouble' : ([c_double, c_voidp], c_void), + 'vectorbool_getitem' : ([c_object, c_int], c_int), + 'vectorbool_setitem' : ([c_object, c_int, c_int], c_void), } # size/offset are backend-specific but fixed after load @@ -401,7 +420,9 @@ return rffi.cast(rffi.DOUBLE, space.float_w(call_capi(space, 'call_d', args))) def c_call_ld(space, cppmethod, cppobject, nargs, cargs): args = [_ArgH(cppmethod), _ArgH(cppobject), _ArgL(nargs), _ArgP(cargs)] - return rffi.cast(rffi.LONGDOUBLE, space.float_w(call_capi(space, 'call_ld', args))) + #return rffi.cast(rffi.LONGDOUBLE, space.float_w(call_capi(space, 'call_ld', args))) + # call_nld narrows long double to double + return rffi.cast(rffi.DOUBLE, space.float_w(call_capi(space, 'call_nld', args))) def c_call_r(space, cppmethod, cppobject, nargs, cargs): args = [_ArgH(cppmethod), _ArgH(cppobject), _ArgL(nargs), _ArgP(cargs)] @@ -561,16 +582,21 @@ def c_is_const_method(space, cppmeth): return space.bool_w(call_capi(space, 'is_const_method', [_ArgH(cppmeth)])) +def c_get_num_templated_methods(space, cppscope): + return space.int_w(call_capi(space, 'method_is_template', [_ArgH(cppscope.handle)])) +def c_get_templated_method_name(space, cppscope, index): + args = [_ArgH(cppscope.handle), _ArgL(index)] + return charp2str_free(space, call_capi(space, 'method_is_template', args)) def c_exists_method_template(space, cppscope, name): args = [_ArgH(cppscope.handle), _ArgS(name)] 
return space.bool_w(call_capi(space, 'exists_method_template', args)) def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] return space.bool_w(call_capi(space, 'method_is_template', args)) - def c_get_method_template(space, cppscope, name, proto): args = [_ArgH(cppscope.handle), _ArgS(name), _ArgS(proto)] return rffi.cast(C_METHOD, space.uint_w(call_capi(space, 'get_method_template', args))) + def c_get_global_operator(space, nss, lc, rc, op): if nss is not None: args = [_ArgH(nss.handle), _ArgH(lc.handle), _ArgH(rc.handle), _ArgS(op)] @@ -619,7 +645,7 @@ return space.bool_w(call_capi(space, 'is_enum_data', args)) def c_get_dimension_size(space, cppscope, datamember_index, dim_idx): args = [_ArgH(cppscope.handle), _ArgL(datamember_index), _ArgL(dim_idx)] - return space.bool_w(call_capi(space, 'get_dimension_size', args)) + return space.int_w(call_capi(space, 'get_dimension_size', args)) # misc helpers --------------------------------------------------------------- def c_strtoll(space, svalue): @@ -650,24 +676,94 @@ def c_stdstring2stdstring(space, cppobject): return _cdata_to_cobject(space, call_capi(space, 'stdstring2stdstring', [_ArgH(cppobject)])) -def c_stdvector_valuetype(space, pystr): - return charp2str_free(space, call_capi(space, 'stdvector_valuetype', [_ArgS(pystr)])) +def c_longdouble2double(space, addr): + return space.float_w(call_capi(space, 'longdouble2double', [_ArgP(addr)])) +def c_double2longdouble(space, dval, addr): + call_capi(space, 'double2longdouble', [_ArgD(dval), _ArgP(addr)]) -def c_stdvector_valuetype(space, pystr): - return charp2str_free(space, call_capi(space, 'stdvector_valuetype', [_ArgS(pystr)])) -def c_stdvector_valuesize(space, pystr): - return _cdata_to_size_t(space, call_capi(space, 'stdvector_valuesize', [_ArgS(pystr)])) +def c_vectorbool_getitem(space, vbool, idx): + return call_capi(space, 'vectorbool_getitem', [_ArgH(vbool), _ArgL(idx)]) +def c_vectorbool_setitem(space, vbool, idx, 
value): + call_capi(space, 'vectorbool_setitem', [_ArgH(vbool), _ArgL(idx), _ArgL(value)]) # TODO: factor these out ... # pythonizations def stdstring_c_str(space, w_self): """Return a python string taking into account \0""" - from pypy.module._cppyy import interp_cppyy cppstr = space.interp_w(interp_cppyy.W_CPPInstance, w_self, can_be_None=False) return space.newtext(c_stdstring2charp(space, cppstr._rawobject)) +def vbool_getindex(space, w_vbool, w_idx): + idx = space.getindex_w(w_idx, space.w_IndexError, "std::vector index") + sz = space.len_w(w_vbool) + if idx < 0: idx += sz + if idx < 0 or idx >= sz: + raise IndexError + return idx + +def vectorbool_getitem(space, w_self, w_idx): + """Index a std::vector, return the value""" + from pypy.module._cppyy import interp_cppyy + vbool = space.interp_w(interp_cppyy.W_CPPInstance, w_self, can_be_None=False) + idx = vbool_getindex(space, w_self, w_idx) + item = c_vectorbool_getitem(space, vbool._rawobject, idx) + return space.newbool(space.is_true(item)) + +def vectorbool_setitem(space, w_self, w_idx, w_value): + """Index a std::vector, set the value""" + from pypy.module._cppyy import interp_cppyy + vbool = space.interp_w(interp_cppyy.W_CPPInstance, w_self, can_be_None=False) + idx = vbool_getindex(space, w_self, w_idx) + c_vectorbool_setitem(space, vbool._rawobject, idx, int(space.is_true(w_value))) + +class W_STLVectorIter(W_AbstractSeqIterObject): + # w_seq and index are in base class + _immutable_fields_ = ['converter', 'data', 'len', 'stride'] + + def __init__(self, space, w_vector): + W_AbstractSeqIterObject.__init__(self, w_vector) + # TODO: this should live in rpythonize.py or something so that the + # imports can move to the top w/o getting circles + from pypy.module._cppyy import interp_cppyy + assert isinstance(w_vector, interp_cppyy.W_CPPInstance) + vector = space.interp_w(interp_cppyy.W_CPPInstance, w_vector) + + v_type = c_resolve_name(space, vector.clsdecl.name+'::value_type') + v_size = 
c_size_of_type(space, v_type) + + if not v_type or not v_size: + raise NotImplementedError # fallback on getitem + + from pypy.module._cppyy import converter + self.converter = converter.get_converter(space, v_type, '') + + # this 'data' is from the decl, so not the pythonized data from pythonify.py + w_arr = space.call_obj_args(vector.clsdecl.get_overload('data'), w_vector, Arguments(space, [])) + arr = space.interp_w(W_ArrayInstance, w_arr, can_be_None=True) + if not arr: + raise OperationError(space.w_StopIteration, space.w_None) + + self.data = rffi.cast(rffi.CCHARP, space.uint_w(arr.getbuffer(space))) + self.len = space.uint_w(space.call_obj_args(vector.clsdecl.get_overload('size'), w_vector, Arguments(space, []))) + self.stride = v_size + + def descr_next(self, space): + if self.w_seq is None: + raise OperationError(space.w_StopIteration, space.w_None) + if self.len <= self.index: + self.w_seq = None + raise OperationError(space.w_StopIteration, space.w_None) + offset = lltype.direct_ptradd(self.data, rffi.cast(rffi.SIZE_T, self.index*self.stride)) + w_item = self.converter.from_memory(space, space.w_None, rffi.cast(rffi.LONG, offset)) + self.index += 1 + return w_item + +def stdvector_iter(space, w_self): + return W_STLVectorIter(space, w_self) + + # setup pythonizations for later use at run-time _pythonizations = {} def register_pythonizations(space): @@ -678,6 +774,12 @@ ### std::string stdstring_c_str, + ### std::vector + stdvector_iter, + + ### std::vector + vectorbool_getitem, + vectorbool_setitem, ] for f in allfuncs: @@ -692,3 +794,10 @@ space.setattr(w_pycppclass, space.newtext("c_str"), _pythonizations["stdstring_c_str"]) _method_alias(space, w_pycppclass, "_cppyy_as_builtin", "c_str") _method_alias(space, w_pycppclass, "__str__", "c_str") + + if name.find("std::vector 0: + # default encodes the dimensions + dims = default.split(':') + return InstanceArrayConverter(space, clsdecl, array_size, dims) + elif cpd == "": return InstanceConverter(space, 
clsdecl) elif "(anonymous)" in name: # special case: enum w/o a type name @@ -859,7 +970,7 @@ return FunctionPointerConverter(space, name[pos+2:]) # void* or void converter (which fails on use) - if 0 <= compound.find('*'): + if 0 <= cpd.find('*'): return VoidPtrConverter(space, default) # "user knows best" # return a void converter here, so that the class can be build even @@ -874,8 +985,8 @@ _converters["const float&"] = ConstFloatRefConverter _converters["double"] = DoubleConverter _converters["const double&"] = ConstDoubleRefConverter -#_converters["long double"] = LongDoubleConverter -#_converters["const long double&"] = ConstLongDoubleRefConverter +_converters["long double"] = LongDoubleConverter +_converters["const long double&"] = ConstLongDoubleRefConverter _converters["const char*"] = CStringConverter _converters["void*"] = VoidPtrConverter _converters["void**"] = VoidPtrPtrConverter @@ -885,6 +996,7 @@ _converters["std::basic_string"] = StdStringConverter _converters["const std::basic_string&"] = StdStringConverter # TODO: shouldn't copy _converters["std::basic_string&"] = StdStringRefConverter +_converters["std::basic_string&&"] = StdStringMoveConverter _converters["PyObject*"] = PyObjectConverter @@ -908,7 +1020,12 @@ _immutable_ = True typecode = c_tc def __init__(self, space, default): - self.default = rffi.cast(self.c_type, capi.c_strtoll(space, default)) + self.valid_default = False + try: + self.default = rffi.cast(self.c_type, capi.c_strtoll(space, default)) + self.valid_default = True + except Exception: + self.default = rffi.cast(self.c_type, 0) class ConstRefConverter(ConstRefNumericTypeConverterMixin, BasicConverter): _immutable_ = True for name in names: @@ -925,7 +1042,12 @@ _immutable_ = True typecode = c_tc def __init__(self, space, default): - self.default = rffi.cast(self.c_type, capi.c_strtoll(space, default)) + self.valid_default = False + try: + self.default = rffi.cast(self.c_type, capi.c_strtoll(space, default)) + 
self.valid_default = True + except Exception: + self.default = rffi.cast(self.c_type, 0) class ConstRefConverter(ConstRefNumericTypeConverterMixin, BasicConverter): _immutable_ = True for name in names: @@ -945,7 +1067,12 @@ _immutable_ = True typecode = c_tc def __init__(self, space, default): - self.default = rffi.cast(self.c_type, capi.c_strtoull(space, default)) + self.valid_default = False + try: + self.default = rffi.cast(self.c_type, capi.c_strtoull(space, default)) + self.valid_default = True + except Exception: + self.default = rffi.cast(self.c_type, 0) class ConstRefConverter(ConstRefNumericTypeConverterMixin, BasicConverter): _immutable_ = True for name in names: @@ -1002,6 +1129,7 @@ ("std::basic_string", "string"), ("const std::basic_string&", "const string&"), ("std::basic_string&", "string&"), + ("std::basic_string&&", "string&&"), ("PyObject*", "_object*"), ) diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -1,14 +1,10 @@ import sys from pypy.interpreter.error import oefmt - from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib import jit_libffi - from pypy.module._rawffi.interp_rawffi import letter2tp -from pypy.module._rawffi.array import W_Array, W_ArrayInstance - -from pypy.module._cppyy import helper, capi, ffitypes +from pypy.module._cppyy import helper, capi, ffitypes, lowlevelviews # Executor objects are used to dispatch C++ methods. 
They are defined by their # return type only: arguments are converted by Converter objects, and Executors @@ -60,7 +56,7 @@ from pypy.module._cppyy import interp_cppyy return interp_cppyy.get_nullptr(space) shape = letter2tp(space, self.typecode) - return W_ArrayInstance(space, shape, sys.maxint/shape.size, ptrval) + return lowlevelviews.W_LowLevelView(space, shape, sys.maxint/shape.size, ptrval) class VoidExecutor(Executor): @@ -80,9 +76,6 @@ class NumericExecutorMixin(object): _mixin_ = True - #def _wrap_object(self, space, obj): - # return getattr(space, self.wrapper)(obj) - def execute(self, space, cppmethod, cppthis, num_args, args): result = self.c_stubcall(space, cppmethod, cppthis, num_args, args) return self._wrap_object(space, rffi.cast(self.c_type, result)) @@ -98,19 +91,16 @@ def __init__(self, space, extra): Executor.__init__(self, space, extra) self.do_assign = False - self.item = rffi.cast(self.c_type, 0) + self.w_item = space.w_None def set_item(self, space, w_item): - self.item = self._unwrap_object(space, w_item) + self.w_item = w_item self.do_assign = True - #def _wrap_object(self, space, obj): - # return getattr(space, self.wrapper)(rffi.cast(self.c_type, obj)) - def _wrap_reference(self, space, rffiptr): if self.do_assign: - rffiptr[0] = self.item - self.do_assign = False + rffiptr[0] = rffi.cast(self.c_type, self._unwrap_object(space, self.w_item)) + self.do_assign = False return self._wrap_object(space, rffiptr[0]) # all paths, for rtyper def execute(self, space, cppmethod, cppthis, num_args, args): @@ -123,6 +113,48 @@ return self._wrap_reference(space, rffi.cast(self.c_ptrtype, rffi.cast(rffi.VOIDPP, result)[0])) +class LongDoubleExecutorMixin(object): + # Note: not really supported, but returns normal double + _mixin_ = True + + def execute(self, space, cppmethod, cppthis, num_args, args): + result = self.c_stubcall(space, cppmethod, cppthis, num_args, args) + return space.newfloat(result) + + def execute_libffi(self, space, cif_descr, 
funcaddr, buffer): + from pypy.module._cppyy.interp_cppyy import FastCallNotPossible + raise FastCallNotPossible + +class LongDoubleExecutor(ffitypes.typeid(rffi.LONGDOUBLE), LongDoubleExecutorMixin, Executor): + _immutable_ = True + c_stubcall = staticmethod(capi.c_call_ld) + +class LongDoubleRefExecutorMixin(NumericRefExecutorMixin): + # Note: not really supported, but returns normal double + _mixin_ = True + + def _wrap_reference(self, space, rffiptr): + if self.do_assign: + capi.c_double2longdouble(space, space.float_w(self.w_item), rffiptr) + self.do_assign = False + return self.w_item + return space.newfloat(capi.c_longdouble2double(space, rffiptr)) + + def execute(self, space, cppmethod, cppthis, num_args, args): + result = capi.c_call_r(space, cppmethod, cppthis, num_args, args) + return self._wrap_reference(space, rffi.cast(self.c_ptrtype, result)) + + def execute_libffi(self, space, cif_descr, funcaddr, buffer): + jit_libffi.jit_ffi_call(cif_descr, funcaddr, buffer) + result = rffi.ptradd(buffer, cif_descr.exchange_result) + return self._wrap_reference(space, + rffi.cast(self.c_ptrtype, rffi.cast(rffi.VOIDPP, result)[0])) + +class LongDoubleRefExecutor(ffitypes.typeid(rffi.LONGDOUBLE), LongDoubleRefExecutorMixin, Executor): + def cffi_type(self, space): + state = space.fromcache(ffitypes.State) + return state.c_voidp + class CStringExecutor(Executor): def execute(self, space, cppmethod, cppthis, num_args, args): @@ -345,6 +377,10 @@ _executors["void*"] = PtrTypeExecutor _executors["const char*"] = CStringExecutor +# long double not really supported: narrows to double +_executors["long double"] = LongDoubleExecutor +_executors["long double&"] = LongDoubleRefExecutor + # special cases (note: 'string' aliases added below) _executors["constructor"] = ConstructorExecutor diff --git a/pypy/module/_cppyy/ffitypes.py b/pypy/module/_cppyy/ffitypes.py --- a/pypy/module/_cppyy/ffitypes.py +++ b/pypy/module/_cppyy/ffitypes.py @@ -296,7 +296,8 @@ _immutable_fields_ = 
['c_type', 'c_ptrtype', 'typecode'] c_type = rffi.LONGDOUBLE - c_ptrtype = rffi.LONGDOUBLEP + # c_ptrtype = rffi.LONGDOUBLEP # useless type at this point + c_ptrtype = rffi.VOIDP typecode = 'g' # long double is not really supported ... @@ -304,7 +305,7 @@ return r_longfloat(space.float_w(w_obj)) def _wrap_object(self, space, obj): - return space.wrap(obj) + return space.newfloat(obj) def cffi_type(self, space): state = space.fromcache(State) diff --git a/pypy/module/_cppyy/helper.py b/pypy/module/_cppyy/helper.py --- a/pypy/module/_cppyy/helper.py +++ b/pypy/module/_cppyy/helper.py @@ -117,16 +117,10 @@ # TODO: perhaps absorb or "pythonify" these operators? return cppname -if sys.hexversion < 0x3000000: - CPPYY__div__ = "__div__" - CPPYY__idiv__ = "__idiv__" - CPPYY__long__ = "__long__" - CPPYY__bool__ = "__nonzero__" -else: - CPPYY__div__ = "__truediv__" - CPPYY__idiv__ = "__itruediv__" - CPPYY__long__ = "__int__" - CPPYY__bool__ = "__bool__" +CPPYY__div__ = "__div__" +CPPYY__idiv__ = "__idiv__" +CPPYY__long__ = "__long__" +CPPYY__bool__ = "__nonzero__" # _operator_mappings["[]"] = "__setitem__" # depends on return type # _operator_mappings["+"] = "__add__" # depends on # of args (see __pos__) diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -63,6 +63,8 @@ double cppyy_call_d(cppyy_method_t method, cppyy_object_t self, int nargs, void* args); RPY_EXTERN long double cppyy_call_ld(cppyy_method_t method, cppyy_object_t self, int nargs, void* args); + RPY_EXTERN + double cppyy_call_nld(cppyy_method_t method, cppyy_object_t self, int nargs, void* args); RPY_EXTERN void* cppyy_call_r(cppyy_method_t method, cppyy_object_t self, int nargs, void* args); @@ -151,11 +153,15 @@ RPY_EXTERN char* cppyy_method_signature(cppyy_method_t, int show_formalargs); RPY_EXTERN - char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_method_t idx, int show_formalargs); + 
char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_method_t, int show_formalargs); RPY_EXTERN int cppyy_is_const_method(cppyy_method_t); RPY_EXTERN + int get_num_templated_methods(cppyy_scope_t scope); + RPY_EXTERN + char* get_templated_method_name(cppyy_scope_t scope, cppyy_index_t imeth); + RPY_EXTERN int cppyy_exists_method_template(cppyy_scope_t scope, const char* name); RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); @@ -216,9 +222,14 @@ cppyy_object_t cppyy_stdstring2stdstring(cppyy_object_t ptr); RPY_EXTERN - const char* cppyy_stdvector_valuetype(const char* clname); + double cppyy_longdouble2double(void*); RPY_EXTERN - size_t cppyy_stdvector_valuesize(const char* clname); + void cppyy_double2longdouble(double, void*); + + RPY_EXTERN + int cppyy_vectorbool_getitem(cppyy_object_t ptr, int idx); + RPY_EXTERN + void cppyy_vectorbool_setitem(cppyy_object_t ptr, int idx, int value); #ifdef __cplusplus } diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -24,6 +24,7 @@ INSTANCE_FLAGS_IS_RVALUE = 0x0004 OVERLOAD_FLAGS_USE_FFI = 0x0001 +OVERLOAD_FLAGS_CREATES = 0x0002 FUNCTION_IS_GLOBAL = 0x0001 FUNCTION_IS_STATIC = 0x0001 @@ -41,6 +42,7 @@ 'void**' : 100, 'float' : 30, 'double' : 10, + 'bool' : 1, 'const string&' : 1, } # solves a specific string ctor overload from rpython.rlib.listsort import make_timsort_class @@ -167,6 +169,7 @@ # # W_CPPOverload: instance methods (base class) # W_CPPConstructorOverload: constructors +# W_CPPAbstractCtorOverload: to provent instantiation of abstract classes # W_CPPStaticOverload: free and static functions # W_CPPTemplateOverload: templated methods # W_CPPTemplateStaticOverload: templated free and static functions @@ -227,8 +230,10 @@ if self.converters is None: try: self._setup(cppthis) - except Exception: - pass + except Exception as e: + if self.converters is None: + raise 
oefmt(self.space.w_SystemError, + "unable to initialize converters (%s)", str(e)) # attempt to call directly through ffi chain if useffi and self._funcaddr: @@ -258,7 +263,8 @@ jit.promote(self) cif_descr = self.cif_descr # add extra space for const-ref support (see converter.py) - buffer = lltype.malloc(rffi.CCHARP.TO, cif_descr.exchange_size+len(self.arg_defs)*rffi.sizeof(rffi.DOUBLE), flavor='raw') + buffer = lltype.malloc(rffi.CCHARP.TO, + cif_descr.exchange_size+len(self.arg_defs)*rffi.sizeof(rffi.DOUBLE), flavor='raw') thisoff = 0 try: if cppthis: @@ -412,8 +418,10 @@ def priority(self): total_arg_priority = 0 - for p in [priority.get(arg_type, 0) for arg_type, arg_dflt in self.arg_defs]: - total_arg_priority += p + for arg_type, arg_dflt in self.arg_defs: + total_arg_priority += priority.get(arg_type, 0) + if '&&' in arg_type: + total_arg_priority += 100 return total_arg_priority @rgc.must_be_light_finalizer @@ -433,7 +441,7 @@ class CPPSetItem(CPPMethod): """Method dispatcher specific to Python's __setitem__ mapped onto C++'s - operator[](int). The former function takes an extra argument to assign to + operator[](T). The former function takes an extra argument to assign to the return type of the latter.""" _attrs_ = [] @@ -453,6 +461,36 @@ # need forwarding, which the normal instancemethod does not provide, hence this # derived class. 
class MethodWithProps(Method): + # set life management of result from the call + def fget_creates(self, space): + f = space.interp_w(W_CPPOverload, self.w_function) + return f.fget_creates(space) + + @unwrap_spec(value=bool) + def fset_creates(self, space, value): + f = space.interp_w(W_CPPOverload, self.w_function) + f.fset_creates(space, value) + + # set ownership policy of arguments (not yet implemented) + def fget_mempolicy(self, space): + f = space.interp_w(W_CPPOverload, self.w_function) + return f.fget_mempolicy(space) + + @unwrap_spec(value=int) + def fset_mempolicy(self, space, value): + f = space.interp_w(W_CPPOverload, self.w_function) + f.fset_mempolicy(space, value) + + # set to release the gil during call (not yet implemented) + def fget_release_gil(self, space): + f = space.interp_w(W_CPPOverload, self.w_function) + return f.fget_release_gil(space) + + @unwrap_spec(value=bool) + def fset_release_gil(self, space, value): + f = space.interp_w(W_CPPOverload, self.w_function) + f.fset_release_gil(space, value) + # allow user to determine ffi use rules per overload def fget_useffi(self, space): f = space.interp_w(W_CPPOverload, self.w_function) @@ -468,22 +506,25 @@ __doc__ = """cpp_instancemethod(function, instance, class) Create an instance method object.""", - __new__ = interp2app(MethodWithProps.descr_method__new__.im_func), - __call__ = interp2app(MethodWithProps.descr_method_call), - __get__ = interp2app(MethodWithProps.descr_method_get), - im_func = interp_attrproperty_w('w_function', cls=MethodWithProps), - __func__ = interp_attrproperty_w('w_function', cls=MethodWithProps), - im_self = interp_attrproperty_w('w_instance', cls=MethodWithProps), - __self__ = interp_attrproperty_w('w_instance', cls=MethodWithProps), - im_class = interp_attrproperty_w('w_class', cls=MethodWithProps), + __new__ = interp2app(MethodWithProps.descr_method__new__.im_func), + __call__ = interp2app(MethodWithProps.descr_method_call), + __get__ = 
interp2app(MethodWithProps.descr_method_get), + im_func = interp_attrproperty_w('w_function', cls=MethodWithProps), + __func__ = interp_attrproperty_w('w_function', cls=MethodWithProps), + im_self = interp_attrproperty_w('w_instance', cls=MethodWithProps), + __self__ = interp_attrproperty_w('w_instance', cls=MethodWithProps), + im_class = interp_attrproperty_w('w_class', cls=MethodWithProps), __getattribute__ = interp2app(MethodWithProps.descr_method_getattribute), - __eq__ = interp2app(MethodWithProps.descr_method_eq), - __ne__ = descr_generic_ne, - __hash__ = interp2app(MethodWithProps.descr_method_hash), - __repr__ = interp2app(MethodWithProps.descr_method_repr), - __reduce__ = interp2app(MethodWithProps.descr_method__reduce__), - __weakref__ = make_weakref_descr(MethodWithProps), - __useffi__ = GetSetProperty(MethodWithProps.fget_useffi, MethodWithProps.fset_useffi), + __eq__ = interp2app(MethodWithProps.descr_method_eq), + __ne__ = descr_generic_ne, + __hash__ = interp2app(MethodWithProps.descr_method_hash), + __repr__ = interp2app(MethodWithProps.descr_method_repr), + __reduce__ = interp2app(MethodWithProps.descr_method__reduce__), + __weakref__ = make_weakref_descr(MethodWithProps), + __creates__ = GetSetProperty(MethodWithProps.fget_creates, MethodWithProps.fset_creates), + __mempolicy__ = GetSetProperty(MethodWithProps.fget_mempolicy, MethodWithProps.fset_mempolicy), + __release_gil__ = GetSetProperty(MethodWithProps.fget_release_gil, MethodWithProps.fset_release_gil), + __useffi__ = GetSetProperty(MethodWithProps.fget_useffi, MethodWithProps.fset_useffi), ) MethodWithProps.typedef.acceptable_as_base_class = False @@ -505,6 +546,9 @@ def descr_get(self, w_obj, w_cls=None): """functionobject.__get__(obj[, type]) -> method""" # TODO: check validity of w_cls if given + # TODO: this does not work for Python 3, which does not have + # unbound methods (probably no common code possible, see also + # pypy/interpreter/function.py) space = self.space 
asking_for_bound = (space.is_none(w_cls) or not space.is_w(w_obj, space.w_None) or @@ -512,7 +556,7 @@ if asking_for_bound: return MethodWithProps(space, self, w_obj, w_cls) else: - return self # unbound methods don't exist in Python 3, return self + return MethodWithProps(space, self, None, w_cls) @unwrap_spec(args_w='args_w') def call_args(self, args_w): @@ -543,7 +587,12 @@ for i in range(len(self.functions)): cppyyfunc = self.functions[i] try: - return cppyyfunc.call(cppthis, args_w, self.flags & OVERLOAD_FLAGS_USE_FFI) + w_result = cppyyfunc.call(cppthis, args_w, self.flags & OVERLOAD_FLAGS_USE_FFI) + if self.flags & OVERLOAD_FLAGS_CREATES: + if isinstance(w_result, W_CPPInstance): + cppinstance = self.space.interp_w(W_CPPInstance, w_result) + cppinstance.fset_python_owns(self.space, self.space.w_True) + return w_result except Exception: pass @@ -556,6 +605,7 @@ for i in range(len(self.functions)): cppyyfunc = self.functions[i] try: + # no need to set ownership on the return value, as none of the methods execute return cppyyfunc.call(cppthis, args_w, self.flags & OVERLOAD_FLAGS_USE_FFI) except OperationError as e: # special case if there's just one function, to prevent clogging the error message @@ -584,6 +634,43 @@ sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) + @unwrap_spec(signature='text') + def mp_overload(self, signature): + sig = '(%s)' % signature + for f in self.functions: + if f.signature(False) == sig: + if isinstance(self, W_CPPStaticOverload): + return W_CPPStaticOverload(self.space, self.scope, [f]) + return W_CPPOverload(self.space, self.scope, [f]) + raise oefmt(self.space.w_LookupError, "signature '%s' not found", signature) + + # set life management of result from the call + def fget_creates(self, space): + return space.newbool(bool(self.flags & OVERLOAD_FLAGS_CREATES)) + + @unwrap_spec(value=bool) + def fset_creates(self, space, value): + if space.is_true(value): + self.flags |= OVERLOAD_FLAGS_CREATES + else: + 
self.flags &= ~OVERLOAD_FLAGS_CREATES + + # set ownership policy of arguments (not yet implemented) + def fget_mempolicy(self, space): + return space.newint(0) + + @unwrap_spec(value=int) + def fset_mempolicy(self, space, value): + pass + + # set to release the gil during call (not yet implemented) + def fget_release_gil(self, space): + return space.newbool(True) + + @unwrap_spec(value=bool) + def fset_release_gil(self, space, value): + pass + # allow user to determine ffi use rules per overload def fget_useffi(self, space): return space.newbool(bool(self.flags & OVERLOAD_FLAGS_USE_FFI)) @@ -607,10 +694,14 @@ W_CPPOverload.typedef = TypeDef( 'CPPOverload', - __get__ = interp2app(W_CPPOverload.descr_get), - __call__ = interp2app(W_CPPOverload.call_args), - __useffi__ = GetSetProperty(W_CPPOverload.fget_useffi, W_CPPOverload.fset_useffi), - __doc__ = GetSetProperty(W_CPPOverload.fget_doc) + __get__ = interp2app(W_CPPOverload.descr_get), + __call__ = interp2app(W_CPPOverload.call_args), + __creates__ = GetSetProperty(W_CPPOverload.fget_creates, W_CPPOverload.fset_creates), + __mempolicy__ = GetSetProperty(W_CPPOverload.fget_mempolicy, W_CPPOverload.fset_mempolicy), + __release_gil__ = GetSetProperty(W_CPPOverload.fget_release_gil, W_CPPOverload.fset_release_gil), + __useffi__ = GetSetProperty(W_CPPOverload.fget_useffi, W_CPPOverload.fset_useffi), + __overload__ = interp2app(W_CPPOverload.mp_overload), + __doc__ = GetSetProperty(W_CPPOverload.fget_doc) ) @@ -632,21 +723,21 @@ @unwrap_spec(args_w='args_w') def call_args(self, args_w): jit.promote(self) - #if isinstance(args_w[0], W_CPPInstance): - # free function used as bound method, leave in place return self.call_impl(capi.C_NULL_OBJECT, args_w) - # free functions are implemented as methods of 'namespace' classes, remove 'instance' - #return self.call_impl(capi.C_NULL_OBJECT, args_w[1:]) def __repr__(self): return "W_CPPStaticOverload(%s)" % [f.prototype() for f in self.functions] W_CPPStaticOverload.typedef = 
TypeDef( 'CPPStaticOverload', - __get__ = interp2app(W_CPPStaticOverload.descr_get), - __call__ = interp2app(W_CPPStaticOverload.call_args), - __useffi__ = GetSetProperty(W_CPPStaticOverload.fget_useffi, W_CPPStaticOverload.fset_useffi), - __doc__ = GetSetProperty(W_CPPStaticOverload.fget_doc) + __get__ = interp2app(W_CPPStaticOverload.descr_get), + __call__ = interp2app(W_CPPStaticOverload.call_args), + __creates__ = GetSetProperty(W_CPPStaticOverload.fget_creates, W_CPPStaticOverload.fset_creates), + __mempolicy__ = GetSetProperty(W_CPPStaticOverload.fget_mempolicy, W_CPPStaticOverload.fset_mempolicy), + __release_gil__ = GetSetProperty(W_CPPStaticOverload.fget_release_gil, W_CPPStaticOverload.fset_release_gil), + __useffi__ = GetSetProperty(W_CPPStaticOverload.fget_useffi, W_CPPStaticOverload.fset_useffi), + __overload__ = interp2app(W_CPPStaticOverload.mp_overload), + __doc__ = GetSetProperty(W_CPPStaticOverload.fget_doc) ) @@ -660,11 +751,6 @@ @unwrap_spec(args_w='args_w') def call_args(self, args_w): jit.promote(self) - # TODO: factor out the following: - if capi.c_is_abstract(self.space, self.scope.handle): - raise oefmt(self.space.w_TypeError, - "cannot instantiate abstract class '%s'", - self.scope.name) cppinstance = self.space.interp_w(W_CPPInstance, args_w[0]) w_result = self.call_impl(rffi.cast(capi.C_OBJECT, self.scope.handle), args_w[1:]) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) @@ -677,15 +763,34 @@ W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', - __get__ = interp2app(W_CPPConstructorOverload.descr_get), - __call__ = interp2app(W_CPPConstructorOverload.call_args), - __doc__ = GetSetProperty(W_CPPConstructorOverload.fget_doc) + __get__ = interp2app(W_CPPConstructorOverload.descr_get), + __call__ = interp2app(W_CPPConstructorOverload.call_args), + __overload__ = interp2app(W_CPPConstructorOverload.mp_overload), + __doc__ = GetSetProperty(W_CPPConstructorOverload.fget_doc) From pypy.commits at gmail.com Tue 
Sep 11 16:33:06 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 13:33:06 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: fixes for failing tests Message-ID: <5b982682.1c69fb81.fba3e.2752@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95096:7e4c70d0d9eb Date: 2018-09-11 21:40 +0300 http://bitbucket.org/pypy/pypy/changeset/7e4c70d0d9eb/ Log: fixes for failing tests diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -136,14 +136,14 @@ @staticmethod def convert_arg_to_w_unicode(space, w_other, strict=None): - if isinstance(w_other, W_UnicodeObject): + if space.is_w(space.type(w_other), space.w_unicode): return w_other if space.isinstance_w(w_other, space.w_bytes): return unicode_from_string(space, w_other) if strict: raise oefmt(space.w_TypeError, "%s arg must be None, unicode or str", strict) - return unicode_from_encoded_object(space, w_other, None, "strict") + return unicode_from_encoded_object(space, w_other, 'utf8', "strict") def convert_to_w_unicode(self, space): return self @@ -226,7 +226,7 @@ return space.newtext(_repr_function(self._utf8)) def descr_str(self, space): - return encode_object(space, self, None, None) + return encode_object(space, self, 'utf8', 'strict') def descr_hash(self, space): x = compute_hash(self._utf8) From pypy.commits at gmail.com Tue Sep 11 16:33:09 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 13:33:09 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: merge unicode-utf8 into branch Message-ID: <5b982685.1c69fb81.399cd.3063@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95097:d9069a2d3f11 Date: 2018-09-11 22:38 +0300 http://bitbucket.org/pypy/pypy/changeset/d9069a2d3f11/ Log: merge unicode-utf8 into branch diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- 
a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -330,7 +330,8 @@ ------------- * Hash randomization (``-R``) `is ignored in PyPy`_. In CPython - before 3.4 it has `little point`_. + before 3.4 it has `little point`_. Both CPython >= 3.4 and PyPy3 + implement the randomized SipHash algorithm and ignore ``-R``. * You can't store non-string keys in type objects. For example:: diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -32,11 +32,14 @@ The reverse-debugger branch has been merged. For more information, see https://bitbucket.org/pypy/revdb +.. branch: pyparser-improvements-3 + +Small refactorings in the Python parser. + .. branch: unicode-utf8-re .. branch: utf8-io Utf8 handling for unicode .. branch: pyparser-improvements-3 - -Small refactorings in the Python parser. +Small refactorings in the Python parser. \ No newline at end of file diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ -1250,7 +1250,6 @@ def test_revdb_metavar(self): from pypy.interpreter.reverse_debugging import dbstate, setup_revdb - self.space.config.translation.reverse_debugger = True self.space.reverse_debugging = True try: setup_revdb(self.space) diff --git a/pypy/module/cpyext/test0/test_abstract.py b/pypy/module/cpyext/test0/test_abstract.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test0/test_abstract.py @@ -0,0 +1,130 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +import pytest + +class AppTestBufferProtocol(AppTestCpythonExtensionBase): + """Tests for the old buffer protocol.""" + + def w_get_buffer_support(self): + return self.import_extension('buffer_support', [ + ("charbuffer_as_string", "METH_O", + """ + char *ptr; + Py_ssize_t size; + 
if (PyObject_AsCharBuffer(args, (const char **)&ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize(ptr, size); + """), + ("check_readbuffer", "METH_O", + """ + return PyBool_FromLong(PyObject_CheckReadBuffer(args)); + """), + ("readbuffer_as_string", "METH_O", + """ + const void *ptr; + Py_ssize_t size; + if (PyObject_AsReadBuffer(args, &ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize((char*)ptr, size); + """), + ("writebuffer_as_string", "METH_O", + """ + void *ptr; + Py_ssize_t size; + if (PyObject_AsWriteBuffer(args, &ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize((char*)ptr, size); + """), + ("zero_out_writebuffer", "METH_O", + """ + void *ptr; + Py_ssize_t size; + Py_ssize_t i; + if (PyObject_AsWriteBuffer(args, &ptr, &size) < 0) + return NULL; + for (i = 0; i < size; i++) { + ((char*)ptr)[i] = 0; + } + Py_RETURN_NONE; + """), + ]) + + def test_string(self): + buffer_support = self.get_buffer_support() + + s = 'a\0x' + + assert buffer_support.check_readbuffer(s) + assert s == buffer_support.readbuffer_as_string(s) + assert raises(TypeError, buffer_support.writebuffer_as_string, s) + assert s == buffer_support.charbuffer_as_string(s) + + def test_buffer(self): + buffer_support = self.get_buffer_support() + + s = 'a\0x' + buf = buffer(s) + + assert buffer_support.check_readbuffer(buf) + assert s == buffer_support.readbuffer_as_string(buf) + assert raises(TypeError, buffer_support.writebuffer_as_string, buf) + assert s == buffer_support.charbuffer_as_string(buf) + + def test_mmap(self): + import mmap + buffer_support = self.get_buffer_support() + + s = 'a\0x' + mm = mmap.mmap(-1, 3) + mm[:] = s + + assert buffer_support.check_readbuffer(mm) + assert s == buffer_support.readbuffer_as_string(mm) + assert s == buffer_support.writebuffer_as_string(mm) + assert s == buffer_support.charbuffer_as_string(mm) + + s = '\0' * 3 + buffer_support.zero_out_writebuffer(mm) + assert s == ''.join(mm) + assert s == 
buffer_support.readbuffer_as_string(mm) + assert s == buffer_support.writebuffer_as_string(mm) + assert s == buffer_support.charbuffer_as_string(mm) + + s = '\0' * 3 + ro_mm = mmap.mmap(-1, 3, access=mmap.ACCESS_READ) + assert buffer_support.check_readbuffer(ro_mm) + assert s == buffer_support.readbuffer_as_string(ro_mm) + assert raises(TypeError, buffer_support.writebuffer_as_string, ro_mm) + assert s == buffer_support.charbuffer_as_string(ro_mm) + + def test_array(self): + import array + buffer_support = self.get_buffer_support() + + s = 'a\0x' + a = array.array('B', [5, 0, 10]) + + buffer_support.zero_out_writebuffer(a) + assert list(a) == [0, 0, 0] + + def test_nonbuffer(self): + # e.g. int + buffer_support = self.get_buffer_support() + + assert not buffer_support.check_readbuffer(42) + assert raises(TypeError, buffer_support.readbuffer_as_string, 42) + assert raises(TypeError, buffer_support.writebuffer_as_string, 42) + assert raises(TypeError, buffer_support.charbuffer_as_string, 42) + + def test_user_class(self): + class MyBuf(str): + pass + s = 'a\0x' + buf = MyBuf(s) + buffer_support = self.get_buffer_support() + + assert buffer_support.check_readbuffer(buf) + assert s == buffer_support.readbuffer_as_string(buf) + assert raises(TypeError, buffer_support.writebuffer_as_string, buf) + assert s == buffer_support.charbuffer_as_string(buf) + + diff --git a/pypy/module/cpyext/test0/test_bufferobject.py b/pypy/module/cpyext/test0/test_bufferobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test0/test_bufferobject.py @@ -0,0 +1,123 @@ +from rpython.rtyper.lltypesystem import lltype +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.api import PyObject + +class AppTestBufferObject(AppTestCpythonExtensionBase): + + def test_FromMemory(self): + module = self.import_extension('foo', [ + ("get_FromMemory", "METH_NOARGS", + """ + cbuf = 
malloc(4); + cbuf[0] = 'a'; + cbuf[1] = 'b'; + cbuf[2] = 'c'; + cbuf[3] = '\\0'; + return PyBuffer_FromMemory(cbuf, 4); + """), + ("free_buffer", "METH_NOARGS", + """ + free(cbuf); + Py_RETURN_NONE; + """), + ("check_ascharbuffer", "METH_O", + """ + char *ptr; + Py_ssize_t size; + if (PyObject_AsCharBuffer(args, (const char **)&ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize(ptr, size); + """) + ], prologue = """ + static char* cbuf = NULL; + """) + buf = module.get_FromMemory() + assert str(buf) == 'abc\0' + + assert module.check_ascharbuffer(buf) == 'abc\0' + + module.free_buffer() + + def test_Buffer_New(self): + module = self.import_extension('foo', [ + ("buffer_new", "METH_NOARGS", + """ + return PyBuffer_New(150); + """), + ]) + b = module.buffer_new() + raises(AttributeError, getattr, b, 'x') + + def test_array_buffer(self): + if self.runappdirect: + skip('PyBufferObject not available outside buffer object.c') + module = self.import_extension('foo', [ + ("roundtrip", "METH_O", + """ + PyBufferObject *buf = (PyBufferObject *)args; + return PyString_FromStringAndSize(buf->b_ptr, buf->b_size); + """), + ]) + import array + a = array.array('c', 'text') + b = buffer(a) + assert module.roundtrip(b) == 'text' + + + def test_issue2752(self): + iterations = 10 + if self.runappdirect: + iterations = 2000 + module = self.import_extension('foo', [ + ("test_mod", 'METH_VARARGS', + """ + PyObject *obj; + Py_buffer bp; + if (!PyArg_ParseTuple(args, "O", &obj)) + return NULL; + + if (PyObject_GetBuffer(obj, &bp, PyBUF_SIMPLE) == -1) + return NULL; + + if (((unsigned char*)bp.buf)[0] != '0') { + void * buf = (void*)bp.buf; + unsigned char val[4]; + char * s = PyString_AsString(obj); + memcpy(val, bp.buf, 4); + PyBuffer_Release(&bp); + if (PyObject_GetBuffer(obj, &bp, PyBUF_SIMPLE) == -1) + return NULL; + PyErr_Format(PyExc_ValueError, + "mismatch: %p [%x %x %x %x...] now %p [%x %x %x %x...] 
as str '%s'", + buf, val[0], val[1], val[2], val[3], + (void *)bp.buf, + ((unsigned char*)bp.buf)[0], + ((unsigned char*)bp.buf)[1], + ((unsigned char*)bp.buf)[2], + ((unsigned char*)bp.buf)[3], + s); + PyBuffer_Release(&bp); + return NULL; + } + + PyBuffer_Release(&bp); + Py_RETURN_NONE; + """), + ]) + bufsize = 4096 + def getdata(bufsize): + data = b'01234567' + for x in range(18): + data += data + if len(data) >= bufsize: + break + return data + for j in range(iterations): + block = getdata(bufsize) + assert block[:8] == '01234567' + try: + module.test_mod(block) + except ValueError as e: + print("%s at it=%d" % (e, j)) + assert False diff --git a/pypy/module/cpyext/test0/test_bytesobject.py b/pypy/module/cpyext/test0/test_bytesobject.py --- a/pypy/module/cpyext/test0/test_bytesobject.py +++ b/pypy/module/cpyext/test0/test_bytesobject.py @@ -314,3 +314,298 @@ w_obj = space.wrap(u"test") with raises_w(space, TypeError): api.PyBytes_FromObject(w_obj) + PyString_AS_STRING(o); + PyString_AS_STRING(u); + + return o; + """)]) + assert module.test_macro_invocations() == '' + + def test_hash_and_state(self): + module = self.import_extension('foo', [ + ("test_hash", "METH_VARARGS", + ''' + PyObject* obj = (PyTuple_GetItem(args, 0)); + long hash = ((PyBytesObject*)obj)->ob_shash; + return PyLong_FromLong(hash); + ''' + ), + ("test_sstate", "METH_NOARGS", + ''' + PyObject *s = PyString_FromString("xyz"); + /*int sstate = ((PyBytesObject*)s)->ob_sstate; + printf("sstate now %d\\n", sstate);*/ + PyString_InternInPlace(&s); + /*sstate = ((PyBytesObject*)s)->ob_sstate; + printf("sstate now %d\\n", sstate);*/ + Py_DECREF(s); + return PyBool_FromLong(1); + '''), + ], prologue='#include ') + res = module.test_hash("xyz") + assert res == hash('xyz') + # doesn't really test, but if printf is enabled will prove sstate + assert module.test_sstate() + + def test_subclass(self): + # taken from PyStringArrType_Type in numpy's scalartypes.c.src + module = self.import_extension('bar', [ + 
("newsubstr", "METH_O", + """ + PyObject * obj; + char * data; + int len; + + data = PyString_AS_STRING(args); + len = PyString_GET_SIZE(args); + if (data == NULL) + Py_RETURN_NONE; + obj = PyArray_Scalar(data, len); + return obj; + """), + ("get_len", "METH_O", + """ + return PyLong_FromLong(PyObject_Size(args)); + """), + ('has_nb_add', "METH_O", + ''' + if (args->ob_type->tp_as_number == NULL) { + Py_RETURN_FALSE; + } + if (args->ob_type->tp_as_number->nb_add == NULL) { + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; + '''), + ], prologue=""" + #include + PyTypeObject PyStringArrType_Type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "bar.string_", /* tp_name*/ + sizeof(PyBytesObject), /* tp_basicsize*/ + 0 /* tp_itemsize */ + }; + + static PyObject * + stringtype_repr(PyObject *self) + { + const char *dptr, *ip; + int len; + PyObject *new; + + ip = dptr = PyString_AS_STRING(self); + len = PyString_GET_SIZE(self); + dptr += len-1; + while(len > 0 && *dptr-- == 0) { + len--; + } + new = PyString_FromStringAndSize(ip, len); + if (new == NULL) { + return PyString_FromString(""); + } + return new; + } + + static PyObject * + stringtype_str(PyObject *self) + { + const char *dptr, *ip; + int len; + PyObject *new; + + ip = dptr = PyString_AS_STRING(self); + len = PyString_GET_SIZE(self); + dptr += len-1; + while(len > 0 && *dptr-- == 0) { + len--; + } + new = PyString_FromStringAndSize(ip, len); + if (new == NULL) { + return PyString_FromString(""); + } + return new; + } + + PyObject * + PyArray_Scalar(char *data, int n) + { + PyTypeObject *type = &PyStringArrType_Type; + PyObject *obj; + void *destptr; + int itemsize = n; + obj = type->tp_alloc(type, itemsize); + if (obj == NULL) { + return NULL; + } + destptr = PyString_AS_STRING(obj); + ((PyBytesObject *)obj)->ob_shash = -1; + memcpy(destptr, data, itemsize); + return obj; + } + """, more_init = ''' + PyStringArrType_Type.tp_alloc = NULL; + PyStringArrType_Type.tp_free = NULL; + + PyStringArrType_Type.tp_repr = 
stringtype_repr; + PyStringArrType_Type.tp_str = stringtype_str; + PyStringArrType_Type.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE; + PyStringArrType_Type.tp_itemsize = sizeof(char); + PyStringArrType_Type.tp_base = &PyString_Type; + PyStringArrType_Type.tp_hash = PyString_Type.tp_hash; + if (PyType_Ready(&PyStringArrType_Type) < 0) INITERROR; + ''') + + a = module.newsubstr('abc') + assert module.has_nb_add('a') is False + assert module.has_nb_add(a) is False + assert type(a).__name__ == 'string_' + assert a == 'abc' + assert 3 == module.get_len(a) + b = module.newsubstr('') + assert 0 == module.get_len(b) + +class TestBytes(BaseApiTest): + def test_bytes_resize(self, space): + py_str = new_empty_str(space, 10) + ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + py_str.c_ob_sval[0] = 'a' + py_str.c_ob_sval[1] = 'b' + py_str.c_ob_sval[2] = 'c' + ar[0] = rffi.cast(PyObject, py_str) + _PyString_Resize(space, ar, 3) + py_str = rffi.cast(PyBytesObject, ar[0]) + assert py_str.c_ob_size == 3 + assert py_str.c_ob_sval[1] == 'b' + assert py_str.c_ob_sval[3] == '\x00' + # the same for growing + ar[0] = rffi.cast(PyObject, py_str) + _PyString_Resize(space, ar, 10) + py_str = rffi.cast(PyBytesObject, ar[0]) + assert py_str.c_ob_size == 10 + assert py_str.c_ob_sval[1] == 'b' + assert py_str.c_ob_sval[10] == '\x00' + decref(space, ar[0]) + lltype.free(ar, flavor='raw') + + def test_string_buffer(self, space): + py_str = new_empty_str(space, 10) + c_buf = py_str.c_ob_type.c_tp_as_buffer + assert c_buf + py_obj = rffi.cast(PyObject, py_str) + assert generic_cpy_call(space, c_buf.c_bf_getsegcount, + py_obj, lltype.nullptr(Py_ssize_tP.TO)) == 1 + ref = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') + assert generic_cpy_call(space, c_buf.c_bf_getsegcount, + py_obj, ref) == 1 + assert ref[0] == 10 + lltype.free(ref, flavor='raw') + ref = lltype.malloc(rffi.VOIDPP.TO, 1, flavor='raw') + assert generic_cpy_call(space, c_buf.c_bf_getreadbuffer, + py_obj, 0, ref) == 10 + 
lltype.free(ref, flavor='raw') + decref(space, py_obj) + + def test_Concat(self, space): + ref = make_ref(space, space.wrap('abc')) + ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + ptr[0] = ref + prev_refcnt = ref.c_ob_refcnt + PyString_Concat(space, ptr, space.wrap('def')) + assert ref.c_ob_refcnt == prev_refcnt - 1 + assert space.str_w(from_ref(space, ptr[0])) == 'abcdef' + with pytest.raises(OperationError): + PyString_Concat(space, ptr, space.w_None) + assert not ptr[0] + ptr[0] = lltype.nullptr(PyObject.TO) + PyString_Concat(space, ptr, space.wrap('def')) # should not crash + lltype.free(ptr, flavor='raw') + + def test_ConcatAndDel(self, space): + ref1 = make_ref(space, space.wrap('abc')) + ref2 = make_ref(space, space.wrap('def')) + ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + ptr[0] = ref1 + prev_refcnf = ref2.c_ob_refcnt + PyString_ConcatAndDel(space, ptr, ref2) + assert space.str_w(from_ref(space, ptr[0])) == 'abcdef' + assert ref2.c_ob_refcnt == prev_refcnf - 1 + decref(space, ptr[0]) + ptr[0] = lltype.nullptr(PyObject.TO) + ref2 = make_ref(space, space.wrap('foo')) + prev_refcnf = ref2.c_ob_refcnt + PyString_ConcatAndDel(space, ptr, ref2) # should not crash + assert ref2.c_ob_refcnt == prev_refcnf - 1 + lltype.free(ptr, flavor='raw') + + def test_format(self, space): + assert "1 2" == space.unwrap( + PyString_Format(space, space.wrap('%s %d'), space.wrap((1, 2)))) + + def test_asbuffer(self, space): + bufp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') + lenp = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') + + w_text = space.wrap("text") + ref = make_ref(space, w_text) + prev_refcnt = ref.c_ob_refcnt + assert PyObject_AsCharBuffer(space, ref, bufp, lenp) == 0 + assert ref.c_ob_refcnt == prev_refcnt + assert lenp[0] == 4 + assert rffi.charp2str(bufp[0]) == 'text' + lltype.free(bufp, flavor='raw') + lltype.free(lenp, flavor='raw') + decref(space, ref) + + def test_intern(self, space): + buf = rffi.str2charp("test") + w_s1 = 
PyString_InternFromString(space, buf) + w_s2 = PyString_InternFromString(space, buf) + rffi.free_charp(buf) + assert w_s1 is w_s2 + + def test_AsEncodedObject(self, space): + ptr = space.wrap('abc') + + errors = rffi.str2charp("strict") + + encoding = rffi.str2charp("hex") + res = PyString_AsEncodedObject(space, ptr, encoding, errors) + assert space.unwrap(res) == "616263" + + res = PyString_AsEncodedObject(space, + ptr, encoding, lltype.nullptr(rffi.CCHARP.TO)) + assert space.unwrap(res) == "616263" + rffi.free_charp(encoding) + + encoding = rffi.str2charp("unknown_encoding") + with raises_w(space, LookupError): + PyString_AsEncodedObject(space, ptr, encoding, errors) + rffi.free_charp(encoding) + + rffi.free_charp(errors) + + NULL = lltype.nullptr(rffi.CCHARP.TO) + res = PyString_AsEncodedObject(space, ptr, NULL, NULL) + assert space.unwrap(res) == "abc" + with raises_w(space, TypeError): + PyString_AsEncodedObject(space, space.wrap(2), NULL, NULL) + + def test_AsDecodedObject(self, space): + w_str = space.wrap('caf\xe9') + encoding = rffi.str2charp("latin-1") + w_res = PyString_AsDecodedObject(space, w_str, encoding, None) + rffi.free_charp(encoding) + assert w_res._utf8 == u"caf\xe9".encode('utf8') + + def test_eq(self, space): + assert 1 == _PyString_Eq( + space, space.wrap("hello"), space.wrap("hello")) + assert 0 == _PyString_Eq( + space, space.wrap("hello"), space.wrap("world")) + + def test_join(self, space): + w_sep = space.wrap('') + w_seq = space.wrap(['a', 'b']) + w_joined = _PyString_Join(space, w_sep, w_seq) + assert space.unwrap(w_joined) == 'ab' diff --git a/pypy/module/cpyext/test0/test_intobject.py b/pypy/module/cpyext/test0/test_intobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test0/test_intobject.py @@ -0,0 +1,247 @@ +from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.intobject import ( + PyInt_Check, 
PyInt_AsLong, PyInt_AS_LONG, + PyInt_AsUnsignedLong, PyInt_AsUnsignedLongMask, + PyInt_AsUnsignedLongLongMask) +from pypy.module.cpyext.pyobject import (decref, make_ref, + get_w_obj_and_decref) +from pypy.module.cpyext.state import State +import sys + +class TestIntObject(BaseApiTest): + def test_intobject(self, space): + state = space.fromcache(State) + assert PyInt_Check(space, space.wrap(3)) + assert PyInt_Check(space, space.w_True) + assert not PyInt_Check(space, space.wrap((1, 2, 3))) + for i in [3, -5, -1, -sys.maxint, sys.maxint - 1]: + x = PyInt_AsLong(space, space.wrap(i)) + y = PyInt_AS_LONG(space, space.wrap(i)) + assert x == i + assert y == i + py_x = state.C.PyInt_FromLong(x + 1) + w_x = get_w_obj_and_decref(space, py_x) + assert space.type(w_x) is space.w_int + assert space.eq_w(w_x, space.wrap(i + 1)) + + with raises_w(space, TypeError): + PyInt_AsLong(space, space.w_None) + + with raises_w(space, TypeError): + PyInt_AsLong(space, None) + + assert PyInt_AsUnsignedLong(space, space.wrap(sys.maxint)) == sys.maxint + with raises_w(space, ValueError): + PyInt_AsUnsignedLong(space, space.wrap(-5)) + + assert (PyInt_AsUnsignedLongMask(space, space.wrap(sys.maxint)) + == sys.maxint) + assert (PyInt_AsUnsignedLongMask(space, space.wrap(10 ** 30)) + == 10 ** 30 % ((sys.maxint + 1) * 2)) + + assert (PyInt_AsUnsignedLongLongMask(space, space.wrap(sys.maxint)) + == sys.maxint) + assert (PyInt_AsUnsignedLongLongMask(space, space.wrap(10 ** 30)) + == 10 ** 30 % (2 ** 64)) + + def test_freelist_direct(self, space): + state = space.fromcache(State) + p_x = state.C.PyInt_FromLong(12345678) + decref(space, p_x) + p_y = state.C.PyInt_FromLong(87654321) + # check that the address is the same, i.e. 
that the freelist did its + # job + assert p_x == p_y + decref(space, p_y) + + def test_freelist_make_ref(self, space): + w_x = space.newint(12345678) + w_y = space.newint(87654321) + p_x = make_ref(space, w_x) + decref(space, p_x) + p_y = make_ref(space, w_y) + # check that the address is the same: note that w_x does NOT keep p_x + # alive, because in make_ref we have a special case for ints + assert p_x == p_y + decref(space, p_y) + + def test_freelist_int_subclass(self, space): + w_MyInt = space.appexec([], """(): + class MyInt(int): + pass + return MyInt""") + w_x = space.call_function(w_MyInt, space.newint(12345678)) + w_y = space.call_function(w_MyInt, space.newint(87654321)) + p_x = make_ref(space, w_x) + decref(space, p_x) + p_y = make_ref(space, w_y) + # now the address is different because the freelist does not work for + # int subclasses + assert p_x != p_y + decref(space, p_y) + + def test_coerce(self, space): + w_obj = space.appexec([], """(): + class Coerce(object): + def __int__(self): + return 42 + return Coerce()""") + assert PyInt_AsLong(space, w_obj) == 42 + +class AppTestIntObject(AppTestCpythonExtensionBase): + def test_fromstring(self): + module = self.import_extension('foo', [ + ("from_string", "METH_NOARGS", + """ + return PyInt_FromString("1234", NULL, 16); + """), + ]) + assert module.from_string() == 0x1234 + assert type(module.from_string()) is int + + def test_size_t(self): + module = self.import_extension('foo', [ + ("values", "METH_NOARGS", + """ + return Py_BuildValue("NNNN", + PyInt_FromSize_t(123), + PyInt_FromSize_t((size_t)-1), + PyInt_FromSsize_t(123), + PyInt_FromSsize_t((size_t)-1)); + """), + ]) + values = module.values() + types = [type(x) for x in values] + assert types == [int, long, int, int] + + def test_int_subtype(self): + module = self.import_extension( + 'foo', [ + ("newEnum", "METH_VARARGS", + """ + EnumObject *enumObj; + int intval; + PyObject *name; + + if (!PyArg_ParseTuple(args, "Oi", &name, &intval)) + return 
NULL; + + enumObj = PyObject_New(EnumObject, &Enum_Type); + if (!enumObj) { + return NULL; + } + + enumObj->ob_ival = intval; + Py_INCREF(name); + enumObj->ob_name = name; + + return (PyObject *)enumObj; + """), + ], + prologue=""" + #include "structmember.h" + typedef struct + { + PyObject_HEAD + long ob_ival; + PyObject* ob_name; + } EnumObject; + + static void + enum_dealloc(PyObject *op) + { + Py_DECREF(((EnumObject *)op)->ob_name); + Py_TYPE(op)->tp_free(op); + } + + static PyMemberDef enum_members[] = { + {"name", T_OBJECT, offsetof(EnumObject, ob_name), 0, NULL}, + {NULL} /* Sentinel */ + }; + + PyTypeObject Enum_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + /*tp_name*/ "Enum", + /*tp_basicsize*/ sizeof(EnumObject), + /*tp_itemsize*/ 0, + /*tp_dealloc*/ enum_dealloc, + /*tp_print*/ 0, + /*tp_getattr*/ 0, + /*tp_setattr*/ 0, + /*tp_compare*/ 0, + /*tp_repr*/ 0, + /*tp_as_number*/ 0, + /*tp_as_sequence*/ 0, + /*tp_as_mapping*/ 0, + /*tp_hash*/ 0, + /*tp_call*/ 0, + /*tp_str*/ 0, + /*tp_getattro*/ 0, + /*tp_setattro*/ 0, + /*tp_as_buffer*/ 0, + /*tp_flags*/ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + /*tp_doc*/ 0, + /*tp_traverse*/ 0, + /*tp_clear*/ 0, + /*tp_richcompare*/ 0, + /*tp_weaklistoffset*/ 0, + /*tp_iter*/ 0, + /*tp_iternext*/ 0, + /*tp_methods*/ 0, + /*tp_members*/ enum_members, + /*tp_getset*/ 0, + /*tp_base*/ 0, /* set to &PyInt_Type in init function for MSVC */ + /*tp_dict*/ 0, + /*tp_descr_get*/ 0, + /*tp_descr_set*/ 0, + /*tp_dictoffset*/ 0, + /*tp_init*/ 0, + /*tp_alloc*/ 0, + /*tp_new*/ 0 + }; + """, more_init = ''' + Enum_Type.tp_base = &PyInt_Type; + if (PyType_Ready(&Enum_Type) < 0) INITERROR; + ''') + + a = module.newEnum("ULTIMATE_ANSWER", 42) + assert type(a).__name__ == "Enum" + assert isinstance(a, int) + assert a == int(a) == 42 + assert a.name == "ULTIMATE_ANSWER" + + def test_int_cast(self): + mod = self.import_extension('foo', [ + #prove it works for ints + ("test_int", "METH_NOARGS", + """ + PyObject * obj = PyInt_FromLong(42); + 
PyObject * val; + if (!PyInt_Check(obj)) { + Py_DECREF(obj); + PyErr_SetNone(PyExc_ValueError); + return NULL; + } + val = PyInt_FromLong(((PyIntObject *)obj)->ob_ival); + Py_DECREF(obj); + return val; + """ + ), + ]) + i = mod.test_int() + assert isinstance(i, int) + assert i == 42 + + def test_int_macros(self): + mod = self.import_extension('foo', [ + ("test_macros", "METH_NOARGS", + """ + PyObject * obj = PyInt_FromLong(42); + PyIntObject * i = (PyIntObject*)obj; + PyInt_AS_LONG(obj); + PyInt_AS_LONG(i); + Py_RETURN_NONE; + """ + ), + ]) diff --git a/pypy/module/select/__init__.py b/pypy/module/select/__init__.py --- a/pypy/module/select/__init__.py +++ b/pypy/module/select/__init__.py @@ -3,6 +3,7 @@ import sys import os +from rpython.rlib import _rsocket_rffi as _c class Module(MixedModule): @@ -31,6 +32,10 @@ for symbol in symbol_map: interpleveldefs[symbol] = "space.wrap(interp_kqueue.%s)" % symbol + if _c.PIPE_BUF is not None: + interpleveldefs['PIPE_BUF'] = 'space.wrap(%r)' % _c.PIPE_BUF + + def buildloaders(cls): from rpython.rlib import rpoll for name in rpoll.eventnames: diff --git a/pypy/module/select/test/test_select.py b/pypy/module/select/test/test_select.py --- a/pypy/module/select/test/test_select.py +++ b/pypy/module/select/test/test_select.py @@ -319,6 +319,11 @@ # ^^^ CPython gives 100, PyPy gives 1. I think both are OK as # long as there is no crash. + def test_PIPE_BUF(self): + # no PIPE_BUF on Windows; this test class is skipped on Windows. + import select + assert isinstance(select.PIPE_BUF, int) + class AppTestSelectWithSockets(_AppTestSelect): """Same tests with connected sockets. 
diff --git a/pypy/module/test_lib_pypy/test_code_extra.py b/pypy/module/test_lib_pypy/test_code_extra.py new file mode 100644 --- /dev/null +++ b/pypy/module/test_lib_pypy/test_code_extra.py @@ -0,0 +1,19 @@ +import py +import sys +import cStringIO +import code + + +def test_flush_stdout_on_error(): + runner = code.InteractiveInterpreter() + old_stdout = sys.stdout + try: + mystdout = cStringIO.StringIO() + sys.stdout = mystdout + runner.runcode(compile("print 5,;0/0", "", "exec")) + finally: + sys.stdout = old_stdout + + if '__pypy__' not in sys.builtin_module_names: + py.test.skip('pypy only test') + assert mystdout.getvalue() == "5\n" diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1164,3 +1164,31 @@ assert u'A\u03a3\u0345'.lower() == u'a\u03c2\u0345' assert u'\u03a3\u0345 '.lower() == u'\u03c3\u0345 ' + def test_unicode_constructor_misc(self): + x = u'foo' + x += u'bar' + assert unicode(x) is x + # + class U(unicode): + def __unicode__(self): + return u'BOK' + u = U(x) + assert unicode(u) == u'BOK' + # + class U2(unicode): + pass + z = U2(u'foobaz') + assert type(unicode(z)) is unicode + assert unicode(z) == u'foobaz' + # + # two completely corner cases where we differ from CPython: + #assert unicode(encoding='supposedly_the_encoding') == u'' + #assert unicode(errors='supposedly_the_error') == u'' + e = raises(TypeError, unicode, u'', 'supposedly_the_encoding') + assert str(e.value) == 'decoding Unicode is not supported' + e = raises(TypeError, unicode, u'', errors='supposedly_the_error') + assert str(e.value) == 'decoding Unicode is not supported' + e = raises(TypeError, unicode, u, 'supposedly_the_encoding') + assert str(e.value) == 'decoding Unicode is not supported' + e = raises(TypeError, unicode, z, 'supposedly_the_encoding') + assert str(e.value) == 'decoding Unicode is not supported' diff 
--git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -122,7 +122,7 @@ @staticmethod def convert_arg_to_w_unicode(space, w_other, strict=None): - if isinstance(w_other, W_UnicodeObject): + if space.is_w(space.type(w_other), space.w_unicode): return w_other if space.isinstance_w(w_other, space.w_bytes): raise oefmt(space.w_TypeError, @@ -130,7 +130,7 @@ if strict: raise oefmt(space.w_TypeError, "%s arg must be None, unicode or str", strict) - return unicode_from_encoded_object(space, w_other, None, "strict") + return unicode_from_encoded_object(space, w_other, 'utf8', "strict") def convert_to_w_unicode(self, space): return self diff --git a/rpython/doc/examples.rst b/rpython/doc/examples.rst --- a/rpython/doc/examples.rst +++ b/rpython/doc/examples.rst @@ -19,7 +19,7 @@ * Typhon, 'A virtual machine for Monte', in active development, https://github.com/monte-language/typhon * Tulip, an untyped functional language, in language design mode, maintained, - https://github.com/tulip-lang/tulip/ + https://github.com/tulip-lang/tulip/ * Pycket, a Racket implementation, proof of concept, small language core working, a lot of primitives are missing. 
Slow development https://github.com/samth/pycket diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ b/rpython/rlib/_rsocket_rffi.py @@ -33,6 +33,7 @@ 'arpa/inet.h', 'stdint.h', 'errno.h', + 'limits.h', ) if _HAS_AF_PACKET: includes += ('netpacket/packet.h', @@ -113,6 +114,7 @@ F_GETFL = platform.DefinedConstantInteger('F_GETFL') F_SETFL = platform.DefinedConstantInteger('F_SETFL') FIONBIO = platform.DefinedConstantInteger('FIONBIO') + PIPE_BUF = platform.DefinedConstantInteger('PIPE_BUF') INVALID_SOCKET = platform.DefinedConstantInteger('INVALID_SOCKET') INET_ADDRSTRLEN = platform.DefinedConstantInteger('INET_ADDRSTRLEN') @@ -1085,6 +1087,7 @@ WSAEWOULDBLOCK = cConfig.WSAEWOULDBLOCK or cConfig.EWOULDBLOCK WSAEAFNOSUPPORT = cConfig.WSAEAFNOSUPPORT or cConfig.EAFNOSUPPORT EISCONN = cConfig.EISCONN or cConfig.WSAEISCONN +PIPE_BUF = cConfig.PIPE_BUF # may be None linux = cConfig.linux WIN32 = cConfig.WIN32 From pypy.commits at gmail.com Tue Sep 11 16:33:11 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 13:33:11 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: merge py3.5 into branch Message-ID: <5b982687.1c69fb81.16970.b8c1@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95098:b806e254dff4 Date: 2018-09-11 22:56 +0300 http://bitbucket.org/pypy/pypy/changeset/b806e254dff4/ Log: merge py3.5 into branch diff --git a/pypy/module/_io/test/test_ztranslation.py b/pypy/module/_io/test/test_ztranslation.py new file mode 100644 --- /dev/null +++ b/pypy/module/_io/test/test_ztranslation.py @@ -0,0 +1,15 @@ +from pypy.interpreter.typedef import GetSetProperty +from pypy.module.exceptions.interp_exceptions import W_BaseException +from pypy.objspace.fake.checkmodule import checkmodule + +def test_checkmodule(): + # XXX: PyTraceback usage in these methods blows up checkmodule + def descr_gettraceback(self, space): + return space.w_None + def descr_settraceback(self, 
space, w_newtraceback): + pass + W_BaseException.descr_gettraceback = descr_gettraceback + W_BaseException.descr_settraceback = descr_settraceback + W_BaseException.typedef.add_entries( + __traceback__=GetSetProperty(descr_gettraceback, descr_settraceback)) + checkmodule('_io') diff --git a/pypy/module/cpyext/test0/test_abstract.py b/pypy/module/cpyext/test0/test_abstract.py deleted file mode 100644 --- a/pypy/module/cpyext/test0/test_abstract.py +++ /dev/null @@ -1,130 +0,0 @@ -from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase -import pytest - -class AppTestBufferProtocol(AppTestCpythonExtensionBase): - """Tests for the old buffer protocol.""" - - def w_get_buffer_support(self): - return self.import_extension('buffer_support', [ - ("charbuffer_as_string", "METH_O", - """ - char *ptr; - Py_ssize_t size; - if (PyObject_AsCharBuffer(args, (const char **)&ptr, &size) < 0) - return NULL; - return PyString_FromStringAndSize(ptr, size); - """), - ("check_readbuffer", "METH_O", - """ - return PyBool_FromLong(PyObject_CheckReadBuffer(args)); - """), - ("readbuffer_as_string", "METH_O", - """ - const void *ptr; - Py_ssize_t size; - if (PyObject_AsReadBuffer(args, &ptr, &size) < 0) - return NULL; - return PyString_FromStringAndSize((char*)ptr, size); - """), - ("writebuffer_as_string", "METH_O", - """ - void *ptr; - Py_ssize_t size; - if (PyObject_AsWriteBuffer(args, &ptr, &size) < 0) - return NULL; - return PyString_FromStringAndSize((char*)ptr, size); - """), - ("zero_out_writebuffer", "METH_O", - """ - void *ptr; - Py_ssize_t size; - Py_ssize_t i; - if (PyObject_AsWriteBuffer(args, &ptr, &size) < 0) - return NULL; - for (i = 0; i < size; i++) { - ((char*)ptr)[i] = 0; - } - Py_RETURN_NONE; - """), - ]) - - def test_string(self): - buffer_support = self.get_buffer_support() - - s = 'a\0x' - - assert buffer_support.check_readbuffer(s) - assert s == buffer_support.readbuffer_as_string(s) - assert raises(TypeError, 
buffer_support.writebuffer_as_string, s) - assert s == buffer_support.charbuffer_as_string(s) - - def test_buffer(self): - buffer_support = self.get_buffer_support() - - s = 'a\0x' - buf = buffer(s) - - assert buffer_support.check_readbuffer(buf) - assert s == buffer_support.readbuffer_as_string(buf) - assert raises(TypeError, buffer_support.writebuffer_as_string, buf) - assert s == buffer_support.charbuffer_as_string(buf) - - def test_mmap(self): - import mmap - buffer_support = self.get_buffer_support() - - s = 'a\0x' - mm = mmap.mmap(-1, 3) - mm[:] = s - - assert buffer_support.check_readbuffer(mm) - assert s == buffer_support.readbuffer_as_string(mm) - assert s == buffer_support.writebuffer_as_string(mm) - assert s == buffer_support.charbuffer_as_string(mm) - - s = '\0' * 3 - buffer_support.zero_out_writebuffer(mm) - assert s == ''.join(mm) - assert s == buffer_support.readbuffer_as_string(mm) - assert s == buffer_support.writebuffer_as_string(mm) - assert s == buffer_support.charbuffer_as_string(mm) - - s = '\0' * 3 - ro_mm = mmap.mmap(-1, 3, access=mmap.ACCESS_READ) - assert buffer_support.check_readbuffer(ro_mm) - assert s == buffer_support.readbuffer_as_string(ro_mm) - assert raises(TypeError, buffer_support.writebuffer_as_string, ro_mm) - assert s == buffer_support.charbuffer_as_string(ro_mm) - - def test_array(self): - import array - buffer_support = self.get_buffer_support() - - s = 'a\0x' - a = array.array('B', [5, 0, 10]) - - buffer_support.zero_out_writebuffer(a) - assert list(a) == [0, 0, 0] - - def test_nonbuffer(self): - # e.g. 
int - buffer_support = self.get_buffer_support() - - assert not buffer_support.check_readbuffer(42) - assert raises(TypeError, buffer_support.readbuffer_as_string, 42) - assert raises(TypeError, buffer_support.writebuffer_as_string, 42) - assert raises(TypeError, buffer_support.charbuffer_as_string, 42) - - def test_user_class(self): - class MyBuf(str): - pass - s = 'a\0x' - buf = MyBuf(s) - buffer_support = self.get_buffer_support() - - assert buffer_support.check_readbuffer(buf) - assert s == buffer_support.readbuffer_as_string(buf) - assert raises(TypeError, buffer_support.writebuffer_as_string, buf) - assert s == buffer_support.charbuffer_as_string(buf) - - diff --git a/pypy/module/cpyext/test0/test_bufferobject.py b/pypy/module/cpyext/test0/test_bufferobject.py deleted file mode 100644 --- a/pypy/module/cpyext/test0/test_bufferobject.py +++ /dev/null @@ -1,123 +0,0 @@ -from rpython.rtyper.lltypesystem import lltype -from pypy.module.cpyext.test.test_api import BaseApiTest -from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase -from pypy.module.cpyext.api import PyObject - -class AppTestBufferObject(AppTestCpythonExtensionBase): - - def test_FromMemory(self): - module = self.import_extension('foo', [ - ("get_FromMemory", "METH_NOARGS", - """ - cbuf = malloc(4); - cbuf[0] = 'a'; - cbuf[1] = 'b'; - cbuf[2] = 'c'; - cbuf[3] = '\\0'; - return PyBuffer_FromMemory(cbuf, 4); - """), - ("free_buffer", "METH_NOARGS", - """ - free(cbuf); - Py_RETURN_NONE; - """), - ("check_ascharbuffer", "METH_O", - """ - char *ptr; - Py_ssize_t size; - if (PyObject_AsCharBuffer(args, (const char **)&ptr, &size) < 0) - return NULL; - return PyString_FromStringAndSize(ptr, size); - """) - ], prologue = """ - static char* cbuf = NULL; - """) - buf = module.get_FromMemory() - assert str(buf) == 'abc\0' - - assert module.check_ascharbuffer(buf) == 'abc\0' - - module.free_buffer() - - def test_Buffer_New(self): - module = self.import_extension('foo', [ - ("buffer_new", 
"METH_NOARGS", - """ - return PyBuffer_New(150); - """), - ]) - b = module.buffer_new() - raises(AttributeError, getattr, b, 'x') - - def test_array_buffer(self): - if self.runappdirect: - skip('PyBufferObject not available outside buffer object.c') - module = self.import_extension('foo', [ - ("roundtrip", "METH_O", - """ - PyBufferObject *buf = (PyBufferObject *)args; - return PyString_FromStringAndSize(buf->b_ptr, buf->b_size); - """), - ]) - import array - a = array.array('c', 'text') - b = buffer(a) - assert module.roundtrip(b) == 'text' - - - def test_issue2752(self): - iterations = 10 - if self.runappdirect: - iterations = 2000 - module = self.import_extension('foo', [ - ("test_mod", 'METH_VARARGS', - """ - PyObject *obj; - Py_buffer bp; - if (!PyArg_ParseTuple(args, "O", &obj)) - return NULL; - - if (PyObject_GetBuffer(obj, &bp, PyBUF_SIMPLE) == -1) - return NULL; - - if (((unsigned char*)bp.buf)[0] != '0') { - void * buf = (void*)bp.buf; - unsigned char val[4]; - char * s = PyString_AsString(obj); - memcpy(val, bp.buf, 4); - PyBuffer_Release(&bp); - if (PyObject_GetBuffer(obj, &bp, PyBUF_SIMPLE) == -1) - return NULL; - PyErr_Format(PyExc_ValueError, - "mismatch: %p [%x %x %x %x...] now %p [%x %x %x %x...] 
as str '%s'", - buf, val[0], val[1], val[2], val[3], - (void *)bp.buf, - ((unsigned char*)bp.buf)[0], - ((unsigned char*)bp.buf)[1], - ((unsigned char*)bp.buf)[2], - ((unsigned char*)bp.buf)[3], - s); - PyBuffer_Release(&bp); - return NULL; - } - - PyBuffer_Release(&bp); - Py_RETURN_NONE; - """), - ]) - bufsize = 4096 - def getdata(bufsize): - data = b'01234567' - for x in range(18): - data += data - if len(data) >= bufsize: - break - return data - for j in range(iterations): - block = getdata(bufsize) - assert block[:8] == '01234567' - try: - module.test_mod(block) - except ValueError as e: - print("%s at it=%d" % (e, j)) - assert False diff --git a/pypy/module/cpyext/test0/test_intobject.py b/pypy/module/cpyext/test0/test_intobject.py deleted file mode 100644 --- a/pypy/module/cpyext/test0/test_intobject.py +++ /dev/null @@ -1,247 +0,0 @@ -from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w -from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase -from pypy.module.cpyext.intobject import ( - PyInt_Check, PyInt_AsLong, PyInt_AS_LONG, - PyInt_AsUnsignedLong, PyInt_AsUnsignedLongMask, - PyInt_AsUnsignedLongLongMask) -from pypy.module.cpyext.pyobject import (decref, make_ref, - get_w_obj_and_decref) -from pypy.module.cpyext.state import State -import sys - -class TestIntObject(BaseApiTest): - def test_intobject(self, space): - state = space.fromcache(State) - assert PyInt_Check(space, space.wrap(3)) - assert PyInt_Check(space, space.w_True) - assert not PyInt_Check(space, space.wrap((1, 2, 3))) - for i in [3, -5, -1, -sys.maxint, sys.maxint - 1]: - x = PyInt_AsLong(space, space.wrap(i)) - y = PyInt_AS_LONG(space, space.wrap(i)) - assert x == i - assert y == i - py_x = state.C.PyInt_FromLong(x + 1) - w_x = get_w_obj_and_decref(space, py_x) - assert space.type(w_x) is space.w_int - assert space.eq_w(w_x, space.wrap(i + 1)) - - with raises_w(space, TypeError): - PyInt_AsLong(space, space.w_None) - - with raises_w(space, TypeError): - 
PyInt_AsLong(space, None) - - assert PyInt_AsUnsignedLong(space, space.wrap(sys.maxint)) == sys.maxint - with raises_w(space, ValueError): - PyInt_AsUnsignedLong(space, space.wrap(-5)) - - assert (PyInt_AsUnsignedLongMask(space, space.wrap(sys.maxint)) - == sys.maxint) - assert (PyInt_AsUnsignedLongMask(space, space.wrap(10 ** 30)) - == 10 ** 30 % ((sys.maxint + 1) * 2)) - - assert (PyInt_AsUnsignedLongLongMask(space, space.wrap(sys.maxint)) - == sys.maxint) - assert (PyInt_AsUnsignedLongLongMask(space, space.wrap(10 ** 30)) - == 10 ** 30 % (2 ** 64)) - - def test_freelist_direct(self, space): - state = space.fromcache(State) - p_x = state.C.PyInt_FromLong(12345678) - decref(space, p_x) - p_y = state.C.PyInt_FromLong(87654321) - # check that the address is the same, i.e. that the freelist did its - # job - assert p_x == p_y - decref(space, p_y) - - def test_freelist_make_ref(self, space): - w_x = space.newint(12345678) - w_y = space.newint(87654321) - p_x = make_ref(space, w_x) - decref(space, p_x) - p_y = make_ref(space, w_y) - # check that the address is the same: note that w_x does NOT keep p_x - # alive, because in make_ref we have a special case for ints - assert p_x == p_y - decref(space, p_y) - - def test_freelist_int_subclass(self, space): - w_MyInt = space.appexec([], """(): - class MyInt(int): - pass - return MyInt""") - w_x = space.call_function(w_MyInt, space.newint(12345678)) - w_y = space.call_function(w_MyInt, space.newint(87654321)) - p_x = make_ref(space, w_x) - decref(space, p_x) - p_y = make_ref(space, w_y) - # now the address is different because the freelist does not work for - # int subclasses - assert p_x != p_y - decref(space, p_y) - - def test_coerce(self, space): - w_obj = space.appexec([], """(): - class Coerce(object): - def __int__(self): - return 42 - return Coerce()""") - assert PyInt_AsLong(space, w_obj) == 42 - -class AppTestIntObject(AppTestCpythonExtensionBase): - def test_fromstring(self): - module = self.import_extension('foo', 
[ - ("from_string", "METH_NOARGS", - """ - return PyInt_FromString("1234", NULL, 16); - """), - ]) - assert module.from_string() == 0x1234 - assert type(module.from_string()) is int - - def test_size_t(self): - module = self.import_extension('foo', [ - ("values", "METH_NOARGS", - """ - return Py_BuildValue("NNNN", - PyInt_FromSize_t(123), - PyInt_FromSize_t((size_t)-1), - PyInt_FromSsize_t(123), - PyInt_FromSsize_t((size_t)-1)); - """), - ]) - values = module.values() - types = [type(x) for x in values] - assert types == [int, long, int, int] - - def test_int_subtype(self): - module = self.import_extension( - 'foo', [ - ("newEnum", "METH_VARARGS", - """ - EnumObject *enumObj; - int intval; - PyObject *name; - - if (!PyArg_ParseTuple(args, "Oi", &name, &intval)) - return NULL; - - enumObj = PyObject_New(EnumObject, &Enum_Type); - if (!enumObj) { - return NULL; - } - - enumObj->ob_ival = intval; - Py_INCREF(name); - enumObj->ob_name = name; - - return (PyObject *)enumObj; - """), - ], - prologue=""" - #include "structmember.h" - typedef struct - { - PyObject_HEAD - long ob_ival; - PyObject* ob_name; - } EnumObject; - - static void - enum_dealloc(PyObject *op) - { - Py_DECREF(((EnumObject *)op)->ob_name); - Py_TYPE(op)->tp_free(op); - } - - static PyMemberDef enum_members[] = { - {"name", T_OBJECT, offsetof(EnumObject, ob_name), 0, NULL}, - {NULL} /* Sentinel */ - }; - - PyTypeObject Enum_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - /*tp_name*/ "Enum", - /*tp_basicsize*/ sizeof(EnumObject), - /*tp_itemsize*/ 0, - /*tp_dealloc*/ enum_dealloc, - /*tp_print*/ 0, - /*tp_getattr*/ 0, - /*tp_setattr*/ 0, - /*tp_compare*/ 0, - /*tp_repr*/ 0, - /*tp_as_number*/ 0, - /*tp_as_sequence*/ 0, - /*tp_as_mapping*/ 0, - /*tp_hash*/ 0, - /*tp_call*/ 0, - /*tp_str*/ 0, - /*tp_getattro*/ 0, - /*tp_setattro*/ 0, - /*tp_as_buffer*/ 0, - /*tp_flags*/ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, - /*tp_doc*/ 0, - /*tp_traverse*/ 0, - /*tp_clear*/ 0, - /*tp_richcompare*/ 0, - /*tp_weaklistoffset*/ 
0, - /*tp_iter*/ 0, - /*tp_iternext*/ 0, - /*tp_methods*/ 0, - /*tp_members*/ enum_members, - /*tp_getset*/ 0, - /*tp_base*/ 0, /* set to &PyInt_Type in init function for MSVC */ - /*tp_dict*/ 0, - /*tp_descr_get*/ 0, - /*tp_descr_set*/ 0, - /*tp_dictoffset*/ 0, - /*tp_init*/ 0, - /*tp_alloc*/ 0, - /*tp_new*/ 0 - }; - """, more_init = ''' - Enum_Type.tp_base = &PyInt_Type; - if (PyType_Ready(&Enum_Type) < 0) INITERROR; - ''') - - a = module.newEnum("ULTIMATE_ANSWER", 42) - assert type(a).__name__ == "Enum" - assert isinstance(a, int) - assert a == int(a) == 42 - assert a.name == "ULTIMATE_ANSWER" - - def test_int_cast(self): - mod = self.import_extension('foo', [ - #prove it works for ints - ("test_int", "METH_NOARGS", - """ - PyObject * obj = PyInt_FromLong(42); - PyObject * val; - if (!PyInt_Check(obj)) { - Py_DECREF(obj); - PyErr_SetNone(PyExc_ValueError); - return NULL; - } - val = PyInt_FromLong(((PyIntObject *)obj)->ob_ival); - Py_DECREF(obj); - return val; - """ - ), - ]) - i = mod.test_int() - assert isinstance(i, int) - assert i == 42 - - def test_int_macros(self): - mod = self.import_extension('foo', [ - ("test_macros", "METH_NOARGS", - """ - PyObject * obj = PyInt_FromLong(42); - PyIntObject * i = (PyIntObject*)obj; - PyInt_AS_LONG(obj); - PyInt_AS_LONG(i); - Py_RETURN_NONE; - """ - ), - ]) diff --git a/pypy/module/test_lib_pypy/test_code_extra.py b/pypy/module/test_lib_pypy/test_code_extra.py deleted file mode 100644 --- a/pypy/module/test_lib_pypy/test_code_extra.py +++ /dev/null @@ -1,19 +0,0 @@ -import py -import sys -import cStringIO -import code - - -def test_flush_stdout_on_error(): - runner = code.InteractiveInterpreter() - old_stdout = sys.stdout - try: - mystdout = cStringIO.StringIO() - sys.stdout = mystdout - runner.runcode(compile("print 5,;0/0", "", "exec")) - finally: - sys.stdout = old_stdout - - if '__pypy__' not in sys.builtin_module_names: - py.test.skip('pypy only test') - assert mystdout.getvalue() == "5\n" diff --git 
a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1164,31 +1164,53 @@ assert u'A\u03a3\u0345'.lower() == u'a\u03c2\u0345' assert u'\u03a3\u0345 '.lower() == u'\u03c3\u0345 ' + def test_encode_wrong_errors(self): + assert ''.encode(errors='some_wrong_name') == b'' + + def test_casefold(self): + assert u'hello'.casefold() == u'hello' + assert u'hELlo'.casefold() == u'hello' + assert u'ß'.casefold() == u'ss' + assert u'fi'.casefold() == u'fi' + assert u'\u03a3'.casefold() == u'\u03c3' + assert u'A\u0345\u03a3'.casefold() == u'a\u03b9\u03c3' + assert u'\u00b5'.casefold() == u'\u03bc' + + def test_lower_3a3(self): + # Special case for GREEK CAPITAL LETTER SIGMA U+03A3 + assert u'\u03a3'.lower() == u'\u03c3' + assert u'\u0345\u03a3'.lower() == u'\u0345\u03c3' + assert u'A\u0345\u03a3'.lower() == u'a\u0345\u03c2' + assert u'A\u0345\u03a3a'.lower() == u'a\u0345\u03c3a' + assert u'A\u0345\u03a3'.lower() == u'a\u0345\u03c2' + assert u'A\u03a3\u0345'.lower() == u'a\u03c2\u0345' + assert u'\u03a3\u0345 '.lower() == u'\u03c3\u0345 ' + def test_unicode_constructor_misc(self): x = u'foo' x += u'bar' - assert unicode(x) is x + assert str(x) is x # - class U(unicode): - def __unicode__(self): + class U(str): + def __str__(self): return u'BOK' u = U(x) - assert unicode(u) == u'BOK' + assert str(u) == u'BOK' # - class U2(unicode): + class U2(str): pass z = U2(u'foobaz') - assert type(unicode(z)) is unicode - assert unicode(z) == u'foobaz' + assert type(str(z)) is str + assert str(z) == u'foobaz' # # two completely corner cases where we differ from CPython: #assert unicode(encoding='supposedly_the_encoding') == u'' #assert unicode(errors='supposedly_the_error') == u'' - e = raises(TypeError, unicode, u'', 'supposedly_the_encoding') - assert str(e.value) == 'decoding Unicode is not supported' - e = raises(TypeError, unicode, u'', 
errors='supposedly_the_error') - assert str(e.value) == 'decoding Unicode is not supported' - e = raises(TypeError, unicode, u, 'supposedly_the_encoding') - assert str(e.value) == 'decoding Unicode is not supported' - e = raises(TypeError, unicode, z, 'supposedly_the_encoding') - assert str(e.value) == 'decoding Unicode is not supported' + e = raises(TypeError, str, u'', 'supposedly_the_encoding') + assert str(e.value) == 'decoding str is not supported' + e = raises(TypeError, str, u'', errors='supposedly_the_error') + assert str(e.value) == 'decoding str is not supported' + e = raises(TypeError, str, u, 'supposedly_the_encoding') + assert str(e.value) == 'decoding str is not supported' + e = raises(TypeError, str, z, 'supposedly_the_encoding') + assert str(e.value) == 'decoding str is not supported' diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -146,6 +146,9 @@ def _generic_name(self): return "str" + def _generic_name(self): + return "str" + def _isupper(self, ch): return unicodedb.isupper(ch) @@ -1219,7 +1222,7 @@ try: rutf8.check_ascii(utf8) except rutf8.CheckError as a: - eh = unicodehelper.encode_error_handler(space) + eh = unicodehelper.encode_error_handler(space) eh(None, "ascii", "ordinal not in range(128)", utf8, a.pos, a.pos + 1) assert False, "always raises" @@ -1257,6 +1260,7 @@ "use codecs.decode() to decode to arbitrary types", encoding, w_retval) + w_retval) return w_retval @@ -1280,7 +1284,6 @@ if space.lookup(w_obj, "__str__") is not None: return space.str(w_obj) return space.repr(w_obj) - def ascii_from_object(space, w_obj): """Implements builtins.ascii()""" # repr is guaranteed to be unicode From pypy.commits at gmail.com Tue Sep 11 16:34:26 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 13:34:26 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: fix merge Message-ID: 
<5b9826d2.1c69fb81.7de65.2da8@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95099:afe9a3fb0c6b Date: 2018-09-11 23:33 +0300 http://bitbucket.org/pypy/pypy/changeset/afe9a3fb0c6b/ Log: fix merge diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1222,7 +1222,7 @@ try: rutf8.check_ascii(utf8) except rutf8.CheckError as a: - eh = unicodehelper.encode_error_handler(space) + eh = unicodehelper.encode_error_handler(space) eh(None, "ascii", "ordinal not in range(128)", utf8, a.pos, a.pos + 1) assert False, "always raises" @@ -1260,7 +1260,6 @@ "use codecs.decode() to decode to arbitrary types", encoding, w_retval) - w_retval) return w_retval From pypy.commits at gmail.com Tue Sep 11 17:16:19 2018 From: pypy.commits at gmail.com (arigo) Date: Tue, 11 Sep 2018 14:16:19 -0700 (PDT) Subject: [pypy-commit] cffi default: Fix documentation for 70838cb63445 Message-ID: <5b9830a3.1c69fb81.b345b.2815@mx.google.com> Author: Armin Rigo Branch: Changeset: r3159:97a61f7b0bcd Date: 2018-09-11 23:16 +0200 http://bitbucket.org/cffi/cffi/changeset/97a61f7b0bcd/ Log: Fix documentation for 70838cb63445 diff --git a/doc/source/cdef.rst b/doc/source/cdef.rst --- a/doc/source/cdef.rst +++ b/doc/source/cdef.rst @@ -579,13 +579,14 @@ ``NAME.cpython-35m-x86_64-linux-gnu.so``. You can manually rename it to ``NAME.abi3.so``, or use setuptools version 26 or later. Also, note that compiling with a debug version of Python will not actually define -``Py_LIMITED_API``, as doing so makes ``Python.h`` unhappy. Finally, -``Py_LIMITED_API`` is not defined on Windows, because this makes -modules which cannot be used with ``virtualenv`` (issues `#355`__ and -`#350`__). +``Py_LIMITED_API``, as doing so makes ``Python.h`` unhappy. -.. __: https://bitbucket.org/cffi/cffi/issues/355/importerror-dll-load-failed-on-windows -.. 
__: https://bitbucket.org/cffi/cffi/issues/350/issue-with-py_limited_api-on-windows +*New in version 1.12:* ``Py_LIMITED_API`` is now defined on Windows too. +If you use ``virtualenv``, you need a recent version of it: versions +older than 16.0.0 forgot to copy ``python3.dll`` into the virtual +environment. In case upgrading ``virtualenv`` is a real problem, you +can manually edit the C code to remove the first line ``# define +Py_LIMITED_API``. **ffibuilder.compile(tmpdir='.', verbose=False, debug=None):** explicitly generate the .py or .c file, From pypy.commits at gmail.com Tue Sep 11 17:32:11 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 14:32:11 -0700 (PDT) Subject: [pypy-commit] pypy default: Backed out changeset: 943b0266d564 Message-ID: <5b98345b.1c69fb81.42d5a.667f@mx.google.com> Author: Matti Picus Branch: Changeset: r95100:0fe67a6981d9 Date: 2018-09-11 23:35 +0300 http://bitbucket.org/pypy/pypy/changeset/0fe67a6981d9/ Log: Backed out changeset: 943b0266d564 diff too long, truncating to 2000 out of 24114 lines diff --git a/pypy/module/cpyext/test/test_abstract.py b/pypy/module/cpyext/test/test_abstract.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_abstract.py @@ -0,0 +1,130 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +import pytest + +class AppTestBufferProtocol(AppTestCpythonExtensionBase): + """Tests for the old buffer protocol.""" + + def w_get_buffer_support(self): + return self.import_extension('buffer_support', [ + ("charbuffer_as_string", "METH_O", + """ + char *ptr; + Py_ssize_t size; + if (PyObject_AsCharBuffer(args, (const char **)&ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize(ptr, size); + """), + ("check_readbuffer", "METH_O", + """ + return PyBool_FromLong(PyObject_CheckReadBuffer(args)); + """), + ("readbuffer_as_string", "METH_O", + """ + const void *ptr; + Py_ssize_t size; + if (PyObject_AsReadBuffer(args, &ptr, &size) < 0) + return 
NULL; + return PyString_FromStringAndSize((char*)ptr, size); + """), + ("writebuffer_as_string", "METH_O", + """ + void *ptr; + Py_ssize_t size; + if (PyObject_AsWriteBuffer(args, &ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize((char*)ptr, size); + """), + ("zero_out_writebuffer", "METH_O", + """ + void *ptr; + Py_ssize_t size; + Py_ssize_t i; + if (PyObject_AsWriteBuffer(args, &ptr, &size) < 0) + return NULL; + for (i = 0; i < size; i++) { + ((char*)ptr)[i] = 0; + } + Py_RETURN_NONE; + """), + ]) + + def test_string(self): + buffer_support = self.get_buffer_support() + + s = 'a\0x' + + assert buffer_support.check_readbuffer(s) + assert s == buffer_support.readbuffer_as_string(s) + assert raises(TypeError, buffer_support.writebuffer_as_string, s) + assert s == buffer_support.charbuffer_as_string(s) + + def test_buffer(self): + buffer_support = self.get_buffer_support() + + s = 'a\0x' + buf = buffer(s) + + assert buffer_support.check_readbuffer(buf) + assert s == buffer_support.readbuffer_as_string(buf) + assert raises(TypeError, buffer_support.writebuffer_as_string, buf) + assert s == buffer_support.charbuffer_as_string(buf) + + def test_mmap(self): + import mmap + buffer_support = self.get_buffer_support() + + s = 'a\0x' + mm = mmap.mmap(-1, 3) + mm[:] = s + + assert buffer_support.check_readbuffer(mm) + assert s == buffer_support.readbuffer_as_string(mm) + assert s == buffer_support.writebuffer_as_string(mm) + assert s == buffer_support.charbuffer_as_string(mm) + + s = '\0' * 3 + buffer_support.zero_out_writebuffer(mm) + assert s == ''.join(mm) + assert s == buffer_support.readbuffer_as_string(mm) + assert s == buffer_support.writebuffer_as_string(mm) + assert s == buffer_support.charbuffer_as_string(mm) + + s = '\0' * 3 + ro_mm = mmap.mmap(-1, 3, access=mmap.ACCESS_READ) + assert buffer_support.check_readbuffer(ro_mm) + assert s == buffer_support.readbuffer_as_string(ro_mm) + assert raises(TypeError, buffer_support.writebuffer_as_string, 
ro_mm) + assert s == buffer_support.charbuffer_as_string(ro_mm) + + def test_array(self): + import array + buffer_support = self.get_buffer_support() + + s = 'a\0x' + a = array.array('B', [5, 0, 10]) + + buffer_support.zero_out_writebuffer(a) + assert list(a) == [0, 0, 0] + + def test_nonbuffer(self): + # e.g. int + buffer_support = self.get_buffer_support() + + assert not buffer_support.check_readbuffer(42) + assert raises(TypeError, buffer_support.readbuffer_as_string, 42) + assert raises(TypeError, buffer_support.writebuffer_as_string, 42) + assert raises(TypeError, buffer_support.charbuffer_as_string, 42) + + def test_user_class(self): + class MyBuf(str): + pass + s = 'a\0x' + buf = MyBuf(s) + buffer_support = self.get_buffer_support() + + assert buffer_support.check_readbuffer(buf) + assert s == buffer_support.readbuffer_as_string(buf) + assert raises(TypeError, buffer_support.writebuffer_as_string, buf) + assert s == buffer_support.charbuffer_as_string(buf) + + diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_arraymodule.py @@ -0,0 +1,197 @@ +import pytest +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.conftest import option + +class AppTestArrayModule(AppTestCpythonExtensionBase): + enable_leak_checking = True + + def setup_class(cls): + from rpython.tool.udir import udir + AppTestCpythonExtensionBase.setup_class.im_func(cls) + if option.runappdirect: + cls.w_udir = str(udir) + else: + cls.w_udir = cls.space.wrap(str(udir)) + + + def test_basic(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3]) + assert arr.typecode == 'i' + assert arr.itemsize == 4 + assert arr[2] == 3 + assert len(arr.buffer_info()) == 2 + exc = raises(TypeError, module.array.append) + errstr = str(exc.value) + assert errstr.startswith("descriptor 'append' of") + arr.append(4) + assert 
arr.tolist() == [1, 2, 3, 4] + assert len(arr) == 4 + + def test_iter(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3]) + sum = 0 + for i in arr: + sum += i + assert sum == 6 + + def test_index(self): + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 4]) + assert arr[3] == 4 + raises(IndexError, arr.__getitem__, 10) + del arr[2] + assert arr.tolist() == [1, 2, 4] + arr[2] = 99 + assert arr.tolist() == [1, 2, 99] + + def test_slice_get(self): + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 4]) + assert arr[:].tolist() == [1, 2, 3, 4] + assert arr[1:].tolist() == [2, 3, 4] + assert arr[:2].tolist() == [1, 2] + assert arr[1:3].tolist() == [2, 3] + + def test_slice_object(self): + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 4]) + assert arr[slice(1, 3)].tolist() == [2,3] + arr[slice(1, 3)] = module.array('i', [21, 22, 23]) + assert arr.tolist() == [1, 21, 22, 23, 4] + del arr[slice(1, 3)] + assert arr.tolist() == [1, 23, 4] + raises(TypeError, 'arr[slice(1, 3)] = "abc"') + + def test_buffer(self): + import sys + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 4]) + buf = buffer(arr) + exc = raises(TypeError, "buf[1] = '1'") + assert str(exc.value) == "buffer is read-only" + if sys.byteorder == 'big': + expected = '\0\0\0\x01' '\0\0\0\x02' '\0\0\0\x03' '\0\0\0\x04' + else: + expected = '\x01\0\0\0' '\x02\0\0\0' '\x03\0\0\0' '\x04\0\0\0' + assert str(buf) == expected + assert str(buffer('a') + arr) == "a" + expected + # python2 special cases empty-buffer + obj + assert str(buffer('') + arr) == "array('i', [1, 2, 3, 4])" + + def test_releasebuffer(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3,4]) + assert module.get_releasebuffer_cnt() == 0 + module.create_and_release_buffer(arr) + assert module.get_releasebuffer_cnt() == 1 + + def test_Py_buffer(self): + module = 
self.import_module(name='array') + arr = module.array('i', [1,2,3,4]) + assert module.get_releasebuffer_cnt() == 0 + m = memoryview(arr) + assert module.get_releasebuffer_cnt() == 0 + del m + self.debug_collect() + assert module.get_releasebuffer_cnt() == 1 + + def test_pickle(self): + import pickle + module = self.import_module(name='array') + arr = module.array('i', [1,2,3,4]) + s = pickle.dumps(arr) + # pypy exports __dict__ on cpyext objects, so the pickle picks up the {} state value + #assert s == "carray\n_reconstruct\np0\n(S'i'\np1\n(lp2\nI1\naI2\naI3\naI4\natp3\nRp4\n." + rra = pickle.loads(s) # rra is arr backwards + #assert arr.tolist() == rra.tolist() + + def test_binop_mul_impl(self): + # check that rmul is called + module = self.import_module(name='array') + arr = module.array('i', [2]) + res = [1, 2, 3] * arr + assert res == [1, 2, 3, 1, 2, 3] + module.switch_multiply() + res = [1, 2, 3] * arr + assert res == [2, 4, 6] + + @pytest.mark.xfail + def test_subclass_dealloc(self): + module = self.import_module(name='array') + class Sub(module.array): + pass + + arr = Sub('i', [2]) + module.readbuffer_as_string(arr) + class A(object): + pass + assert not module.same_dealloc(arr, module.array('i', [2])) + assert module.same_dealloc(arr, A()) + + def test_subclass(self): + import struct + module = self.import_module(name='array') + class Sub(module.array): + pass + + arr = Sub('i', [2]) + res = [1, 2, 3] * arr + assert res == [1, 2, 3, 1, 2, 3] + + val = module.readbuffer_as_string(arr) + assert val == struct.pack('i', 2) + + def test_unicode_readbuffer(self): + # Not really part of array, refactor + import struct + module = self.import_module(name='array') + val = module.readbuffer_as_string('abcd') + assert val == 'abcd' + val = module.readbuffer_as_string(u'\u03a3') + assert val is not None + + def test_readinto(self): + module = self.import_module(name='array') + a = module.array('c') + a.fromstring('0123456789') + filename = self.udir + "/_test_file" + f 
= open(filename, 'w+b') + f.write('foobar') + f.seek(0) + n = f.readinto(a) + f.close() + assert n == 6 + assert len(a) == 10 + assert a.tostring() == 'foobar6789' + + def test_iowrite(self): + module = self.import_module(name='array') + from io import BytesIO + a = module.array('c') + a.fromstring('0123456789') + fd = BytesIO() + # only test that it works + fd.write(a) + + def test_getitem_via_PySequence_GetItem(self): + module = self.import_module(name='array') + a = module.array('i', range(10)) + # call via tp_as_mapping.mp_subscript + assert 5 == a[-5] + # PySequence_ITEM used to call space.getitem() which + # prefers tp_as_mapping.mp_subscript over tp_as_sequence.sq_item + # Now fixed so this test raises (array_item does not add len(a), + # array_subscr does) + raises(IndexError, module.getitem, a, -5) + + def test_subclass_with_attribute(self): + module = self.import_module(name='array') + class Sub(module.array): + def addattrib(self): + print('called addattrib') + self.attrib = True + import gc + module.subclass_with_attribute(Sub, "addattrib", "attrib", gc.collect) + assert Sub.__module__ == __name__ diff --git a/pypy/module/cpyext/test/test_boolobject.py b/pypy/module/cpyext/test/test_boolobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_boolobject.py @@ -0,0 +1,48 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.boolobject import PyBool_FromLong + +class TestBoolObject(BaseApiTest): + def test_fromlong(self, space): + for i in range(-3, 3): + obj = PyBool_FromLong(space, i) + if i: + assert obj is space.w_True + else: + assert obj is space.w_False + +class AppTestBoolMacros(AppTestCpythonExtensionBase): + def test_macros(self): + module = self.import_extension('foo', [ + ("get_true", "METH_NOARGS", "Py_RETURN_TRUE;"), + ("get_false", "METH_NOARGS", "Py_RETURN_FALSE;"), + ]) + assert module.get_true() == True + 
assert module.get_false() == False + + def test_toint(self): + module = self.import_extension('foo', [ + ("to_int", "METH_O", + ''' + if (args->ob_type->tp_as_number && args->ob_type->tp_as_number->nb_int) { + return args->ob_type->tp_as_number->nb_int(args); + } + else { + PyErr_SetString(PyExc_TypeError,"cannot convert bool to int"); + return NULL; + } + '''), ]) + assert module.to_int(False) == 0 + assert module.to_int(True) == 1 + + def test_check(self): + module = self.import_extension('foo', [ + ("type_check", "METH_O", + ''' + return PyLong_FromLong(PyBool_Check(args)); + ''')]) + assert module.type_check(True) + assert module.type_check(False) + assert not module.type_check(None) + assert not module.type_check(1.0) + diff --git a/pypy/module/cpyext/test/test_borrow.py b/pypy/module/cpyext/test/test_borrow.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_borrow.py @@ -0,0 +1,71 @@ +import py +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.pyobject import make_ref + + +class AppTestBorrow(AppTestCpythonExtensionBase): + def test_tuple_borrowing(self): + module = self.import_extension('foo', [ + ("test_borrowing", "METH_NOARGS", + """ + PyObject *t = PyTuple_New(1); + PyObject *f = PyFloat_FromDouble(42.0); + PyObject *g = NULL; + printf("Refcnt1: %ld\\n", f->ob_refcnt); + PyTuple_SetItem(t, 0, f); // steals reference + printf("Refcnt2: %ld\\n", f->ob_refcnt); + f = PyTuple_GetItem(t, 0); // borrows reference + printf("Refcnt3: %ld\\n", f->ob_refcnt); + g = PyTuple_GetItem(t, 0); // borrows reference again + printf("Refcnt4: %ld\\n", f->ob_refcnt); + printf("COMPARE: %i\\n", f == g); + fflush(stdout); + Py_DECREF(t); + Py_RETURN_TRUE; + """), + ]) + assert module.test_borrowing() # the test should not leak + + def test_borrow_destroy(self): + module = self.import_extension('foo', [ + ("test_borrow_destroy", "METH_NOARGS", + 
""" + PyObject *i = PyInt_FromLong(42); + PyObject *j; + PyObject *t1 = PyTuple_Pack(1, i); + PyObject *t2 = PyTuple_Pack(1, i); + Py_DECREF(i); + + i = PyTuple_GetItem(t1, 0); + PyTuple_GetItem(t2, 0); + Py_DECREF(t2); + + j = PyInt_FromLong(PyInt_AsLong(i)); + Py_DECREF(t1); + return j; + """), + ]) + assert module.test_borrow_destroy() == 42 + + def test_double_borrow(self): + if self.runappdirect: + py.test.xfail('segfault') + module = self.import_extension('foo', [ + ("run", "METH_NOARGS", + """ + PyObject *t = PyTuple_New(1); + PyObject *s = PyRun_String("set()", Py_eval_input, + Py_None, Py_None); + PyObject *w = PyWeakref_NewRef(s, Py_None); + PyTuple_SetItem(t, 0, s); + PyTuple_GetItem(t, 0); + PyTuple_GetItem(t, 0); + Py_DECREF(t); + return w; + """), + ]) + wr = module.run() + # check that the set() object was deallocated + self.debug_collect() + assert wr() is None diff --git a/pypy/module/cpyext/test/test_bufferobject.py b/pypy/module/cpyext/test/test_bufferobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_bufferobject.py @@ -0,0 +1,123 @@ +from rpython.rtyper.lltypesystem import lltype +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.api import PyObject + +class AppTestBufferObject(AppTestCpythonExtensionBase): + + def test_FromMemory(self): + module = self.import_extension('foo', [ + ("get_FromMemory", "METH_NOARGS", + """ + cbuf = malloc(4); + cbuf[0] = 'a'; + cbuf[1] = 'b'; + cbuf[2] = 'c'; + cbuf[3] = '\\0'; + return PyBuffer_FromMemory(cbuf, 4); + """), + ("free_buffer", "METH_NOARGS", + """ + free(cbuf); + Py_RETURN_NONE; + """), + ("check_ascharbuffer", "METH_O", + """ + char *ptr; + Py_ssize_t size; + if (PyObject_AsCharBuffer(args, (const char **)&ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize(ptr, size); + """) + ], prologue = """ + static char* cbuf = NULL; + """) + buf = 
module.get_FromMemory() + assert str(buf) == 'abc\0' + + assert module.check_ascharbuffer(buf) == 'abc\0' + + module.free_buffer() + + def test_Buffer_New(self): + module = self.import_extension('foo', [ + ("buffer_new", "METH_NOARGS", + """ + return PyBuffer_New(150); + """), + ]) + b = module.buffer_new() + raises(AttributeError, getattr, b, 'x') + + def test_array_buffer(self): + if self.runappdirect: + skip('PyBufferObject not available outside buffer object.c') + module = self.import_extension('foo', [ + ("roundtrip", "METH_O", + """ + PyBufferObject *buf = (PyBufferObject *)args; + return PyString_FromStringAndSize(buf->b_ptr, buf->b_size); + """), + ]) + import array + a = array.array('c', 'text') + b = buffer(a) + assert module.roundtrip(b) == 'text' + + + def test_issue2752(self): + iterations = 10 + if self.runappdirect: + iterations = 2000 + module = self.import_extension('foo', [ + ("test_mod", 'METH_VARARGS', + """ + PyObject *obj; + Py_buffer bp; + if (!PyArg_ParseTuple(args, "O", &obj)) + return NULL; + + if (PyObject_GetBuffer(obj, &bp, PyBUF_SIMPLE) == -1) + return NULL; + + if (((unsigned char*)bp.buf)[0] != '0') { + void * buf = (void*)bp.buf; + unsigned char val[4]; + char * s = PyString_AsString(obj); + memcpy(val, bp.buf, 4); + PyBuffer_Release(&bp); + if (PyObject_GetBuffer(obj, &bp, PyBUF_SIMPLE) == -1) + return NULL; + PyErr_Format(PyExc_ValueError, + "mismatch: %p [%x %x %x %x...] now %p [%x %x %x %x...] 
as str '%s'", + buf, val[0], val[1], val[2], val[3], + (void *)bp.buf, + ((unsigned char*)bp.buf)[0], + ((unsigned char*)bp.buf)[1], + ((unsigned char*)bp.buf)[2], + ((unsigned char*)bp.buf)[3], + s); + PyBuffer_Release(&bp); + return NULL; + } + + PyBuffer_Release(&bp); + Py_RETURN_NONE; + """), + ]) + bufsize = 4096 + def getdata(bufsize): + data = b'01234567' + for x in range(18): + data += data + if len(data) >= bufsize: + break + return data + for j in range(iterations): + block = getdata(bufsize) + assert block[:8] == '01234567' + try: + module.test_mod(block) + except ValueError as e: + print("%s at it=%d" % (e, j)) + assert False diff --git a/pypy/module/cpyext/test/test_bytearrayobject.py b/pypy/module/cpyext/test/test_bytearrayobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_bytearrayobject.py @@ -0,0 +1,188 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase + + +class AppTestStringObject(AppTestCpythonExtensionBase): + def test_basic(self): + module = self.import_extension('foo', [ + ("get_hello1", "METH_NOARGS", + """ + return PyByteArray_FromStringAndSize( + "Hello world", 11); + """), + ("get_hello2", "METH_NOARGS", + """ + return PyByteArray_FromStringAndSize("Hello world", 12); + """), + ("test_Size", "METH_NOARGS", + """ + PyObject* s = PyByteArray_FromStringAndSize("Hello world", 12); + int result = 0; + + if(PyByteArray_Size(s) == 12) { + result = 1; + } + Py_DECREF(s); + return PyBool_FromLong(result); + """), + ("test_is_bytearray", "METH_VARARGS", + """ + return PyBool_FromLong(PyByteArray_Check(PyTuple_GetItem(args, 0))); + """)], prologue='#include ') + assert module.get_hello1() == b'Hello world' + assert module.get_hello2() == b'Hello world\x00' + assert module.test_Size() + assert module.test_is_bytearray(bytearray(b"")) + assert not module.test_is_bytearray(()) + + def test_bytearray_buffer_init(self): + module = self.import_extension('foo', [ + ("getbytearray", "METH_NOARGS", + 
""" + PyObject *s, *t; + char* c; + + s = PyByteArray_FromStringAndSize(NULL, 4); + if (s == NULL) + return NULL; + t = PyByteArray_FromStringAndSize(NULL, 3); + if (t == NULL) + return NULL; + Py_DECREF(t); + c = PyByteArray_AsString(s); + if (c == NULL) + { + PyErr_SetString(PyExc_ValueError, "non-null bytearray object expected"); + return NULL; + } + c[0] = 'a'; + c[1] = 'b'; + c[2] = 0; + c[3] = 'c'; + return s; + """), + ]) + s = module.getbytearray() + assert len(s) == 4 + assert s == b'ab\x00c' + + def test_bytearray_mutable(self): + module = self.import_extension('foo', [ + ("mutable", "METH_NOARGS", + """ + PyObject *base; + base = PyByteArray_FromStringAndSize("test", 10); + if (PyByteArray_GET_SIZE(base) != 10) + return PyLong_FromLong(-PyByteArray_GET_SIZE(base)); + memcpy(PyByteArray_AS_STRING(base), "works", 6); + Py_INCREF(base); + return base; + """), + ]) + s = module.mutable() + if s == b'\x00' * 10: + assert False, "no RW access to bytearray" + assert s[:6] == b'works\x00' + + def test_AsByteArray(self): + module = self.import_extension('foo', [ + ("getbytearray", "METH_NOARGS", + """ + const char *c; + PyObject *s2, *s1 = PyByteArray_FromStringAndSize("test", 4); + if (s1 == NULL) + return NULL; + c = PyByteArray_AsString(s1); + s2 = PyByteArray_FromStringAndSize(c, 4); + Py_DECREF(s1); + return s2; + """), + ]) + s = module.getbytearray() + assert s == b'test' + + def test_manipulations(self): + import sys + module = self.import_extension('foo', [ + ("bytearray_from_bytes", "METH_VARARGS", + ''' + return PyByteArray_FromStringAndSize(PyBytes_AsString( + PyTuple_GetItem(args, 0)), 4); + ''' + ), + ("bytes_from_bytearray", "METH_VARARGS", + ''' + char * buf; + int n; + PyObject * obj; + obj = PyTuple_GetItem(args, 0); + buf = PyByteArray_AsString(obj); + if (buf == NULL) + { + PyErr_SetString(PyExc_ValueError, "non-null bytearray object expected"); + return NULL; + } + n = PyByteArray_Size(obj); + return PyBytes_FromStringAndSize(buf, n); + ''' + 
), + ("concat", "METH_VARARGS", + """ + PyObject * ret, *right, *left; + PyObject *ba1, *ba2; + if (!PyArg_ParseTuple(args, "OO", &left, &right)) { + return PyUnicode_FromString("parse failed"); + } + ba1 = PyByteArray_FromObject(left); + ba2 = PyByteArray_FromObject(right); + if (ba1 == NULL || ba2 == NULL) + { + /* exception should be set */ + return NULL; + } + ret = PyByteArray_Concat(ba1, ba2); + return ret; + """)]) + assert module.bytearray_from_bytes(b"huheduwe") == b"huhe" + assert module.bytes_from_bytearray(bytearray(b'abc')) == b'abc' + if '__pypy__' in sys.builtin_module_names: + # CPython only makes an assert. + raises(ValueError, module.bytes_from_bytearray, 4.0) + ret = module.concat(b'abc', b'def') + assert ret == b'abcdef' + assert not isinstance(ret, str) + assert isinstance(ret, bytearray) + raises(TypeError, module.concat, b'abc', u'def') + + def test_bytearray_resize(self): + module = self.import_extension('foo', [ + ("bytearray_resize", "METH_VARARGS", + ''' + PyObject *obj, *ba; + int newsize, oldsize, ret; + if (!PyArg_ParseTuple(args, "Oi", &obj, &newsize)) { + return PyUnicode_FromString("parse failed"); + } + + ba = PyByteArray_FromObject(obj); + if (ba == NULL) + return NULL; + oldsize = PyByteArray_Size(ba); + if (oldsize == 0) + { + return PyUnicode_FromString("oldsize is 0"); + } + ret = PyByteArray_Resize(ba, newsize); + if (ret != 0) + { + printf("ret, oldsize, newsize= %d, %d, %d\\n", ret, oldsize, newsize); + return NULL; + } + return ba; + ''' + )]) + ret = module.bytearray_resize(b'abc', 6) + assert len(ret) == 6,"%s, len=%d" % (ret, len(ret)) + assert ret == b'abc\x00\x00\x00' + ret = module.bytearray_resize(b'abcdefghi', 4) + assert len(ret) == 4,"%s, len=%d" % (ret, len(ret)) + assert ret == b'abcd' diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_bytesobject.py @@ -0,0 +1,611 @@ +# encoding: utf-8 
+import pytest +from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.interpreter.error import OperationError +from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.bytesobject import ( + new_empty_str, PyBytesObject, _PyString_Resize, PyString_Concat, + PyString_ConcatAndDel, PyString_Format, PyString_InternFromString, + PyString_AsEncodedObject, PyString_AsDecodedObject, _PyString_Eq, + _PyString_Join) +from pypy.module.cpyext.api import PyObjectP, PyObject, Py_ssize_tP, generic_cpy_call +from pypy.module.cpyext.pyobject import decref, from_ref, make_ref +from pypy.module.cpyext.buffer import PyObject_AsCharBuffer +from pypy.module.cpyext.api import PyTypeObjectPtr + + +class AppTestBytesObject(AppTestCpythonExtensionBase): + def test_bytesobject(self): + module = self.import_extension('foo', [ + ("get_hello1", "METH_NOARGS", + """ + return PyBytes_FromStringAndSize( + "Hello world", 11); + """), + ("get_hello2", "METH_NOARGS", + """ + return PyBytes_FromString("Hello world"); + """), + ("test_Size", "METH_NOARGS", + """ + PyObject* s = PyBytes_FromString("Hello world"); + int result = PyBytes_Size(s); + + Py_DECREF(s); + return PyLong_FromLong(result); + """), + ("test_Size_exception", "METH_NOARGS", + """ + PyObject* f = PyFloat_FromDouble(1.0); + PyBytes_Size(f); + + Py_DECREF(f); + return NULL; + """), + ("test_is_bytes", "METH_VARARGS", + """ + return PyBool_FromLong(PyBytes_Check(PyTuple_GetItem(args, 0))); + """)], prologue='#include ') + assert module.get_hello1() == b'Hello world' + assert module.get_hello2() == b'Hello world' + assert module.test_Size() == 11 + raises(TypeError, module.test_Size_exception) + + assert module.test_is_bytes(b"") + assert not module.test_is_bytes(()) + + def test_bytes_buffer_init(self): + module = self.import_extension('foo', [ + ("getbytes", "METH_NOARGS", + """ + PyObject *s, *t; + char* c; + + s = 
PyBytes_FromStringAndSize(NULL, 4); + if (s == NULL) + return NULL; + t = PyBytes_FromStringAndSize(NULL, 3); + if (t == NULL) + return NULL; + Py_DECREF(t); + c = PyBytes_AS_STRING(s); + c[0] = 'a'; + c[1] = 'b'; + c[2] = 0; + c[3] = 'c'; + return s; + """), + ]) + s = module.getbytes() + assert len(s) == 4 + assert s == b'ab\x00c' + + def test_bytes_tp_alloc(self): + module = self.import_extension('foo', [ + ("tpalloc", "METH_NOARGS", + """ + PyObject *base; + PyTypeObject * type; + PyBytesObject *obj; + base = PyBytes_FromString("test"); + if (PyBytes_GET_SIZE(base) != 4) + return PyLong_FromLong(-PyBytes_GET_SIZE(base)); + type = base->ob_type; + if (type->tp_itemsize != 1) + return PyLong_FromLong(type->tp_itemsize); + obj = (PyBytesObject*)type->tp_alloc(type, 10); + if (PyBytes_GET_SIZE(obj) != 10) + return PyLong_FromLong(PyBytes_GET_SIZE(obj)); + /* cannot work, there is only RO access + memcpy(PyBytes_AS_STRING(obj), "works", 6); */ + Py_INCREF(obj); + return (PyObject*)obj; + """), + ('alloc_rw', "METH_NOARGS", + ''' + PyObject *obj = (PyObject*)_PyObject_NewVar(&PyBytes_Type, 10); + memcpy(PyBytes_AS_STRING(obj), "works", 6); + return (PyObject*)obj; + '''), + ]) + s = module.alloc_rw() + assert s[:6] == b'works\0' # s[6:10] contains random garbage + s = module.tpalloc() + assert s == b'\x00' * 10 + + def test_AsString(self): + module = self.import_extension('foo', [ + ("getbytes", "METH_NOARGS", + """ + char *c; + PyObject* s2, *s1 = PyBytes_FromStringAndSize("test", 4); + c = PyBytes_AsString(s1); + s2 = PyBytes_FromStringAndSize(c, 4); + Py_DECREF(s1); + return s2; + """), + ]) + s = module.getbytes() + assert s == b'test' + + def test_manipulations(self): + module = self.import_extension('foo', [ + ("bytes_as_string", "METH_VARARGS", + ''' + return PyBytes_FromStringAndSize(PyBytes_AsString( + PyTuple_GetItem(args, 0)), 4); + ''' + ), + ("concat", "METH_VARARGS", + """ + PyObject ** v; + PyObject * left = PyTuple_GetItem(args, 0); + Py_INCREF(left); 
/* the reference will be stolen! */ + v = &left; + PyBytes_Concat(v, PyTuple_GetItem(args, 1)); + return *v; + """)]) + assert module.bytes_as_string(b"huheduwe") == b"huhe" + ret = module.concat(b'abc', b'def') + assert ret == b'abcdef' + ret = module.concat('abc', u'def') + assert not isinstance(ret, str) + assert isinstance(ret, unicode) + assert ret == 'abcdef' + + def test_py_bytes_as_string_None(self): + module = self.import_extension('foo', [ + ("string_None", "METH_VARARGS", + ''' + if (PyBytes_AsString(Py_None)) { + Py_RETURN_NONE; + } + return NULL; + ''' + )]) + raises(TypeError, module.string_None) + + def test_AsStringAndSize(self): + module = self.import_extension('foo', [ + ("getbytes", "METH_NOARGS", + """ + PyObject* s1 = PyBytes_FromStringAndSize("te\\0st", 5); + char *buf; + Py_ssize_t len; + if (PyBytes_AsStringAndSize(s1, &buf, &len) < 0) + return NULL; + if (len != 5) { + PyErr_SetString(PyExc_AssertionError, "Bad Length"); + return NULL; + } + if (PyBytes_AsStringAndSize(s1, &buf, NULL) >= 0) { + PyErr_SetString(PyExc_AssertionError, "Should Have failed"); + return NULL; + } + PyErr_Clear(); + Py_DECREF(s1); + Py_INCREF(Py_None); + return Py_None; + """), + ("c_only", "METH_NOARGS", + """ + int ret; + char * buf2; + PyObject * obj = PyBytes_FromStringAndSize(NULL, 1024); + if (!obj) + return NULL; + buf2 = PyBytes_AsString(obj); + if (!buf2) + return NULL; + /* buf should not have been forced, issue #2395 */ + ret = _PyBytes_Resize(&obj, 512); + if (ret < 0) + return NULL; + Py_DECREF(obj); + Py_INCREF(Py_None); + return Py_None; + """), + ]) + module.getbytes() + module.c_only() + + def test_py_string_as_string_Unicode(self): + module = self.import_extension('foo', [ + ("getstring_unicode", "METH_NOARGS", + """ + Py_UNICODE chars[] = {'t', 'e', 's', 't'}; + PyObject* u1 = PyUnicode_FromUnicode(chars, 4); + char *buf; + buf = PyString_AsString(u1); + if (buf == NULL) + return NULL; + if (buf[3] != 't') { + 
PyErr_SetString(PyExc_AssertionError, "Bad conversion"); + return NULL; + } + Py_DECREF(u1); + Py_INCREF(Py_None); + return Py_None; + """), + ("getstringandsize_unicode", "METH_NOARGS", + """ + Py_UNICODE chars[] = {'t', 'e', 's', 't'}; + PyObject* u1 = PyUnicode_FromUnicode(chars, 4); + char *buf; + Py_ssize_t len; + if (PyString_AsStringAndSize(u1, &buf, &len) < 0) + return NULL; + if (len != 4) { + PyErr_SetString(PyExc_AssertionError, "Bad Length"); + return NULL; + } + Py_DECREF(u1); + Py_INCREF(Py_None); + return Py_None; + """), + ]) + module.getstring_unicode() + module.getstringandsize_unicode() + + def test_format_v(self): + module = self.import_extension('foo', [ + ("test_string_format_v", "METH_VARARGS", + ''' + return helper("bla %d ble %s\\n", + PyInt_AsLong(PyTuple_GetItem(args, 0)), + PyString_AsString(PyTuple_GetItem(args, 1))); + ''' + ) + ], prologue=''' + PyObject* helper(char* fmt, ...) + { + va_list va; + PyObject* res; + va_start(va, fmt); + res = PyString_FromFormatV(fmt, va); + va_end(va); + return res; + } + ''') + res = module.test_string_format_v(1, "xyz") + assert res == "bla 1 ble xyz\n" + + def test_format(self): + module = self.import_extension('foo', [ + ("test_string_format", "METH_VARARGS", + ''' + return PyString_FromFormat("bla %d ble %s\\n", + PyInt_AsLong(PyTuple_GetItem(args, 0)), + PyString_AsString(PyTuple_GetItem(args, 1))); + ''' + ) + ]) + res = module.test_string_format(1, "xyz") + assert res == "bla 1 ble xyz\n" + + def test_intern_inplace(self): + module = self.import_extension('foo', [ + ("test_intern_inplace", "METH_O", + ''' + PyObject *s = args; + Py_INCREF(s); + PyString_InternInPlace(&s); + if (((PyBytesObject*)s)->ob_sstate == SSTATE_NOT_INTERNED) + { + Py_DECREF(s); + s = PyString_FromString("interned error"); + } + return s; + ''' + ) + ]) + # This does not test much, but at least the refcounts are checked. 
+ assert module.test_intern_inplace('s') == 's' + + def test_bytes_macros(self): + """The PyString_* macros cast, and calls expecting that build.""" + module = self.import_extension('foo', [ + ("test_macro_invocations", "METH_NOARGS", + """ + PyObject* o = PyString_FromString(""); + PyBytesObject* u = (PyBytesObject*)o; + + PyString_GET_SIZE(u); + PyString_GET_SIZE(o); + + PyString_AS_STRING(o); + PyString_AS_STRING(u); + + return o; + """)]) + assert module.test_macro_invocations() == '' + + def test_hash_and_state(self): + module = self.import_extension('foo', [ + ("test_hash", "METH_VARARGS", + ''' + PyObject* obj = (PyTuple_GetItem(args, 0)); + long hash = ((PyBytesObject*)obj)->ob_shash; + return PyLong_FromLong(hash); + ''' + ), + ("test_sstate", "METH_NOARGS", + ''' + PyObject *s = PyString_FromString("xyz"); + /*int sstate = ((PyBytesObject*)s)->ob_sstate; + printf("sstate now %d\\n", sstate);*/ + PyString_InternInPlace(&s); + /*sstate = ((PyBytesObject*)s)->ob_sstate; + printf("sstate now %d\\n", sstate);*/ + Py_DECREF(s); + return PyBool_FromLong(1); + '''), + ], prologue='#include <stdlib.h>') + res = module.test_hash("xyz") + assert res == hash('xyz') + # doesn't really test, but if printf is enabled will prove sstate + assert module.test_sstate() + + def test_subclass(self): + # taken from PyStringArrType_Type in numpy's scalartypes.c.src + module = self.import_extension('bar', [ + ("newsubstr", "METH_O", + """ + PyObject * obj; + char * data; + int len; + + data = PyString_AS_STRING(args); + len = PyString_GET_SIZE(args); + if (data == NULL) + Py_RETURN_NONE; + obj = PyArray_Scalar(data, len); + return obj; + """), + ("get_len", "METH_O", + """ + return PyLong_FromLong(PyObject_Size(args)); + """), + ('has_nb_add', "METH_O", + ''' + if (args->ob_type->tp_as_number == NULL) { + Py_RETURN_FALSE; + } + if (args->ob_type->tp_as_number->nb_add == NULL) { + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; + '''), + ], prologue=""" + #include <Python.h> + PyTypeObject PyStringArrType_Type 
= { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "bar.string_", /* tp_name*/ + sizeof(PyBytesObject), /* tp_basicsize*/ + 0 /* tp_itemsize */ + }; + + static PyObject * + stringtype_repr(PyObject *self) + { + const char *dptr, *ip; + int len; + PyObject *new; + + ip = dptr = PyString_AS_STRING(self); + len = PyString_GET_SIZE(self); + dptr += len-1; + while(len > 0 && *dptr-- == 0) { + len--; + } + new = PyString_FromStringAndSize(ip, len); + if (new == NULL) { + return PyString_FromString(""); + } + return new; + } + + static PyObject * + stringtype_str(PyObject *self) + { + const char *dptr, *ip; + int len; + PyObject *new; + + ip = dptr = PyString_AS_STRING(self); + len = PyString_GET_SIZE(self); + dptr += len-1; + while(len > 0 && *dptr-- == 0) { + len--; + } + new = PyString_FromStringAndSize(ip, len); + if (new == NULL) { + return PyString_FromString(""); + } + return new; + } + + PyObject * + PyArray_Scalar(char *data, int n) + { + PyTypeObject *type = &PyStringArrType_Type; + PyObject *obj; + void *destptr; + int itemsize = n; + obj = type->tp_alloc(type, itemsize); + if (obj == NULL) { + return NULL; + } + destptr = PyString_AS_STRING(obj); + ((PyBytesObject *)obj)->ob_shash = -1; + memcpy(destptr, data, itemsize); + return obj; + } + """, more_init = ''' + PyStringArrType_Type.tp_alloc = NULL; + PyStringArrType_Type.tp_free = NULL; + + PyStringArrType_Type.tp_repr = stringtype_repr; + PyStringArrType_Type.tp_str = stringtype_str; + PyStringArrType_Type.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE; + PyStringArrType_Type.tp_itemsize = sizeof(char); + PyStringArrType_Type.tp_base = &PyString_Type; + PyStringArrType_Type.tp_hash = PyString_Type.tp_hash; + if (PyType_Ready(&PyStringArrType_Type) < 0) INITERROR; + ''') + + a = module.newsubstr('abc') + assert module.has_nb_add('a') is False + assert module.has_nb_add(a) is False + assert type(a).__name__ == 'string_' + assert a == 'abc' + assert 3 == module.get_len(a) + b = module.newsubstr('') + 
assert 0 == module.get_len(b) + +class TestBytes(BaseApiTest): + def test_bytes_resize(self, space): + py_str = new_empty_str(space, 10) + ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + py_str.c_ob_sval[0] = 'a' + py_str.c_ob_sval[1] = 'b' + py_str.c_ob_sval[2] = 'c' + ar[0] = rffi.cast(PyObject, py_str) + _PyString_Resize(space, ar, 3) + py_str = rffi.cast(PyBytesObject, ar[0]) + assert py_str.c_ob_size == 3 + assert py_str.c_ob_sval[1] == 'b' + assert py_str.c_ob_sval[3] == '\x00' + # the same for growing + ar[0] = rffi.cast(PyObject, py_str) + _PyString_Resize(space, ar, 10) + py_str = rffi.cast(PyBytesObject, ar[0]) + assert py_str.c_ob_size == 10 + assert py_str.c_ob_sval[1] == 'b' + assert py_str.c_ob_sval[10] == '\x00' + decref(space, ar[0]) + lltype.free(ar, flavor='raw') + + def test_string_buffer(self, space): + py_str = new_empty_str(space, 10) + c_buf = py_str.c_ob_type.c_tp_as_buffer + assert c_buf + py_obj = rffi.cast(PyObject, py_str) + assert generic_cpy_call(space, c_buf.c_bf_getsegcount, + py_obj, lltype.nullptr(Py_ssize_tP.TO)) == 1 + ref = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') + assert generic_cpy_call(space, c_buf.c_bf_getsegcount, + py_obj, ref) == 1 + assert ref[0] == 10 + lltype.free(ref, flavor='raw') + ref = lltype.malloc(rffi.VOIDPP.TO, 1, flavor='raw') + assert generic_cpy_call(space, c_buf.c_bf_getreadbuffer, + py_obj, 0, ref) == 10 + lltype.free(ref, flavor='raw') + decref(space, py_obj) + + def test_Concat(self, space): + ref = make_ref(space, space.wrap('abc')) + ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + ptr[0] = ref + prev_refcnt = ref.c_ob_refcnt + PyString_Concat(space, ptr, space.wrap('def')) + assert ref.c_ob_refcnt == prev_refcnt - 1 + assert space.str_w(from_ref(space, ptr[0])) == 'abcdef' + with pytest.raises(OperationError): + PyString_Concat(space, ptr, space.w_None) + assert not ptr[0] + ptr[0] = lltype.nullptr(PyObject.TO) + PyString_Concat(space, ptr, space.wrap('def')) # should not crash + 
lltype.free(ptr, flavor='raw') + + def test_ConcatAndDel(self, space): + ref1 = make_ref(space, space.wrap('abc')) + ref2 = make_ref(space, space.wrap('def')) + ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + ptr[0] = ref1 + prev_refcnf = ref2.c_ob_refcnt + PyString_ConcatAndDel(space, ptr, ref2) + assert space.str_w(from_ref(space, ptr[0])) == 'abcdef' + assert ref2.c_ob_refcnt == prev_refcnf - 1 + decref(space, ptr[0]) + ptr[0] = lltype.nullptr(PyObject.TO) + ref2 = make_ref(space, space.wrap('foo')) + prev_refcnf = ref2.c_ob_refcnt + PyString_ConcatAndDel(space, ptr, ref2) # should not crash + assert ref2.c_ob_refcnt == prev_refcnf - 1 + lltype.free(ptr, flavor='raw') + + def test_format(self, space): + assert "1 2" == space.unwrap( + PyString_Format(space, space.wrap('%s %d'), space.wrap((1, 2)))) + + def test_asbuffer(self, space): + bufp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') + lenp = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') + + w_text = space.wrap("text") + ref = make_ref(space, w_text) + prev_refcnt = ref.c_ob_refcnt + assert PyObject_AsCharBuffer(space, ref, bufp, lenp) == 0 + assert ref.c_ob_refcnt == prev_refcnt + assert lenp[0] == 4 + assert rffi.charp2str(bufp[0]) == 'text' + lltype.free(bufp, flavor='raw') + lltype.free(lenp, flavor='raw') + decref(space, ref) + + def test_intern(self, space): + buf = rffi.str2charp("test") + w_s1 = PyString_InternFromString(space, buf) + w_s2 = PyString_InternFromString(space, buf) + rffi.free_charp(buf) + assert w_s1 is w_s2 + + def test_AsEncodedObject(self, space): + ptr = space.wrap('abc') + + errors = rffi.str2charp("strict") + + encoding = rffi.str2charp("hex") + res = PyString_AsEncodedObject(space, ptr, encoding, errors) + assert space.unwrap(res) == "616263" + + res = PyString_AsEncodedObject(space, + ptr, encoding, lltype.nullptr(rffi.CCHARP.TO)) + assert space.unwrap(res) == "616263" + rffi.free_charp(encoding) + + encoding = rffi.str2charp("unknown_encoding") + with raises_w(space, 
LookupError): + PyString_AsEncodedObject(space, ptr, encoding, errors) + rffi.free_charp(encoding) + + rffi.free_charp(errors) + + NULL = lltype.nullptr(rffi.CCHARP.TO) + res = PyString_AsEncodedObject(space, ptr, NULL, NULL) + assert space.unwrap(res) == "abc" + with raises_w(space, TypeError): + PyString_AsEncodedObject(space, space.wrap(2), NULL, NULL) + + def test_AsDecodedObject(self, space): + w_str = space.wrap('caf\xe9') + encoding = rffi.str2charp("latin-1") + w_res = PyString_AsDecodedObject(space, w_str, encoding, None) + rffi.free_charp(encoding) + assert space.unwrap(w_res) == u"caf\xe9" + + def test_eq(self, space): + assert 1 == _PyString_Eq( + space, space.wrap("hello"), space.wrap("hello")) + assert 0 == _PyString_Eq( + space, space.wrap("hello"), space.wrap("world")) + + def test_join(self, space): + w_sep = space.wrap('') + w_seq = space.wrap(['a', 'b']) + w_joined = _PyString_Join(space, w_sep, w_seq) + assert space.unwrap(w_joined) == 'ab' diff --git a/pypy/module/cpyext/test/test_capsule.py b/pypy/module/cpyext/test/test_capsule.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_capsule.py @@ -0,0 +1,29 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase + +class AppTestCapsule(AppTestCpythonExtensionBase): + def test_capsule_import(self): + module = self.import_extension('foo', [ + ("set_ptr", "METH_O", + """ + PyObject *capsule, *module; + void *ptr = PyLong_AsVoidPtr(args); + if (PyErr_Occurred()) return NULL; + capsule = PyCapsule_New(ptr, "foo._ptr", NULL); + if (PyErr_Occurred()) return NULL; + module = PyImport_ImportModule("foo"); + PyModule_AddObject(module, "_ptr", capsule); + Py_DECREF(module); + if (PyErr_Occurred()) return NULL; + Py_RETURN_NONE; + """), + ("get_ptr", "METH_NOARGS", + """ + void *ptr = PyCapsule_Import("foo._ptr", 0); + if (PyErr_Occurred()) return NULL; + return PyLong_FromVoidPtr(ptr); + """)]) + module.set_ptr(1234) + assert 'capsule object "foo._ptr" at ' in 
str(module._ptr) + import gc; gc.collect() + assert module.get_ptr() == 1234 + del module._ptr diff --git a/pypy/module/cpyext/test/test_cell.py b/pypy/module/cpyext/test/test_cell.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_cell.py @@ -0,0 +1,20 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase + + +class AppTestCell(AppTestCpythonExtensionBase): + def test_cell_type(self): + module = self.import_extension('foo', [ + ("cell_type", "METH_O", + """ + PyDict_SetItemString(args, "cell", (PyObject*)&PyCell_Type); + Py_RETURN_NONE; + """)]) + d = {} + module.cell_type(d) + def f(o): + def g(): + return o + return g + + cell_type = type(f(0).func_closure[0]) + assert d["cell"] is cell_type diff --git a/pypy/module/cpyext/test/test_classobject.py b/pypy/module/cpyext/test/test_classobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_classobject.py @@ -0,0 +1,93 @@ +from pypy.interpreter.function import Function +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.classobject import ( + PyClass_Check, PyClass_New, PyInstance_Check, PyInstance_New, + PyInstance_NewRaw, _PyInstance_Lookup) +from pypy.module.cpyext.object import PyObject_GetAttr +from pypy.module.cpyext.pyobject import get_w_obj_and_decref + +class TestClassObject(BaseApiTest): + def test_newinstance(self, space): + w_class = space.appexec([], """(): + class C: + x = None + def __init__(self, *args, **kwargs): + self.x = 1 + self.args = args + self.__dict__.update(kwargs) + return C + """) + + assert PyClass_Check(space, w_class) + + w_instance = PyInstance_NewRaw(space, w_class, None) + assert PyInstance_Check(space, w_instance) + assert space.getattr(w_instance, space.wrap('x')) is space.w_None + + w_instance = PyInstance_NewRaw(space, w_class, space.wrap(dict(a=3))) + assert space.getattr(w_instance, 
space.wrap('x')) is space.w_None + assert space.unwrap(space.getattr(w_instance, space.wrap('a'))) == 3 + + w_instance = PyInstance_New(space, w_class, + space.wrap((3,)), space.wrap(dict(y=2))) + assert space.unwrap(space.getattr(w_instance, space.wrap('x'))) == 1 + assert space.unwrap(space.getattr(w_instance, space.wrap('y'))) == 2 + assert space.unwrap(space.getattr(w_instance, space.wrap('args'))) == (3,) + + def test_lookup(self, space): + w_instance = space.appexec([], """(): + class C: + def __init__(self): + self.x = None + def f(self): pass + return C() + """) + + assert PyInstance_Check(space, w_instance) + py_obj = PyObject_GetAttr(space, w_instance, space.wrap('x')) + assert get_w_obj_and_decref(space, py_obj) is space.w_None + assert _PyInstance_Lookup(space, w_instance, space.wrap('x')) is space.w_None + assert _PyInstance_Lookup(space, w_instance, space.wrap('y')) is None + + # getattr returns a bound method + py_obj = PyObject_GetAttr(space, w_instance, space.wrap('f')) + assert not isinstance(get_w_obj_and_decref(space, py_obj), Function) + # _PyInstance_Lookup returns the raw descriptor + assert isinstance( + _PyInstance_Lookup(space, w_instance, space.wrap('f')), Function) + + def test_pyclass_new(self, space): + w_bases = space.newtuple([]) + w_dict = space.newdict() + w_name = space.wrap("C") + w_class = PyClass_New(space, w_bases, w_dict, w_name) + assert not space.isinstance_w(w_class, space.w_type) + w_instance = space.call_function(w_class) + assert PyInstance_Check(space, w_instance) + assert space.is_true(space.call_method(space.builtin, "isinstance", + w_instance, w_class)) + +class AppTestStringObject(AppTestCpythonExtensionBase): + def test_class_type(self): + module = self.import_extension('foo', [ + ("get_classtype", "METH_NOARGS", + """ + Py_INCREF(&PyClass_Type); + return (PyObject*)&PyClass_Type; + """)]) + class C: + pass + assert module.get_classtype() is type(C) + + def test_pyclass_new_no_bases(self): + module = 
self.import_extension('foo', [ + ("new_foo", "METH_O", + """ + return PyClass_New(NULL, PyDict_New(), args); + """)]) + FooClass = module.new_foo("FooClass") + class Cls1: + pass + assert type(FooClass) is type(Cls1) + assert FooClass.__bases__ == Cls1.__bases__ diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_codecs.py @@ -0,0 +1,15 @@ +# encoding: iso-8859-15 +from pypy.module.cpyext.test.test_api import BaseApiTest +from rpython.rtyper.lltypesystem import rffi +from pypy.module.cpyext.codecs import ( + PyCodec_IncrementalEncoder, PyCodec_IncrementalDecoder) + +class TestCodecs(BaseApiTest): + def test_incremental(self, space): + utf8 = rffi.str2charp('utf-8') + w_encoder = PyCodec_IncrementalEncoder(space, utf8, None) + w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) + w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) + w_decoded = space.call_method(w_decoder, 'decode', w_encoded) + assert space.unicode_w(w_decoded) == u'späm' + rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_complexobject.py b/pypy/module/cpyext/test/test_complexobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_complexobject.py @@ -0,0 +1,64 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w +from pypy.module.cpyext.complexobject import ( + PyComplex_FromDoubles, PyComplex_RealAsDouble, PyComplex_ImagAsDouble) + +class TestComplexObject(BaseApiTest): + def test_complexobject(self, space): + w_value = PyComplex_FromDoubles(space, 1.2, 3.4) + assert space.unwrap(w_value) == 1.2+3.4j + assert PyComplex_RealAsDouble(space, w_value) == 1.2 + assert PyComplex_ImagAsDouble(space, w_value) == 3.4 + + assert PyComplex_RealAsDouble(space, space.wrap(42)) == 42 + assert PyComplex_RealAsDouble(space, space.wrap(1.5)) == 
1.5 + assert PyComplex_ImagAsDouble(space, space.wrap(1.5)) == 0.0 + + # cpython accepts anything for PyComplex_ImagAsDouble + assert PyComplex_ImagAsDouble(space, space.w_None) == 0.0 + with raises_w(space, TypeError): + PyComplex_RealAsDouble(space, space.w_None) + +class AppTestCComplex(AppTestCpythonExtensionBase): + def test_AsCComplex(self): + module = self.import_extension('foo', [ + ("as_tuple", "METH_O", + """ + Py_complex c = PyComplex_AsCComplex(args); + if (PyErr_Occurred()) return NULL; + return Py_BuildValue("dd", c.real, c.imag); + """)]) + assert module.as_tuple(12-34j) == (12, -34) + assert module.as_tuple(-3.14) == (-3.14, 0.0) + raises(TypeError, module.as_tuple, "12") + + def test_FromCComplex(self): + module = self.import_extension('foo', [ + ("test", "METH_NOARGS", + """ + Py_complex c = {1.2, 3.4}; + return PyComplex_FromCComplex(c); + """)]) + assert module.test() == 1.2 + 3.4j + + def test_PyComplex_to_WComplex(self): + module = self.import_extension('foo', [ + ("test", "METH_NOARGS", + """ + Py_complex c = {1.2, 3.4}; + PyObject *obj = PyObject_Malloc(sizeof(PyComplexObject)); + obj = PyObject_Init(obj, &PyComplex_Type); + assert(obj != NULL); + ((PyComplexObject *)obj)->cval = c; + return obj; + """)]) + assert module.test() == 1.2 + 3.4j + + def test_WComplex_to_PyComplex(self): + module = self.import_extension('foo', [ + ("test", "METH_O", + """ + Py_complex c = ((PyComplexObject *)args)->cval; + return Py_BuildValue("dd", c.real, c.imag); + """)]) + assert module.test(1.2 + 3.4j) == (1.2, 3.4) diff --git a/pypy/module/cpyext/test/test_cparser.py b/pypy/module/cpyext/test/test_cparser.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_cparser.py @@ -0,0 +1,260 @@ +from rpython.flowspace.model import const +from rpython.flowspace.objspace import build_flow +from rpython.translator.simplify import simplify_graph +from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.module.cpyext.cparser import 
parse_source, CTypeSpace + +def test_configure(): + decl = """ + typedef ssize_t Py_ssize_t; + + typedef struct { + Py_ssize_t ob_refcnt; + Py_ssize_t ob_pypy_link; + double ob_fval; + } TestFloatObject; + """ + cts = parse_source(decl) + TestFloatObject = cts.definitions['TestFloatObject'] + assert isinstance(TestFloatObject, lltype.Struct) + assert TestFloatObject.c_ob_refcnt == rffi.SSIZE_T + assert TestFloatObject.c_ob_pypy_link == rffi.SSIZE_T + assert TestFloatObject.c_ob_fval == rffi.DOUBLE + +def test_simple(): + decl = "typedef ssize_t Py_ssize_t;" + cts = parse_source(decl) + assert cts.definitions == {'Py_ssize_t': rffi.SSIZE_T} + +def test_macro(): + decl = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + PyObject_HEAD + double ob_fval; + } PyFloatObject; + """ + cts = parse_source(decl) + assert 'PyFloatObject' in cts.definitions + assert 'PyObject_HEAD' in cts.macros + +def test_include(): + cdef1 = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + char *name; + } Type; + """ + cdef2 = """ + typedef struct { + PyObject_HEAD + Py_ssize_t ob_foo; + Type *type; + } Object; + """ + cts1 = parse_source(cdef1) + Type = cts1.definitions['Type'] + assert isinstance(Type, lltype.Struct) + cts2 = parse_source(cdef2, includes=[cts1]) + assert 'Type' not in cts2.definitions + Object = cts2.definitions['Object'] + assert Object.c_type.TO is Type + +def test_multiple_sources(): + cdef1 = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + char *name; + } Type; + """ + cdef2 = """ + typedef struct { + PyObject_HEAD + Py_ssize_t ob_foo; + Type *type; + } Object; + """ + cts = CTypeSpace() + cts.parse_source(cdef1) + Type = cts.definitions['Type'] + assert isinstance(Type, lltype.Struct) + assert 
'Object' not in cts.definitions + cts.parse_source(cdef2) + Object = cts.definitions['Object'] + assert Object.c_type.TO is Type + +def test_incomplete(): + cdef = """ + typedef ssize_t Py_ssize_t; + + typedef struct { + Py_ssize_t ob_refcnt; + Py_ssize_t ob_pypy_link; + struct _typeobject *ob_type; + } Object; + + typedef struct { + void *buf; + Object *obj; + } Buffer; + + """ + cts = parse_source(cdef) + Object = cts.gettype('Object') + assert isinstance(Object, lltype.Struct) + +def test_recursive(): + cdef = """ + typedef ssize_t Py_ssize_t; + + typedef struct { + Py_ssize_t ob_refcnt; + Py_ssize_t ob_pypy_link; + struct _typeobject *ob_type; + } Object; + + typedef struct { + void *buf; + Object *obj; + } Buffer; + + typedef struct _typeobject { + Object *obj; + } Type; + """ + cts = parse_source(cdef) + Object = cts.definitions['Object'] + assert isinstance(Object, lltype.Struct) + hash(Object) + +def test_nested_struct(): + cdef = """ + typedef struct { + int x; + } foo; + typedef struct { + foo y; + } bar; + """ + cts = parse_source(cdef) + bar = cts.gettype('bar') + assert isinstance(bar, lltype.Struct) + hash(bar) # bar is hashable + +def test_const(): + cdef = """ + typedef struct { + const char * const foo; + } bar; + """ + cts = parse_source(cdef) + assert cts.definitions['bar'].c_foo == rffi.CONST_CCHARP != rffi.CCHARP + +def test_gettype(): + decl = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + PyObject_HEAD + double ob_fval; + } TestFloatObject; + """ + cts = parse_source(decl) + assert cts.gettype('Py_ssize_t') == rffi.SSIZE_T + assert cts.gettype('TestFloatObject *').TO.c_ob_refcnt == rffi.SSIZE_T + assert cts.cast('Py_ssize_t', 42) == rffi.cast(rffi.SSIZE_T, 42) + +def test_parse_funcdecl(): + decl = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + PyObject_HEAD + 
double ob_fval; + } TestFloatObject; + + typedef TestFloatObject* (*func_t)(int, int); + """ + cts = parse_source(decl) + func_decl = cts.parse_func("func_t * some_func(TestFloatObject*)") + assert func_decl.name == 'some_func' + assert func_decl.get_llresult(cts) == cts.gettype('func_t*') + assert func_decl.get_llargs(cts) == [cts.gettype('TestFloatObject *')] + +def test_write_func(): + from ..api import ApiFunction + from rpython.translator.c.database import LowLevelDatabase + db = LowLevelDatabase() + cdef = """ + typedef ssize_t Py_ssize_t; + """ + cts = parse_source(cdef) + cdecl = "Py_ssize_t * some_func(Py_ssize_t*)" + decl = cts.parse_func(cdecl) + api_function = ApiFunction( + decl.get_llargs(cts), decl.get_llresult(cts), lambda space, x: None, + cdecl=decl) + assert (api_function.get_api_decl('some_func', db) == + "PyAPI_FUNC(Py_ssize_t *) some_func(Py_ssize_t * arg0);") + + +def test_wchar_t(): + cdef = """ + typedef struct { wchar_t* x; } test; + """ + cts = parse_source(cdef, headers=['stddef.h']) + obj = lltype.malloc(cts.gettype('test'), flavor='raw') + obj.c_x = cts.cast('wchar_t*', 0) + obj.c_x = lltype.nullptr(rffi.CWCHARP.TO) + lltype.free(obj, flavor='raw') + + +def test_translate_cast(): + cdef = "typedef ssize_t Py_ssize_t;" + cts = parse_source(cdef) + + def f(): + return cts.cast('Py_ssize_t*', 0) + graph = build_flow(f) + simplify_graph(graph) + assert len(graph.startblock.operations) == 1 + op = graph.startblock.operations[0] + assert op.args[0] == const(rffi.cast) + assert op.args[1].value is cts.gettype('Py_ssize_t*') + +def test_translate_gettype(): + cdef = "typedef ssize_t Py_ssize_t;" + cts = parse_source(cdef) + + def f(): + return cts.gettype('Py_ssize_t*') + graph = build_flow(f) + simplify_graph(graph) + # Check that the result is constant-folded + assert graph.startblock.operations == [] + [link] = graph.startblock.exits + assert link.target is graph.returnblock + assert link.args[0] == const(rffi.CArrayPtr(rffi.SSIZE_T)) diff 
--git a/pypy/module/cpyext/test/test_datetime.py b/pypy/module/cpyext/test/test_datetime.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_datetime.py @@ -0,0 +1,354 @@ +import pytest + +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.cdatetime import * +from pypy.module.cpyext.cdatetime import ( + _PyDateTime_Import, _PyDateTime_FromDateAndTime, _PyDate_FromDate, + _PyTime_FromTime, _PyDelta_FromDelta) +import datetime + +class TestDatetime(BaseApiTest): + def test_date(self, space): + date_api = _PyDateTime_Import(space) + w_date = _PyDate_FromDate(space, 2010, 06, 03, date_api.c_DateType) + assert space.unwrap(space.str(w_date)) == '2010-06-03' + + assert PyDate_Check(space, w_date) + assert PyDate_CheckExact(space, w_date) + + assert PyDateTime_GET_YEAR(space, w_date) == 2010 + assert PyDateTime_GET_MONTH(space, w_date) == 6 + assert PyDateTime_GET_DAY(space, w_date) == 3 + + def test_time(self, space): + date_api = _PyDateTime_Import(space) + w_time = _PyTime_FromTime( + space, 23, 15, 40, 123456, space.w_None, date_api.c_TimeType) + assert space.unwrap(space.str(w_time)) == '23:15:40.123456' + + assert PyTime_Check(space, w_time) + assert PyTime_CheckExact(space, w_time) + + assert PyDateTime_TIME_GET_HOUR(space, w_time) == 23 + assert PyDateTime_TIME_GET_MINUTE(space, w_time) == 15 + assert PyDateTime_TIME_GET_SECOND(space, w_time) == 40 + assert PyDateTime_TIME_GET_MICROSECOND(space, w_time) == 123456 + + def test_datetime(self, space): + date_api = _PyDateTime_Import(space) + w_date = _PyDateTime_FromDateAndTime( + space, 2010, 06, 03, 23, 15, 40, 123456, space.w_None, + date_api.c_DateTimeType) + assert space.unwrap(space.str(w_date)) == '2010-06-03 23:15:40.123456' + + assert PyDateTime_Check(space, w_date) + assert PyDateTime_CheckExact(space, w_date) + assert PyDate_Check(space, w_date) + assert not 
PyDate_CheckExact(space, w_date) + + assert PyDateTime_GET_YEAR(space, w_date) == 2010 + assert PyDateTime_GET_MONTH(space, w_date) == 6 + assert PyDateTime_GET_DAY(space, w_date) == 3 + assert PyDateTime_DATE_GET_HOUR(space, w_date) == 23 + assert PyDateTime_DATE_GET_MINUTE(space, w_date) == 15 + assert PyDateTime_DATE_GET_SECOND(space, w_date) == 40 + assert PyDateTime_DATE_GET_MICROSECOND(space, w_date) == 123456 + + def test_delta(self, space): + date_api = _PyDateTime_Import(space) + w_delta = space.appexec( + [space.wrap(3), space.wrap(15)], """(days, seconds): + from datetime import timedelta + return timedelta(days, seconds) + """) + assert PyDelta_Check(space, w_delta) + assert PyDelta_CheckExact(space, w_delta) + + w_delta = _PyDelta_FromDelta(space, 10, 20, 30, True, date_api.c_DeltaType) + assert PyDelta_Check(space, w_delta) + assert PyDelta_CheckExact(space, w_delta) + + assert PyDateTime_DELTA_GET_DAYS(space, w_delta) == 10 + assert PyDateTime_DELTA_GET_SECONDS(space, w_delta) == 20 + assert PyDateTime_DELTA_GET_MICROSECONDS(space, w_delta) == 30 + + def test_fromtimestamp(self, space): + w_args = space.wrap((0,)) + w_date = PyDate_FromTimestamp(space, w_args) + date = datetime.date.fromtimestamp(0) + assert space.unwrap(space.str(w_date)) == str(date) + From pypy.commits at gmail.com Tue Sep 11 17:32:14 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 14:32:14 -0700 (PDT) Subject: [pypy-commit] pypy default: add module/cpyext/test to directories to be split for own tests Message-ID: <5b98345e.1c69fb81.98926.3fe5@mx.google.com> Author: Matti Picus Branch: Changeset: r95101:4cbeaa8bf545 Date: 2018-09-11 23:57 +0300 http://bitbucket.org/pypy/pypy/changeset/4cbeaa8bf545/ Log: add module/cpyext/test to directories to be split for own tests diff --git a/pypy/testrunner_cfg.py b/pypy/testrunner_cfg.py --- a/pypy/testrunner_cfg.py +++ b/pypy/testrunner_cfg.py @@ -5,7 +5,7 @@ 'translator/c', 'rlib', 'memory/test', 'jit/metainterp', 
'jit/backend/arm', 'jit/backend/x86', - 'jit/backend/zarch', + 'jit/backend/zarch', 'module/cpyext/test', ] def collect_one_testdir(testdirs, reldir, tests): From pypy.commits at gmail.com Tue Sep 11 17:32:16 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 14:32:16 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: revert splitting cpyext tests Message-ID: <5b983460.1c69fb81.6b2c8.70bf@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95102:4955ea6d4e9c Date: 2018-09-12 00:09 +0300 http://bitbucket.org/pypy/pypy/changeset/4955ea6d4e9c/ Log: revert splitting cpyext tests diff --git a/pypy/module/cpyext/test0/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py rename from pypy/module/cpyext/test0/test_arraymodule.py rename to pypy/module/cpyext/test/test_arraymodule.py diff --git a/pypy/module/cpyext/test0/test_boolobject.py b/pypy/module/cpyext/test/test_boolobject.py rename from pypy/module/cpyext/test0/test_boolobject.py rename to pypy/module/cpyext/test/test_boolobject.py diff --git a/pypy/module/cpyext/test0/test_borrow.py b/pypy/module/cpyext/test/test_borrow.py rename from pypy/module/cpyext/test0/test_borrow.py rename to pypy/module/cpyext/test/test_borrow.py diff --git a/pypy/module/cpyext/test0/test_bytearrayobject.py b/pypy/module/cpyext/test/test_bytearrayobject.py rename from pypy/module/cpyext/test0/test_bytearrayobject.py rename to pypy/module/cpyext/test/test_bytearrayobject.py diff --git a/pypy/module/cpyext/test0/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py rename from pypy/module/cpyext/test0/test_bytesobject.py rename to pypy/module/cpyext/test/test_bytesobject.py diff --git a/pypy/module/cpyext/test0/test_capsule.py b/pypy/module/cpyext/test/test_capsule.py rename from pypy/module/cpyext/test0/test_capsule.py rename to pypy/module/cpyext/test/test_capsule.py diff --git a/pypy/module/cpyext/test0/test_cell.py b/pypy/module/cpyext/test/test_cell.py rename from 
pypy/module/cpyext/test0/test_cell.py rename to pypy/module/cpyext/test/test_cell.py diff --git a/pypy/module/cpyext/test0/test_classobject.py b/pypy/module/cpyext/test/test_classobject.py rename from pypy/module/cpyext/test0/test_classobject.py rename to pypy/module/cpyext/test/test_classobject.py diff --git a/pypy/module/cpyext/test0/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py rename from pypy/module/cpyext/test0/test_codecs.py rename to pypy/module/cpyext/test/test_codecs.py diff --git a/pypy/module/cpyext/test0/test_complexobject.py b/pypy/module/cpyext/test/test_complexobject.py rename from pypy/module/cpyext/test0/test_complexobject.py rename to pypy/module/cpyext/test/test_complexobject.py diff --git a/pypy/module/cpyext/test0/test_cparser.py b/pypy/module/cpyext/test/test_cparser.py rename from pypy/module/cpyext/test0/test_cparser.py rename to pypy/module/cpyext/test/test_cparser.py diff --git a/pypy/module/cpyext/test0/test_datetime.py b/pypy/module/cpyext/test/test_datetime.py rename from pypy/module/cpyext/test0/test_datetime.py rename to pypy/module/cpyext/test/test_datetime.py diff --git a/pypy/module/cpyext/test0/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py rename from pypy/module/cpyext/test0/test_dictobject.py rename to pypy/module/cpyext/test/test_dictobject.py diff --git a/pypy/module/cpyext/test0/test_eval.py b/pypy/module/cpyext/test/test_eval.py rename from pypy/module/cpyext/test0/test_eval.py rename to pypy/module/cpyext/test/test_eval.py diff --git a/pypy/module/cpyext/test0/test_fileobject.py b/pypy/module/cpyext/test/test_fileobject.py rename from pypy/module/cpyext/test0/test_fileobject.py rename to pypy/module/cpyext/test/test_fileobject.py diff --git a/pypy/module/cpyext/test0/test_floatobject.py b/pypy/module/cpyext/test/test_floatobject.py rename from pypy/module/cpyext/test0/test_floatobject.py rename to pypy/module/cpyext/test/test_floatobject.py diff --git 
a/pypy/module/cpyext/test0/test_frameobject.py b/pypy/module/cpyext/test/test_frameobject.py rename from pypy/module/cpyext/test0/test_frameobject.py rename to pypy/module/cpyext/test/test_frameobject.py diff --git a/pypy/module/cpyext/test0/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py rename from pypy/module/cpyext/test0/test_funcobject.py rename to pypy/module/cpyext/test/test_funcobject.py diff --git a/pypy/module/cpyext/test0/test_genobject.py b/pypy/module/cpyext/test/test_genobject.py rename from pypy/module/cpyext/test0/test_genobject.py rename to pypy/module/cpyext/test/test_genobject.py diff --git a/pypy/module/cpyext/test0/test_getargs.py b/pypy/module/cpyext/test/test_getargs.py rename from pypy/module/cpyext/test0/test_getargs.py rename to pypy/module/cpyext/test/test_getargs.py diff --git a/pypy/module/cpyext/test0/test_import.py b/pypy/module/cpyext/test/test_import.py rename from pypy/module/cpyext/test0/test_import.py rename to pypy/module/cpyext/test/test_import.py diff --git a/pypy/module/cpyext/test0/test_iterator.py b/pypy/module/cpyext/test/test_iterator.py rename from pypy/module/cpyext/test0/test_iterator.py rename to pypy/module/cpyext/test/test_iterator.py diff --git a/pypy/module/cpyext/test0/test_listobject.py b/pypy/module/cpyext/test/test_listobject.py rename from pypy/module/cpyext/test0/test_listobject.py rename to pypy/module/cpyext/test/test_listobject.py diff --git a/pypy/module/cpyext/test0/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py rename from pypy/module/cpyext/test0/test_longobject.py rename to pypy/module/cpyext/test/test_longobject.py diff --git a/pypy/module/cpyext/test0/test_mapping.py b/pypy/module/cpyext/test/test_mapping.py rename from pypy/module/cpyext/test0/test_mapping.py rename to pypy/module/cpyext/test/test_mapping.py diff --git a/pypy/module/cpyext/test0/test_marshal.py b/pypy/module/cpyext/test/test_marshal.py rename from pypy/module/cpyext/test0/test_marshal.py rename to 
pypy/module/cpyext/test/test_marshal.py diff --git a/pypy/module/cpyext/test0/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py rename from pypy/module/cpyext/test0/test_memoryobject.py rename to pypy/module/cpyext/test/test_memoryobject.py diff --git a/pypy/module/cpyext/test0/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py rename from pypy/module/cpyext/test0/test_methodobject.py rename to pypy/module/cpyext/test/test_methodobject.py diff --git a/pypy/module/cpyext/test0/test_module.py b/pypy/module/cpyext/test/test_module.py rename from pypy/module/cpyext/test0/test_module.py rename to pypy/module/cpyext/test/test_module.py diff --git a/pypy/module/cpyext/test0/test_ndarrayobject.py b/pypy/module/cpyext/test/test_ndarrayobject.py rename from pypy/module/cpyext/test0/test_ndarrayobject.py rename to pypy/module/cpyext/test/test_ndarrayobject.py diff --git a/pypy/module/cpyext/test0/test_number.py b/pypy/module/cpyext/test/test_number.py rename from pypy/module/cpyext/test0/test_number.py rename to pypy/module/cpyext/test/test_number.py diff --git a/pypy/module/cpyext/test1/test_object.py b/pypy/module/cpyext/test/test_object.py rename from pypy/module/cpyext/test1/test_object.py rename to pypy/module/cpyext/test/test_object.py diff --git a/pypy/module/cpyext/test1/test_pycobject.py b/pypy/module/cpyext/test/test_pycobject.py rename from pypy/module/cpyext/test1/test_pycobject.py rename to pypy/module/cpyext/test/test_pycobject.py diff --git a/pypy/module/cpyext/test1/test_pyerrors.py b/pypy/module/cpyext/test/test_pyerrors.py rename from pypy/module/cpyext/test1/test_pyerrors.py rename to pypy/module/cpyext/test/test_pyerrors.py diff --git a/pypy/module/cpyext/test1/test_pyfile.py b/pypy/module/cpyext/test/test_pyfile.py rename from pypy/module/cpyext/test1/test_pyfile.py rename to pypy/module/cpyext/test/test_pyfile.py diff --git a/pypy/module/cpyext/test1/test_pysignals.py b/pypy/module/cpyext/test/test_pysignals.py rename 
from pypy/module/cpyext/test1/test_pysignals.py rename to pypy/module/cpyext/test/test_pysignals.py diff --git a/pypy/module/cpyext/test1/test_pystate.py b/pypy/module/cpyext/test/test_pystate.py rename from pypy/module/cpyext/test1/test_pystate.py rename to pypy/module/cpyext/test/test_pystate.py diff --git a/pypy/module/cpyext/test1/test_pystrtod.py b/pypy/module/cpyext/test/test_pystrtod.py rename from pypy/module/cpyext/test1/test_pystrtod.py rename to pypy/module/cpyext/test/test_pystrtod.py diff --git a/pypy/module/cpyext/test1/test_sequence.py b/pypy/module/cpyext/test/test_sequence.py rename from pypy/module/cpyext/test1/test_sequence.py rename to pypy/module/cpyext/test/test_sequence.py diff --git a/pypy/module/cpyext/test1/test_setobject.py b/pypy/module/cpyext/test/test_setobject.py rename from pypy/module/cpyext/test1/test_setobject.py rename to pypy/module/cpyext/test/test_setobject.py diff --git a/pypy/module/cpyext/test1/test_sliceobject.py b/pypy/module/cpyext/test/test_sliceobject.py rename from pypy/module/cpyext/test1/test_sliceobject.py rename to pypy/module/cpyext/test/test_sliceobject.py diff --git a/pypy/module/cpyext/test1/test_structseq.py b/pypy/module/cpyext/test/test_structseq.py rename from pypy/module/cpyext/test1/test_structseq.py rename to pypy/module/cpyext/test/test_structseq.py diff --git a/pypy/module/cpyext/test1/test_sysmodule.py b/pypy/module/cpyext/test/test_sysmodule.py rename from pypy/module/cpyext/test1/test_sysmodule.py rename to pypy/module/cpyext/test/test_sysmodule.py diff --git a/pypy/module/cpyext/test1/test_thread.py b/pypy/module/cpyext/test/test_thread.py rename from pypy/module/cpyext/test1/test_thread.py rename to pypy/module/cpyext/test/test_thread.py diff --git a/pypy/module/cpyext/test1/test_traceback.py b/pypy/module/cpyext/test/test_traceback.py rename from pypy/module/cpyext/test1/test_traceback.py rename to pypy/module/cpyext/test/test_traceback.py diff --git a/pypy/module/cpyext/test1/test_translate.py 
b/pypy/module/cpyext/test/test_translate.py rename from pypy/module/cpyext/test1/test_translate.py rename to pypy/module/cpyext/test/test_translate.py diff --git a/pypy/module/cpyext/test1/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py rename from pypy/module/cpyext/test1/test_tupleobject.py rename to pypy/module/cpyext/test/test_tupleobject.py diff --git a/pypy/module/cpyext/test1/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py rename from pypy/module/cpyext/test1/test_typeobject.py rename to pypy/module/cpyext/test/test_typeobject.py diff --git a/pypy/module/cpyext/test1/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py rename from pypy/module/cpyext/test1/test_unicodeobject.py rename to pypy/module/cpyext/test/test_unicodeobject.py diff --git a/pypy/module/cpyext/test1/test_userslots.py b/pypy/module/cpyext/test/test_userslots.py rename from pypy/module/cpyext/test1/test_userslots.py rename to pypy/module/cpyext/test/test_userslots.py diff --git a/pypy/module/cpyext/test1/test_version.py b/pypy/module/cpyext/test/test_version.py rename from pypy/module/cpyext/test1/test_version.py rename to pypy/module/cpyext/test/test_version.py diff --git a/pypy/module/cpyext/test1/test_weakref.py b/pypy/module/cpyext/test/test_weakref.py rename from pypy/module/cpyext/test1/test_weakref.py rename to pypy/module/cpyext/test/test_weakref.py From pypy.commits at gmail.com Tue Sep 11 17:32:18 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 14:32:18 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: merge default into py3.5 Message-ID: <5b983462.1c69fb81.3b564.f1cb@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95103:8f66a7558b9c Date: 2018-09-12 00:13 +0300 http://bitbucket.org/pypy/pypy/changeset/8f66a7558b9c/ Log: merge default into py3.5 From pypy.commits at gmail.com Tue Sep 11 17:32:20 2018 From: pypy.commits at gmail.com (mattip) Date: Tue, 11 Sep 2018 14:32:20 -0700 (PDT) Subject: 
[pypy-commit] pypy py3.5: merge default into py3.5 Message-ID: <5b983464.1c69fb81.70512.4794@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95104:14f241fbf66a Date: 2018-09-12 00:14 +0300 http://bitbucket.org/pypy/pypy/changeset/14f241fbf66a/ Log: merge default into py3.5 diff --git a/pypy/testrunner_cfg.py b/pypy/testrunner_cfg.py --- a/pypy/testrunner_cfg.py +++ b/pypy/testrunner_cfg.py @@ -5,7 +5,7 @@ 'translator/c', 'rlib', 'memory/test', 'jit/metainterp', 'jit/backend/arm', 'jit/backend/x86', - 'jit/backend/zarch', + 'jit/backend/zarch', 'module/cpyext/test', ] def collect_one_testdir(testdirs, reldir, tests): From pypy.commits at gmail.com Wed Sep 12 08:34:38 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 12 Sep 2018 05:34:38 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: rarely-used str_w should use ascii-encoding (used for space.bytes_w) Message-ID: <5b9907de.1c69fb81.615bd.80aa@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95105:b2656c189d7f Date: 2018-09-12 10:56 +0300 http://bitbucket.org/pypy/pypy/changeset/b2656c189d7f/ Log: rarely-used str_w should use ascii-encoding (used for space.bytes_w) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -87,7 +87,8 @@ return space.newint(uid) def str_w(self, space): - return space.text_w(space.str(self)) + # Returns ascii-encoded str + return space.text_w(encode_object(space, self, 'ascii', 'strict')) def utf8_w(self, space): return self._utf8 @@ -1103,11 +1104,11 @@ encoding = getdefaultencoding(space) if errors is None or errors == 'strict': if encoding == 'ascii': - s = space.charbuf_w(w_obj) + s = space.utf8_w(w_obj) unicodehelper.check_ascii_or_raise(space, s) return space.newutf8(s, len(s)) if encoding == 'utf-8' or encoding == 'utf8': - s = space.charbuf_w(w_obj) + s = space.utf8_w(w_obj) lgt = unicodehelper.check_utf8_or_raise(space, s) 
return space.newutf8(s, lgt) w_codecs = space.getbuiltinmodule("_codecs") From pypy.commits at gmail.com Wed Sep 12 08:34:40 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 12 Sep 2018 05:34:40 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: merge default into branch Message-ID: <5b9907e0.1c69fb81.0945.a33c@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95106:31d343a2e948 Date: 2018-09-12 10:56 +0300 http://bitbucket.org/pypy/pypy/changeset/31d343a2e948/ Log: merge default into branch diff too long, truncating to 2000 out of 24129 lines diff --git a/pypy/module/cpyext/test/test_abstract.py b/pypy/module/cpyext/test/test_abstract.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_abstract.py @@ -0,0 +1,130 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +import pytest + +class AppTestBufferProtocol(AppTestCpythonExtensionBase): + """Tests for the old buffer protocol.""" + + def w_get_buffer_support(self): + return self.import_extension('buffer_support', [ + ("charbuffer_as_string", "METH_O", + """ + char *ptr; + Py_ssize_t size; + if (PyObject_AsCharBuffer(args, (const char **)&ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize(ptr, size); + """), + ("check_readbuffer", "METH_O", + """ + return PyBool_FromLong(PyObject_CheckReadBuffer(args)); + """), + ("readbuffer_as_string", "METH_O", + """ + const void *ptr; + Py_ssize_t size; + if (PyObject_AsReadBuffer(args, &ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize((char*)ptr, size); + """), + ("writebuffer_as_string", "METH_O", + """ + void *ptr; + Py_ssize_t size; + if (PyObject_AsWriteBuffer(args, &ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize((char*)ptr, size); + """), + ("zero_out_writebuffer", "METH_O", + """ + void *ptr; + Py_ssize_t size; + Py_ssize_t i; + if (PyObject_AsWriteBuffer(args, &ptr, &size) < 0) + return NULL; + for (i = 0; i < size; i++) { + 
((char*)ptr)[i] = 0; + } + Py_RETURN_NONE; + """), + ]) + + def test_string(self): + buffer_support = self.get_buffer_support() + + s = 'a\0x' + + assert buffer_support.check_readbuffer(s) + assert s == buffer_support.readbuffer_as_string(s) + assert raises(TypeError, buffer_support.writebuffer_as_string, s) + assert s == buffer_support.charbuffer_as_string(s) + + def test_buffer(self): + buffer_support = self.get_buffer_support() + + s = 'a\0x' + buf = buffer(s) + + assert buffer_support.check_readbuffer(buf) + assert s == buffer_support.readbuffer_as_string(buf) + assert raises(TypeError, buffer_support.writebuffer_as_string, buf) + assert s == buffer_support.charbuffer_as_string(buf) + + def test_mmap(self): + import mmap + buffer_support = self.get_buffer_support() + + s = 'a\0x' + mm = mmap.mmap(-1, 3) + mm[:] = s + + assert buffer_support.check_readbuffer(mm) + assert s == buffer_support.readbuffer_as_string(mm) + assert s == buffer_support.writebuffer_as_string(mm) + assert s == buffer_support.charbuffer_as_string(mm) + + s = '\0' * 3 + buffer_support.zero_out_writebuffer(mm) + assert s == ''.join(mm) + assert s == buffer_support.readbuffer_as_string(mm) + assert s == buffer_support.writebuffer_as_string(mm) + assert s == buffer_support.charbuffer_as_string(mm) + + s = '\0' * 3 + ro_mm = mmap.mmap(-1, 3, access=mmap.ACCESS_READ) + assert buffer_support.check_readbuffer(ro_mm) + assert s == buffer_support.readbuffer_as_string(ro_mm) + assert raises(TypeError, buffer_support.writebuffer_as_string, ro_mm) + assert s == buffer_support.charbuffer_as_string(ro_mm) + + def test_array(self): + import array + buffer_support = self.get_buffer_support() + + s = 'a\0x' + a = array.array('B', [5, 0, 10]) + + buffer_support.zero_out_writebuffer(a) + assert list(a) == [0, 0, 0] + + def test_nonbuffer(self): + # e.g. 
int + buffer_support = self.get_buffer_support() + + assert not buffer_support.check_readbuffer(42) + assert raises(TypeError, buffer_support.readbuffer_as_string, 42) + assert raises(TypeError, buffer_support.writebuffer_as_string, 42) + assert raises(TypeError, buffer_support.charbuffer_as_string, 42) + + def test_user_class(self): + class MyBuf(str): + pass + s = 'a\0x' + buf = MyBuf(s) + buffer_support = self.get_buffer_support() + + assert buffer_support.check_readbuffer(buf) + assert s == buffer_support.readbuffer_as_string(buf) + assert raises(TypeError, buffer_support.writebuffer_as_string, buf) + assert s == buffer_support.charbuffer_as_string(buf) + + diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_arraymodule.py @@ -0,0 +1,197 @@ +import pytest +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.conftest import option + +class AppTestArrayModule(AppTestCpythonExtensionBase): + enable_leak_checking = True + + def setup_class(cls): + from rpython.tool.udir import udir + AppTestCpythonExtensionBase.setup_class.im_func(cls) + if option.runappdirect: + cls.w_udir = str(udir) + else: + cls.w_udir = cls.space.wrap(str(udir)) + + + def test_basic(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3]) + assert arr.typecode == 'i' + assert arr.itemsize == 4 + assert arr[2] == 3 + assert len(arr.buffer_info()) == 2 + exc = raises(TypeError, module.array.append) + errstr = str(exc.value) + assert errstr.startswith("descriptor 'append' of") + arr.append(4) + assert arr.tolist() == [1, 2, 3, 4] + assert len(arr) == 4 + + def test_iter(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3]) + sum = 0 + for i in arr: + sum += i + assert sum == 6 + + def test_index(self): + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 
4]) + assert arr[3] == 4 + raises(IndexError, arr.__getitem__, 10) + del arr[2] + assert arr.tolist() == [1, 2, 4] + arr[2] = 99 + assert arr.tolist() == [1, 2, 99] + + def test_slice_get(self): + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 4]) + assert arr[:].tolist() == [1, 2, 3, 4] + assert arr[1:].tolist() == [2, 3, 4] + assert arr[:2].tolist() == [1, 2] + assert arr[1:3].tolist() == [2, 3] + + def test_slice_object(self): + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 4]) + assert arr[slice(1, 3)].tolist() == [2,3] + arr[slice(1, 3)] = module.array('i', [21, 22, 23]) + assert arr.tolist() == [1, 21, 22, 23, 4] + del arr[slice(1, 3)] + assert arr.tolist() == [1, 23, 4] + raises(TypeError, 'arr[slice(1, 3)] = "abc"') + + def test_buffer(self): + import sys + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 4]) + buf = buffer(arr) + exc = raises(TypeError, "buf[1] = '1'") + assert str(exc.value) == "buffer is read-only" + if sys.byteorder == 'big': + expected = '\0\0\0\x01' '\0\0\0\x02' '\0\0\0\x03' '\0\0\0\x04' + else: + expected = '\x01\0\0\0' '\x02\0\0\0' '\x03\0\0\0' '\x04\0\0\0' + assert str(buf) == expected + assert str(buffer('a') + arr) == "a" + expected + # python2 special cases empty-buffer + obj + assert str(buffer('') + arr) == "array('i', [1, 2, 3, 4])" + + def test_releasebuffer(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3,4]) + assert module.get_releasebuffer_cnt() == 0 + module.create_and_release_buffer(arr) + assert module.get_releasebuffer_cnt() == 1 + + def test_Py_buffer(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3,4]) + assert module.get_releasebuffer_cnt() == 0 + m = memoryview(arr) + assert module.get_releasebuffer_cnt() == 0 + del m + self.debug_collect() + assert module.get_releasebuffer_cnt() == 1 + + def test_pickle(self): + import pickle + module = 
self.import_module(name='array') + arr = module.array('i', [1,2,3,4]) + s = pickle.dumps(arr) + # pypy exports __dict__ on cpyext objects, so the pickle picks up the {} state value + #assert s == "carray\n_reconstruct\np0\n(S'i'\np1\n(lp2\nI1\naI2\naI3\naI4\natp3\nRp4\n." + rra = pickle.loads(s) # rra is arr backwards + #assert arr.tolist() == rra.tolist() + + def test_binop_mul_impl(self): + # check that rmul is called + module = self.import_module(name='array') + arr = module.array('i', [2]) + res = [1, 2, 3] * arr + assert res == [1, 2, 3, 1, 2, 3] + module.switch_multiply() + res = [1, 2, 3] * arr + assert res == [2, 4, 6] + + @pytest.mark.xfail + def test_subclass_dealloc(self): + module = self.import_module(name='array') + class Sub(module.array): + pass + + arr = Sub('i', [2]) + module.readbuffer_as_string(arr) + class A(object): + pass + assert not module.same_dealloc(arr, module.array('i', [2])) + assert module.same_dealloc(arr, A()) + + def test_subclass(self): + import struct + module = self.import_module(name='array') + class Sub(module.array): + pass + + arr = Sub('i', [2]) + res = [1, 2, 3] * arr + assert res == [1, 2, 3, 1, 2, 3] + + val = module.readbuffer_as_string(arr) + assert val == struct.pack('i', 2) + + def test_unicode_readbuffer(self): + # Not really part of array, refactor + import struct + module = self.import_module(name='array') + val = module.readbuffer_as_string('abcd') + assert val == 'abcd' + val = module.readbuffer_as_string(u'\u03a3') + assert val is not None + + def test_readinto(self): + module = self.import_module(name='array') + a = module.array('c') + a.fromstring('0123456789') + filename = self.udir + "/_test_file" + f = open(filename, 'w+b') + f.write('foobar') + f.seek(0) + n = f.readinto(a) + f.close() + assert n == 6 + assert len(a) == 10 + assert a.tostring() == 'foobar6789' + + def test_iowrite(self): + module = self.import_module(name='array') + from io import BytesIO + a = module.array('c') + 
a.fromstring('0123456789') + fd = BytesIO() + # only test that it works + fd.write(a) + + def test_getitem_via_PySequence_GetItem(self): + module = self.import_module(name='array') + a = module.array('i', range(10)) + # call via tp_as_mapping.mp_subscript + assert 5 == a[-5] + # PySequence_ITEM used to call space.getitem() which + # prefers tp_as_mapping.mp_subscript over tp_as_sequence.sq_item + # Now fixed so this test raises (array_item does not add len(a), + # array_subscr does) + raises(IndexError, module.getitem, a, -5) + + def test_subclass_with_attribute(self): + module = self.import_module(name='array') + class Sub(module.array): + def addattrib(self): + print('called addattrib') + self.attrib = True + import gc + module.subclass_with_attribute(Sub, "addattrib", "attrib", gc.collect) + assert Sub.__module__ == __name__ diff --git a/pypy/module/cpyext/test/test_boolobject.py b/pypy/module/cpyext/test/test_boolobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_boolobject.py @@ -0,0 +1,48 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.boolobject import PyBool_FromLong + +class TestBoolObject(BaseApiTest): + def test_fromlong(self, space): + for i in range(-3, 3): + obj = PyBool_FromLong(space, i) + if i: + assert obj is space.w_True + else: + assert obj is space.w_False + +class AppTestBoolMacros(AppTestCpythonExtensionBase): + def test_macros(self): + module = self.import_extension('foo', [ + ("get_true", "METH_NOARGS", "Py_RETURN_TRUE;"), + ("get_false", "METH_NOARGS", "Py_RETURN_FALSE;"), + ]) + assert module.get_true() == True + assert module.get_false() == False + + def test_toint(self): + module = self.import_extension('foo', [ + ("to_int", "METH_O", + ''' + if (args->ob_type->tp_as_number && args->ob_type->tp_as_number->nb_int) { + return args->ob_type->tp_as_number->nb_int(args); + } + else { + 
PyErr_SetString(PyExc_TypeError,"cannot convert bool to int"); + return NULL; + } + '''), ]) + assert module.to_int(False) == 0 + assert module.to_int(True) == 1 + + def test_check(self): + module = self.import_extension('foo', [ + ("type_check", "METH_O", + ''' + return PyLong_FromLong(PyBool_Check(args)); + ''')]) + assert module.type_check(True) + assert module.type_check(False) + assert not module.type_check(None) + assert not module.type_check(1.0) + diff --git a/pypy/module/cpyext/test/test_borrow.py b/pypy/module/cpyext/test/test_borrow.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_borrow.py @@ -0,0 +1,71 @@ +import py +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.pyobject import make_ref + + +class AppTestBorrow(AppTestCpythonExtensionBase): + def test_tuple_borrowing(self): + module = self.import_extension('foo', [ + ("test_borrowing", "METH_NOARGS", + """ + PyObject *t = PyTuple_New(1); + PyObject *f = PyFloat_FromDouble(42.0); + PyObject *g = NULL; + printf("Refcnt1: %ld\\n", f->ob_refcnt); + PyTuple_SetItem(t, 0, f); // steals reference + printf("Refcnt2: %ld\\n", f->ob_refcnt); + f = PyTuple_GetItem(t, 0); // borrows reference + printf("Refcnt3: %ld\\n", f->ob_refcnt); + g = PyTuple_GetItem(t, 0); // borrows reference again + printf("Refcnt4: %ld\\n", f->ob_refcnt); + printf("COMPARE: %i\\n", f == g); + fflush(stdout); + Py_DECREF(t); + Py_RETURN_TRUE; + """), + ]) + assert module.test_borrowing() # the test should not leak + + def test_borrow_destroy(self): + module = self.import_extension('foo', [ + ("test_borrow_destroy", "METH_NOARGS", + """ + PyObject *i = PyInt_FromLong(42); + PyObject *j; + PyObject *t1 = PyTuple_Pack(1, i); + PyObject *t2 = PyTuple_Pack(1, i); + Py_DECREF(i); + + i = PyTuple_GetItem(t1, 0); + PyTuple_GetItem(t2, 0); + Py_DECREF(t2); + + j = PyInt_FromLong(PyInt_AsLong(i)); + Py_DECREF(t1); 
+ return j; + """), + ]) + assert module.test_borrow_destroy() == 42 + + def test_double_borrow(self): + if self.runappdirect: + py.test.xfail('segfault') + module = self.import_extension('foo', [ + ("run", "METH_NOARGS", + """ + PyObject *t = PyTuple_New(1); + PyObject *s = PyRun_String("set()", Py_eval_input, + Py_None, Py_None); + PyObject *w = PyWeakref_NewRef(s, Py_None); + PyTuple_SetItem(t, 0, s); + PyTuple_GetItem(t, 0); + PyTuple_GetItem(t, 0); + Py_DECREF(t); + return w; + """), + ]) + wr = module.run() + # check that the set() object was deallocated + self.debug_collect() + assert wr() is None diff --git a/pypy/module/cpyext/test/test_bufferobject.py b/pypy/module/cpyext/test/test_bufferobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_bufferobject.py @@ -0,0 +1,123 @@ +from rpython.rtyper.lltypesystem import lltype +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.api import PyObject + +class AppTestBufferObject(AppTestCpythonExtensionBase): + + def test_FromMemory(self): + module = self.import_extension('foo', [ + ("get_FromMemory", "METH_NOARGS", + """ + cbuf = malloc(4); + cbuf[0] = 'a'; + cbuf[1] = 'b'; + cbuf[2] = 'c'; + cbuf[3] = '\\0'; + return PyBuffer_FromMemory(cbuf, 4); + """), + ("free_buffer", "METH_NOARGS", + """ + free(cbuf); + Py_RETURN_NONE; + """), + ("check_ascharbuffer", "METH_O", + """ + char *ptr; + Py_ssize_t size; + if (PyObject_AsCharBuffer(args, (const char **)&ptr, &size) < 0) + return NULL; + return PyString_FromStringAndSize(ptr, size); + """) + ], prologue = """ + static char* cbuf = NULL; + """) + buf = module.get_FromMemory() + assert str(buf) == 'abc\0' + + assert module.check_ascharbuffer(buf) == 'abc\0' + + module.free_buffer() + + def test_Buffer_New(self): + module = self.import_extension('foo', [ + ("buffer_new", "METH_NOARGS", + """ + return PyBuffer_New(150); + """), + ]) + b 
= module.buffer_new() + raises(AttributeError, getattr, b, 'x') + + def test_array_buffer(self): + if self.runappdirect: + skip('PyBufferObject not available outside buffer object.c') + module = self.import_extension('foo', [ + ("roundtrip", "METH_O", + """ + PyBufferObject *buf = (PyBufferObject *)args; + return PyString_FromStringAndSize(buf->b_ptr, buf->b_size); + """), + ]) + import array + a = array.array('c', 'text') + b = buffer(a) + assert module.roundtrip(b) == 'text' + + + def test_issue2752(self): + iterations = 10 + if self.runappdirect: + iterations = 2000 + module = self.import_extension('foo', [ + ("test_mod", 'METH_VARARGS', + """ + PyObject *obj; + Py_buffer bp; + if (!PyArg_ParseTuple(args, "O", &obj)) + return NULL; + + if (PyObject_GetBuffer(obj, &bp, PyBUF_SIMPLE) == -1) + return NULL; + + if (((unsigned char*)bp.buf)[0] != '0') { + void * buf = (void*)bp.buf; + unsigned char val[4]; + char * s = PyString_AsString(obj); + memcpy(val, bp.buf, 4); + PyBuffer_Release(&bp); + if (PyObject_GetBuffer(obj, &bp, PyBUF_SIMPLE) == -1) + return NULL; + PyErr_Format(PyExc_ValueError, + "mismatch: %p [%x %x %x %x...] now %p [%x %x %x %x...] 
as str '%s'", + buf, val[0], val[1], val[2], val[3], + (void *)bp.buf, + ((unsigned char*)bp.buf)[0], + ((unsigned char*)bp.buf)[1], + ((unsigned char*)bp.buf)[2], + ((unsigned char*)bp.buf)[3], + s); + PyBuffer_Release(&bp); + return NULL; + } + + PyBuffer_Release(&bp); + Py_RETURN_NONE; + """), + ]) + bufsize = 4096 + def getdata(bufsize): + data = b'01234567' + for x in range(18): + data += data + if len(data) >= bufsize: + break + return data + for j in range(iterations): + block = getdata(bufsize) + assert block[:8] == '01234567' + try: + module.test_mod(block) + except ValueError as e: + print("%s at it=%d" % (e, j)) + assert False diff --git a/pypy/module/cpyext/test/test_bytearrayobject.py b/pypy/module/cpyext/test/test_bytearrayobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_bytearrayobject.py @@ -0,0 +1,188 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase + + +class AppTestStringObject(AppTestCpythonExtensionBase): + def test_basic(self): + module = self.import_extension('foo', [ + ("get_hello1", "METH_NOARGS", + """ + return PyByteArray_FromStringAndSize( + "Hello world", 11); + """), + ("get_hello2", "METH_NOARGS", + """ + return PyByteArray_FromStringAndSize("Hello world", 12); + """), + ("test_Size", "METH_NOARGS", + """ + PyObject* s = PyByteArray_FromStringAndSize("Hello world", 12); + int result = 0; + + if(PyByteArray_Size(s) == 12) { + result = 1; + } + Py_DECREF(s); + return PyBool_FromLong(result); + """), + ("test_is_bytearray", "METH_VARARGS", + """ + return PyBool_FromLong(PyByteArray_Check(PyTuple_GetItem(args, 0))); + """)], prologue='#include <stdlib.h>') + assert module.get_hello1() == b'Hello world' + assert module.get_hello2() == b'Hello world\x00' + assert module.test_Size() + assert module.test_is_bytearray(bytearray(b"")) + assert not module.test_is_bytearray(()) + + def test_bytearray_buffer_init(self): + module = self.import_extension('foo', [ + ("getbytearray", "METH_NOARGS", +
""" + PyObject *s, *t; + char* c; + + s = PyByteArray_FromStringAndSize(NULL, 4); + if (s == NULL) + return NULL; + t = PyByteArray_FromStringAndSize(NULL, 3); + if (t == NULL) + return NULL; + Py_DECREF(t); + c = PyByteArray_AsString(s); + if (c == NULL) + { + PyErr_SetString(PyExc_ValueError, "non-null bytearray object expected"); + return NULL; + } + c[0] = 'a'; + c[1] = 'b'; + c[2] = 0; + c[3] = 'c'; + return s; + """), + ]) + s = module.getbytearray() + assert len(s) == 4 + assert s == b'ab\x00c' + + def test_bytearray_mutable(self): + module = self.import_extension('foo', [ + ("mutable", "METH_NOARGS", + """ + PyObject *base; + base = PyByteArray_FromStringAndSize("test", 10); + if (PyByteArray_GET_SIZE(base) != 10) + return PyLong_FromLong(-PyByteArray_GET_SIZE(base)); + memcpy(PyByteArray_AS_STRING(base), "works", 6); + Py_INCREF(base); + return base; + """), + ]) + s = module.mutable() + if s == b'\x00' * 10: + assert False, "no RW access to bytearray" + assert s[:6] == b'works\x00' + + def test_AsByteArray(self): + module = self.import_extension('foo', [ + ("getbytearray", "METH_NOARGS", + """ + const char *c; + PyObject *s2, *s1 = PyByteArray_FromStringAndSize("test", 4); + if (s1 == NULL) + return NULL; + c = PyByteArray_AsString(s1); + s2 = PyByteArray_FromStringAndSize(c, 4); + Py_DECREF(s1); + return s2; + """), + ]) + s = module.getbytearray() + assert s == b'test' + + def test_manipulations(self): + import sys + module = self.import_extension('foo', [ + ("bytearray_from_bytes", "METH_VARARGS", + ''' + return PyByteArray_FromStringAndSize(PyBytes_AsString( + PyTuple_GetItem(args, 0)), 4); + ''' + ), + ("bytes_from_bytearray", "METH_VARARGS", + ''' + char * buf; + int n; + PyObject * obj; + obj = PyTuple_GetItem(args, 0); + buf = PyByteArray_AsString(obj); + if (buf == NULL) + { + PyErr_SetString(PyExc_ValueError, "non-null bytearray object expected"); + return NULL; + } + n = PyByteArray_Size(obj); + return PyBytes_FromStringAndSize(buf, n); + ''' + 
), + ("concat", "METH_VARARGS", + """ + PyObject * ret, *right, *left; + PyObject *ba1, *ba2; + if (!PyArg_ParseTuple(args, "OO", &left, &right)) { + return PyUnicode_FromString("parse failed"); + } + ba1 = PyByteArray_FromObject(left); + ba2 = PyByteArray_FromObject(right); + if (ba1 == NULL || ba2 == NULL) + { + /* exception should be set */ + return NULL; + } + ret = PyByteArray_Concat(ba1, ba2); + return ret; + """)]) + assert module.bytearray_from_bytes(b"huheduwe") == b"huhe" + assert module.bytes_from_bytearray(bytearray(b'abc')) == b'abc' + if '__pypy__' in sys.builtin_module_names: + # CPython only makes an assert. + raises(ValueError, module.bytes_from_bytearray, 4.0) + ret = module.concat(b'abc', b'def') + assert ret == b'abcdef' + assert not isinstance(ret, str) + assert isinstance(ret, bytearray) + raises(TypeError, module.concat, b'abc', u'def') + + def test_bytearray_resize(self): + module = self.import_extension('foo', [ + ("bytearray_resize", "METH_VARARGS", + ''' + PyObject *obj, *ba; + int newsize, oldsize, ret; + if (!PyArg_ParseTuple(args, "Oi", &obj, &newsize)) { + return PyUnicode_FromString("parse failed"); + } + + ba = PyByteArray_FromObject(obj); + if (ba == NULL) + return NULL; + oldsize = PyByteArray_Size(ba); + if (oldsize == 0) + { + return PyUnicode_FromString("oldsize is 0"); + } + ret = PyByteArray_Resize(ba, newsize); + if (ret != 0) + { + printf("ret, oldsize, newsize= %d, %d, %d\\n", ret, oldsize, newsize); + return NULL; + } + return ba; + ''' + )]) + ret = module.bytearray_resize(b'abc', 6) + assert len(ret) == 6,"%s, len=%d" % (ret, len(ret)) + assert ret == b'abc\x00\x00\x00' + ret = module.bytearray_resize(b'abcdefghi', 4) + assert len(ret) == 4,"%s, len=%d" % (ret, len(ret)) + assert ret == b'abcd' diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_bytesobject.py @@ -0,0 +1,611 @@ +# encoding: utf-8 
+import pytest +from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.interpreter.error import OperationError +from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.bytesobject import ( + new_empty_str, PyBytesObject, _PyString_Resize, PyString_Concat, + PyString_ConcatAndDel, PyString_Format, PyString_InternFromString, + PyString_AsEncodedObject, PyString_AsDecodedObject, _PyString_Eq, + _PyString_Join) +from pypy.module.cpyext.api import PyObjectP, PyObject, Py_ssize_tP, generic_cpy_call +from pypy.module.cpyext.pyobject import decref, from_ref, make_ref +from pypy.module.cpyext.buffer import PyObject_AsCharBuffer +from pypy.module.cpyext.api import PyTypeObjectPtr + + +class AppTestBytesObject(AppTestCpythonExtensionBase): + def test_bytesobject(self): + module = self.import_extension('foo', [ + ("get_hello1", "METH_NOARGS", + """ + return PyBytes_FromStringAndSize( + "Hello world", 11); + """), + ("get_hello2", "METH_NOARGS", + """ + return PyBytes_FromString("Hello world"); + """), + ("test_Size", "METH_NOARGS", + """ + PyObject* s = PyBytes_FromString("Hello world"); + int result = PyBytes_Size(s); + + Py_DECREF(s); + return PyLong_FromLong(result); + """), + ("test_Size_exception", "METH_NOARGS", + """ + PyObject* f = PyFloat_FromDouble(1.0); + PyBytes_Size(f); + + Py_DECREF(f); + return NULL; + """), + ("test_is_bytes", "METH_VARARGS", + """ + return PyBool_FromLong(PyBytes_Check(PyTuple_GetItem(args, 0))); + """)], prologue='#include <stdlib.h>') + assert module.get_hello1() == b'Hello world' + assert module.get_hello2() == b'Hello world' + assert module.test_Size() == 11 + raises(TypeError, module.test_Size_exception) + + assert module.test_is_bytes(b"") + assert not module.test_is_bytes(()) + + def test_bytes_buffer_init(self): + module = self.import_extension('foo', [ + ("getbytes", "METH_NOARGS", + """ + PyObject *s, *t; + char* c; + + s = 
PyBytes_FromStringAndSize(NULL, 4); + if (s == NULL) + return NULL; + t = PyBytes_FromStringAndSize(NULL, 3); + if (t == NULL) + return NULL; + Py_DECREF(t); + c = PyBytes_AS_STRING(s); + c[0] = 'a'; + c[1] = 'b'; + c[2] = 0; + c[3] = 'c'; + return s; + """), + ]) + s = module.getbytes() + assert len(s) == 4 + assert s == b'ab\x00c' + + def test_bytes_tp_alloc(self): + module = self.import_extension('foo', [ + ("tpalloc", "METH_NOARGS", + """ + PyObject *base; + PyTypeObject * type; + PyBytesObject *obj; + base = PyBytes_FromString("test"); + if (PyBytes_GET_SIZE(base) != 4) + return PyLong_FromLong(-PyBytes_GET_SIZE(base)); + type = base->ob_type; + if (type->tp_itemsize != 1) + return PyLong_FromLong(type->tp_itemsize); + obj = (PyBytesObject*)type->tp_alloc(type, 10); + if (PyBytes_GET_SIZE(obj) != 10) + return PyLong_FromLong(PyBytes_GET_SIZE(obj)); + /* cannot work, there is only RO access + memcpy(PyBytes_AS_STRING(obj), "works", 6); */ + Py_INCREF(obj); + return (PyObject*)obj; + """), + ('alloc_rw', "METH_NOARGS", + ''' + PyObject *obj = (PyObject*)_PyObject_NewVar(&PyBytes_Type, 10); + memcpy(PyBytes_AS_STRING(obj), "works", 6); + return (PyObject*)obj; + '''), + ]) + s = module.alloc_rw() + assert s[:6] == b'works\0' # s[6:10] contains random garbage + s = module.tpalloc() + assert s == b'\x00' * 10 + + def test_AsString(self): + module = self.import_extension('foo', [ + ("getbytes", "METH_NOARGS", + """ + char *c; + PyObject* s2, *s1 = PyBytes_FromStringAndSize("test", 4); + c = PyBytes_AsString(s1); + s2 = PyBytes_FromStringAndSize(c, 4); + Py_DECREF(s1); + return s2; + """), + ]) + s = module.getbytes() + assert s == b'test' + + def test_manipulations(self): + module = self.import_extension('foo', [ + ("bytes_as_string", "METH_VARARGS", + ''' + return PyBytes_FromStringAndSize(PyBytes_AsString( + PyTuple_GetItem(args, 0)), 4); + ''' + ), + ("concat", "METH_VARARGS", + """ + PyObject ** v; + PyObject * left = PyTuple_GetItem(args, 0); + Py_INCREF(left); 
/* the reference will be stolen! */ + v = &left; + PyBytes_Concat(v, PyTuple_GetItem(args, 1)); + return *v; + """)]) + assert module.bytes_as_string(b"huheduwe") == b"huhe" + ret = module.concat(b'abc', b'def') + assert ret == b'abcdef' + ret = module.concat('abc', u'def') + assert not isinstance(ret, str) + assert isinstance(ret, unicode) + assert ret == 'abcdef' + + def test_py_bytes_as_string_None(self): + module = self.import_extension('foo', [ + ("string_None", "METH_VARARGS", + ''' + if (PyBytes_AsString(Py_None)) { + Py_RETURN_NONE; + } + return NULL; + ''' + )]) + raises(TypeError, module.string_None) + + def test_AsStringAndSize(self): + module = self.import_extension('foo', [ + ("getbytes", "METH_NOARGS", + """ + PyObject* s1 = PyBytes_FromStringAndSize("te\\0st", 5); + char *buf; + Py_ssize_t len; + if (PyBytes_AsStringAndSize(s1, &buf, &len) < 0) + return NULL; + if (len != 5) { + PyErr_SetString(PyExc_AssertionError, "Bad Length"); + return NULL; + } + if (PyBytes_AsStringAndSize(s1, &buf, NULL) >= 0) { + PyErr_SetString(PyExc_AssertionError, "Should Have failed"); + return NULL; + } + PyErr_Clear(); + Py_DECREF(s1); + Py_INCREF(Py_None); + return Py_None; + """), + ("c_only", "METH_NOARGS", + """ + int ret; + char * buf2; + PyObject * obj = PyBytes_FromStringAndSize(NULL, 1024); + if (!obj) + return NULL; + buf2 = PyBytes_AsString(obj); + if (!buf2) + return NULL; + /* buf should not have been forced, issue #2395 */ + ret = _PyBytes_Resize(&obj, 512); + if (ret < 0) + return NULL; + Py_DECREF(obj); + Py_INCREF(Py_None); + return Py_None; + """), + ]) + module.getbytes() + module.c_only() + + def test_py_string_as_string_Unicode(self): + module = self.import_extension('foo', [ + ("getstring_unicode", "METH_NOARGS", + """ + Py_UNICODE chars[] = {'t', 'e', 's', 't'}; + PyObject* u1 = PyUnicode_FromUnicode(chars, 4); + char *buf; + buf = PyString_AsString(u1); + if (buf == NULL) + return NULL; + if (buf[3] != 't') { + 
PyErr_SetString(PyExc_AssertionError, "Bad conversion"); + return NULL; + } + Py_DECREF(u1); + Py_INCREF(Py_None); + return Py_None; + """), + ("getstringandsize_unicode", "METH_NOARGS", + """ + Py_UNICODE chars[] = {'t', 'e', 's', 't'}; + PyObject* u1 = PyUnicode_FromUnicode(chars, 4); + char *buf; + Py_ssize_t len; + if (PyString_AsStringAndSize(u1, &buf, &len) < 0) + return NULL; + if (len != 4) { + PyErr_SetString(PyExc_AssertionError, "Bad Length"); + return NULL; + } + Py_DECREF(u1); + Py_INCREF(Py_None); + return Py_None; + """), + ]) + module.getstring_unicode() + module.getstringandsize_unicode() + + def test_format_v(self): + module = self.import_extension('foo', [ + ("test_string_format_v", "METH_VARARGS", + ''' + return helper("bla %d ble %s\\n", + PyInt_AsLong(PyTuple_GetItem(args, 0)), + PyString_AsString(PyTuple_GetItem(args, 1))); + ''' + ) + ], prologue=''' + PyObject* helper(char* fmt, ...) + { + va_list va; + PyObject* res; + va_start(va, fmt); + res = PyString_FromFormatV(fmt, va); + va_end(va); + return res; + } + ''') + res = module.test_string_format_v(1, "xyz") + assert res == "bla 1 ble xyz\n" + + def test_format(self): + module = self.import_extension('foo', [ + ("test_string_format", "METH_VARARGS", + ''' + return PyString_FromFormat("bla %d ble %s\\n", + PyInt_AsLong(PyTuple_GetItem(args, 0)), + PyString_AsString(PyTuple_GetItem(args, 1))); + ''' + ) + ]) + res = module.test_string_format(1, "xyz") + assert res == "bla 1 ble xyz\n" + + def test_intern_inplace(self): + module = self.import_extension('foo', [ + ("test_intern_inplace", "METH_O", + ''' + PyObject *s = args; + Py_INCREF(s); + PyString_InternInPlace(&s); + if (((PyBytesObject*)s)->ob_sstate == SSTATE_NOT_INTERNED) + { + Py_DECREF(s); + s = PyString_FromString("interned error"); + } + return s; + ''' + ) + ]) + # This does not test much, but at least the refcounts are checked. 
+ assert module.test_intern_inplace('s') == 's' + + def test_bytes_macros(self): + """The PyString_* macros cast, and calls expecting that build.""" + module = self.import_extension('foo', [ + ("test_macro_invocations", "METH_NOARGS", + """ + PyObject* o = PyString_FromString(""); + PyBytesObject* u = (PyBytesObject*)o; + + PyString_GET_SIZE(u); + PyString_GET_SIZE(o); + + PyString_AS_STRING(o); + PyString_AS_STRING(u); + + return o; + """)]) + assert module.test_macro_invocations() == '' + + def test_hash_and_state(self): + module = self.import_extension('foo', [ + ("test_hash", "METH_VARARGS", + ''' + PyObject* obj = (PyTuple_GetItem(args, 0)); + long hash = ((PyBytesObject*)obj)->ob_shash; + return PyLong_FromLong(hash); + ''' + ), + ("test_sstate", "METH_NOARGS", + ''' + PyObject *s = PyString_FromString("xyz"); + /*int sstate = ((PyBytesObject*)s)->ob_sstate; + printf("sstate now %d\\n", sstate);*/ + PyString_InternInPlace(&s); + /*sstate = ((PyBytesObject*)s)->ob_sstate; + printf("sstate now %d\\n", sstate);*/ + Py_DECREF(s); + return PyBool_FromLong(1); + '''), + ], prologue='#include <stdlib.h>') + res = module.test_hash("xyz") + assert res == hash('xyz') + # doesn't really test, but if printf is enabled will prove sstate + assert module.test_sstate() + + def test_subclass(self): + # taken from PyStringArrType_Type in numpy's scalartypes.c.src + module = self.import_extension('bar', [ + ("newsubstr", "METH_O", + """ + PyObject * obj; + char * data; + int len; + + data = PyString_AS_STRING(args); + len = PyString_GET_SIZE(args); + if (data == NULL) + Py_RETURN_NONE; + obj = PyArray_Scalar(data, len); + return obj; + """), + ("get_len", "METH_O", + """ + return PyLong_FromLong(PyObject_Size(args)); + """), + ('has_nb_add', "METH_O", + ''' + if (args->ob_type->tp_as_number == NULL) { + Py_RETURN_FALSE; + } + if (args->ob_type->tp_as_number->nb_add == NULL) { + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; + '''), + ], prologue=""" + #include <Python.h> + PyTypeObject PyStringArrType_Type 
= { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "bar.string_", /* tp_name*/ + sizeof(PyBytesObject), /* tp_basicsize*/ + 0 /* tp_itemsize */ + }; + + static PyObject * + stringtype_repr(PyObject *self) + { + const char *dptr, *ip; + int len; + PyObject *new; + + ip = dptr = PyString_AS_STRING(self); + len = PyString_GET_SIZE(self); + dptr += len-1; + while(len > 0 && *dptr-- == 0) { + len--; + } + new = PyString_FromStringAndSize(ip, len); + if (new == NULL) { + return PyString_FromString(""); + } + return new; + } + + static PyObject * + stringtype_str(PyObject *self) + { + const char *dptr, *ip; + int len; + PyObject *new; + + ip = dptr = PyString_AS_STRING(self); + len = PyString_GET_SIZE(self); + dptr += len-1; + while(len > 0 && *dptr-- == 0) { + len--; + } + new = PyString_FromStringAndSize(ip, len); + if (new == NULL) { + return PyString_FromString(""); + } + return new; + } + + PyObject * + PyArray_Scalar(char *data, int n) + { + PyTypeObject *type = &PyStringArrType_Type; + PyObject *obj; + void *destptr; + int itemsize = n; + obj = type->tp_alloc(type, itemsize); + if (obj == NULL) { + return NULL; + } + destptr = PyString_AS_STRING(obj); + ((PyBytesObject *)obj)->ob_shash = -1; + memcpy(destptr, data, itemsize); + return obj; + } + """, more_init = ''' + PyStringArrType_Type.tp_alloc = NULL; + PyStringArrType_Type.tp_free = NULL; + + PyStringArrType_Type.tp_repr = stringtype_repr; + PyStringArrType_Type.tp_str = stringtype_str; + PyStringArrType_Type.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE; + PyStringArrType_Type.tp_itemsize = sizeof(char); + PyStringArrType_Type.tp_base = &PyString_Type; + PyStringArrType_Type.tp_hash = PyString_Type.tp_hash; + if (PyType_Ready(&PyStringArrType_Type) < 0) INITERROR; + ''') + + a = module.newsubstr('abc') + assert module.has_nb_add('a') is False + assert module.has_nb_add(a) is False + assert type(a).__name__ == 'string_' + assert a == 'abc' + assert 3 == module.get_len(a) + b = module.newsubstr('') + 
assert 0 == module.get_len(b) + +class TestBytes(BaseApiTest): + def test_bytes_resize(self, space): + py_str = new_empty_str(space, 10) + ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + py_str.c_ob_sval[0] = 'a' + py_str.c_ob_sval[1] = 'b' + py_str.c_ob_sval[2] = 'c' + ar[0] = rffi.cast(PyObject, py_str) + _PyString_Resize(space, ar, 3) + py_str = rffi.cast(PyBytesObject, ar[0]) + assert py_str.c_ob_size == 3 + assert py_str.c_ob_sval[1] == 'b' + assert py_str.c_ob_sval[3] == '\x00' + # the same for growing + ar[0] = rffi.cast(PyObject, py_str) + _PyString_Resize(space, ar, 10) + py_str = rffi.cast(PyBytesObject, ar[0]) + assert py_str.c_ob_size == 10 + assert py_str.c_ob_sval[1] == 'b' + assert py_str.c_ob_sval[10] == '\x00' + decref(space, ar[0]) + lltype.free(ar, flavor='raw') + + def test_string_buffer(self, space): + py_str = new_empty_str(space, 10) + c_buf = py_str.c_ob_type.c_tp_as_buffer + assert c_buf + py_obj = rffi.cast(PyObject, py_str) + assert generic_cpy_call(space, c_buf.c_bf_getsegcount, + py_obj, lltype.nullptr(Py_ssize_tP.TO)) == 1 + ref = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') + assert generic_cpy_call(space, c_buf.c_bf_getsegcount, + py_obj, ref) == 1 + assert ref[0] == 10 + lltype.free(ref, flavor='raw') + ref = lltype.malloc(rffi.VOIDPP.TO, 1, flavor='raw') + assert generic_cpy_call(space, c_buf.c_bf_getreadbuffer, + py_obj, 0, ref) == 10 + lltype.free(ref, flavor='raw') + decref(space, py_obj) + + def test_Concat(self, space): + ref = make_ref(space, space.wrap('abc')) + ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + ptr[0] = ref + prev_refcnt = ref.c_ob_refcnt + PyString_Concat(space, ptr, space.wrap('def')) + assert ref.c_ob_refcnt == prev_refcnt - 1 + assert space.str_w(from_ref(space, ptr[0])) == 'abcdef' + with pytest.raises(OperationError): + PyString_Concat(space, ptr, space.w_None) + assert not ptr[0] + ptr[0] = lltype.nullptr(PyObject.TO) + PyString_Concat(space, ptr, space.wrap('def')) # should not crash + 
lltype.free(ptr, flavor='raw') + + def test_ConcatAndDel(self, space): + ref1 = make_ref(space, space.wrap('abc')) + ref2 = make_ref(space, space.wrap('def')) + ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + ptr[0] = ref1 + prev_refcnf = ref2.c_ob_refcnt + PyString_ConcatAndDel(space, ptr, ref2) + assert space.str_w(from_ref(space, ptr[0])) == 'abcdef' + assert ref2.c_ob_refcnt == prev_refcnf - 1 + decref(space, ptr[0]) + ptr[0] = lltype.nullptr(PyObject.TO) + ref2 = make_ref(space, space.wrap('foo')) + prev_refcnf = ref2.c_ob_refcnt + PyString_ConcatAndDel(space, ptr, ref2) # should not crash + assert ref2.c_ob_refcnt == prev_refcnf - 1 + lltype.free(ptr, flavor='raw') + + def test_format(self, space): + assert "1 2" == space.unwrap( + PyString_Format(space, space.wrap('%s %d'), space.wrap((1, 2)))) + + def test_asbuffer(self, space): + bufp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') + lenp = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') + + w_text = space.wrap("text") + ref = make_ref(space, w_text) + prev_refcnt = ref.c_ob_refcnt + assert PyObject_AsCharBuffer(space, ref, bufp, lenp) == 0 + assert ref.c_ob_refcnt == prev_refcnt + assert lenp[0] == 4 + assert rffi.charp2str(bufp[0]) == 'text' + lltype.free(bufp, flavor='raw') + lltype.free(lenp, flavor='raw') + decref(space, ref) + + def test_intern(self, space): + buf = rffi.str2charp("test") + w_s1 = PyString_InternFromString(space, buf) + w_s2 = PyString_InternFromString(space, buf) + rffi.free_charp(buf) + assert w_s1 is w_s2 + + def test_AsEncodedObject(self, space): + ptr = space.wrap('abc') + + errors = rffi.str2charp("strict") + + encoding = rffi.str2charp("hex") + res = PyString_AsEncodedObject(space, ptr, encoding, errors) + assert space.unwrap(res) == "616263" + + res = PyString_AsEncodedObject(space, + ptr, encoding, lltype.nullptr(rffi.CCHARP.TO)) + assert space.unwrap(res) == "616263" + rffi.free_charp(encoding) + + encoding = rffi.str2charp("unknown_encoding") + with raises_w(space, 
LookupError): + PyString_AsEncodedObject(space, ptr, encoding, errors) + rffi.free_charp(encoding) + + rffi.free_charp(errors) + + NULL = lltype.nullptr(rffi.CCHARP.TO) + res = PyString_AsEncodedObject(space, ptr, NULL, NULL) + assert space.unwrap(res) == "abc" + with raises_w(space, TypeError): + PyString_AsEncodedObject(space, space.wrap(2), NULL, NULL) + + def test_AsDecodedObject(self, space): + w_str = space.wrap('caf\xe9') + encoding = rffi.str2charp("latin-1") + w_res = PyString_AsDecodedObject(space, w_str, encoding, None) + rffi.free_charp(encoding) + assert space.unwrap(w_res) == u"caf\xe9" + + def test_eq(self, space): + assert 1 == _PyString_Eq( + space, space.wrap("hello"), space.wrap("hello")) + assert 0 == _PyString_Eq( + space, space.wrap("hello"), space.wrap("world")) + + def test_join(self, space): + w_sep = space.wrap('') + w_seq = space.wrap(['a', 'b']) + w_joined = _PyString_Join(space, w_sep, w_seq) + assert space.unwrap(w_joined) == 'ab' diff --git a/pypy/module/cpyext/test/test_capsule.py b/pypy/module/cpyext/test/test_capsule.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_capsule.py @@ -0,0 +1,29 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase + +class AppTestCapsule(AppTestCpythonExtensionBase): + def test_capsule_import(self): + module = self.import_extension('foo', [ + ("set_ptr", "METH_O", + """ + PyObject *capsule, *module; + void *ptr = PyLong_AsVoidPtr(args); + if (PyErr_Occurred()) return NULL; + capsule = PyCapsule_New(ptr, "foo._ptr", NULL); + if (PyErr_Occurred()) return NULL; + module = PyImport_ImportModule("foo"); + PyModule_AddObject(module, "_ptr", capsule); + Py_DECREF(module); + if (PyErr_Occurred()) return NULL; + Py_RETURN_NONE; + """), + ("get_ptr", "METH_NOARGS", + """ + void *ptr = PyCapsule_Import("foo._ptr", 0); + if (PyErr_Occurred()) return NULL; + return PyLong_FromVoidPtr(ptr); + """)]) + module.set_ptr(1234) + assert 'capsule object "foo._ptr" at ' in 
str(module._ptr) + import gc; gc.collect() + assert module.get_ptr() == 1234 + del module._ptr diff --git a/pypy/module/cpyext/test/test_cell.py b/pypy/module/cpyext/test/test_cell.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_cell.py @@ -0,0 +1,20 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase + + +class AppTestCell(AppTestCpythonExtensionBase): + def test_cell_type(self): + module = self.import_extension('foo', [ + ("cell_type", "METH_O", + """ + PyDict_SetItemString(args, "cell", (PyObject*)&PyCell_Type); + Py_RETURN_NONE; + """)]) + d = {} + module.cell_type(d) + def f(o): + def g(): + return o + return g + + cell_type = type(f(0).func_closure[0]) + assert d["cell"] is cell_type diff --git a/pypy/module/cpyext/test/test_classobject.py b/pypy/module/cpyext/test/test_classobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_classobject.py @@ -0,0 +1,93 @@ +from pypy.interpreter.function import Function +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.classobject import ( + PyClass_Check, PyClass_New, PyInstance_Check, PyInstance_New, + PyInstance_NewRaw, _PyInstance_Lookup) +from pypy.module.cpyext.object import PyObject_GetAttr +from pypy.module.cpyext.pyobject import get_w_obj_and_decref + +class TestClassObject(BaseApiTest): + def test_newinstance(self, space): + w_class = space.appexec([], """(): + class C: + x = None + def __init__(self, *args, **kwargs): + self.x = 1 + self.args = args + self.__dict__.update(kwargs) + return C + """) + + assert PyClass_Check(space, w_class) + + w_instance = PyInstance_NewRaw(space, w_class, None) + assert PyInstance_Check(space, w_instance) + assert space.getattr(w_instance, space.wrap('x')) is space.w_None + + w_instance = PyInstance_NewRaw(space, w_class, space.wrap(dict(a=3))) + assert space.getattr(w_instance, 
space.wrap('x')) is space.w_None + assert space.unwrap(space.getattr(w_instance, space.wrap('a'))) == 3 + + w_instance = PyInstance_New(space, w_class, + space.wrap((3,)), space.wrap(dict(y=2))) + assert space.unwrap(space.getattr(w_instance, space.wrap('x'))) == 1 + assert space.unwrap(space.getattr(w_instance, space.wrap('y'))) == 2 + assert space.unwrap(space.getattr(w_instance, space.wrap('args'))) == (3,) + + def test_lookup(self, space): + w_instance = space.appexec([], """(): + class C: + def __init__(self): + self.x = None + def f(self): pass + return C() + """) + + assert PyInstance_Check(space, w_instance) + py_obj = PyObject_GetAttr(space, w_instance, space.wrap('x')) + assert get_w_obj_and_decref(space, py_obj) is space.w_None + assert _PyInstance_Lookup(space, w_instance, space.wrap('x')) is space.w_None + assert _PyInstance_Lookup(space, w_instance, space.wrap('y')) is None + + # getattr returns a bound method + py_obj = PyObject_GetAttr(space, w_instance, space.wrap('f')) + assert not isinstance(get_w_obj_and_decref(space, py_obj), Function) + # _PyInstance_Lookup returns the raw descriptor + assert isinstance( + _PyInstance_Lookup(space, w_instance, space.wrap('f')), Function) + + def test_pyclass_new(self, space): + w_bases = space.newtuple([]) + w_dict = space.newdict() + w_name = space.wrap("C") + w_class = PyClass_New(space, w_bases, w_dict, w_name) + assert not space.isinstance_w(w_class, space.w_type) + w_instance = space.call_function(w_class) + assert PyInstance_Check(space, w_instance) + assert space.is_true(space.call_method(space.builtin, "isinstance", + w_instance, w_class)) + +class AppTestStringObject(AppTestCpythonExtensionBase): + def test_class_type(self): + module = self.import_extension('foo', [ + ("get_classtype", "METH_NOARGS", + """ + Py_INCREF(&PyClass_Type); + return (PyObject*)&PyClass_Type; + """)]) + class C: + pass + assert module.get_classtype() is type(C) + + def test_pyclass_new_no_bases(self): + module = 
self.import_extension('foo', [ + ("new_foo", "METH_O", + """ + return PyClass_New(NULL, PyDict_New(), args); + """)]) + FooClass = module.new_foo("FooClass") + class Cls1: + pass + assert type(FooClass) is type(Cls1) + assert FooClass.__bases__ == Cls1.__bases__ diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_codecs.py @@ -0,0 +1,15 @@ +# encoding: iso-8859-15 +from pypy.module.cpyext.test.test_api import BaseApiTest +from rpython.rtyper.lltypesystem import rffi +from pypy.module.cpyext.codecs import ( + PyCodec_IncrementalEncoder, PyCodec_IncrementalDecoder) + +class TestCodecs(BaseApiTest): + def test_incremental(self, space): + utf8 = rffi.str2charp('utf-8') + w_encoder = PyCodec_IncrementalEncoder(space, utf8, None) + w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) + w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) + w_decoded = space.call_method(w_decoder, 'decode', w_encoded) + assert space.unicode_w(w_decoded) == u'späm' + rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_complexobject.py b/pypy/module/cpyext/test/test_complexobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_complexobject.py @@ -0,0 +1,64 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w +from pypy.module.cpyext.complexobject import ( + PyComplex_FromDoubles, PyComplex_RealAsDouble, PyComplex_ImagAsDouble) + +class TestComplexObject(BaseApiTest): + def test_complexobject(self, space): + w_value = PyComplex_FromDoubles(space, 1.2, 3.4) + assert space.unwrap(w_value) == 1.2+3.4j + assert PyComplex_RealAsDouble(space, w_value) == 1.2 + assert PyComplex_ImagAsDouble(space, w_value) == 3.4 + + assert PyComplex_RealAsDouble(space, space.wrap(42)) == 42 + assert PyComplex_RealAsDouble(space, space.wrap(1.5)) == 
1.5 + assert PyComplex_ImagAsDouble(space, space.wrap(1.5)) == 0.0 + + # cpython accepts anything for PyComplex_ImagAsDouble + assert PyComplex_ImagAsDouble(space, space.w_None) == 0.0 + with raises_w(space, TypeError): + PyComplex_RealAsDouble(space, space.w_None) + +class AppTestCComplex(AppTestCpythonExtensionBase): + def test_AsCComplex(self): + module = self.import_extension('foo', [ + ("as_tuple", "METH_O", + """ + Py_complex c = PyComplex_AsCComplex(args); + if (PyErr_Occurred()) return NULL; + return Py_BuildValue("dd", c.real, c.imag); + """)]) + assert module.as_tuple(12-34j) == (12, -34) + assert module.as_tuple(-3.14) == (-3.14, 0.0) + raises(TypeError, module.as_tuple, "12") + + def test_FromCComplex(self): + module = self.import_extension('foo', [ + ("test", "METH_NOARGS", + """ + Py_complex c = {1.2, 3.4}; + return PyComplex_FromCComplex(c); + """)]) + assert module.test() == 1.2 + 3.4j + + def test_PyComplex_to_WComplex(self): + module = self.import_extension('foo', [ + ("test", "METH_NOARGS", + """ + Py_complex c = {1.2, 3.4}; + PyObject *obj = PyObject_Malloc(sizeof(PyComplexObject)); + obj = PyObject_Init(obj, &PyComplex_Type); + assert(obj != NULL); + ((PyComplexObject *)obj)->cval = c; + return obj; + """)]) + assert module.test() == 1.2 + 3.4j + + def test_WComplex_to_PyComplex(self): + module = self.import_extension('foo', [ + ("test", "METH_O", + """ + Py_complex c = ((PyComplexObject *)args)->cval; + return Py_BuildValue("dd", c.real, c.imag); + """)]) + assert module.test(1.2 + 3.4j) == (1.2, 3.4) diff --git a/pypy/module/cpyext/test/test_cparser.py b/pypy/module/cpyext/test/test_cparser.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_cparser.py @@ -0,0 +1,260 @@ +from rpython.flowspace.model import const +from rpython.flowspace.objspace import build_flow +from rpython.translator.simplify import simplify_graph +from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.module.cpyext.cparser import 
parse_source, CTypeSpace + +def test_configure(): + decl = """ + typedef ssize_t Py_ssize_t; + + typedef struct { + Py_ssize_t ob_refcnt; + Py_ssize_t ob_pypy_link; + double ob_fval; + } TestFloatObject; + """ + cts = parse_source(decl) + TestFloatObject = cts.definitions['TestFloatObject'] + assert isinstance(TestFloatObject, lltype.Struct) + assert TestFloatObject.c_ob_refcnt == rffi.SSIZE_T + assert TestFloatObject.c_ob_pypy_link == rffi.SSIZE_T + assert TestFloatObject.c_ob_fval == rffi.DOUBLE + +def test_simple(): + decl = "typedef ssize_t Py_ssize_t;" + cts = parse_source(decl) + assert cts.definitions == {'Py_ssize_t': rffi.SSIZE_T} + +def test_macro(): + decl = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + PyObject_HEAD + double ob_fval; + } PyFloatObject; + """ + cts = parse_source(decl) + assert 'PyFloatObject' in cts.definitions + assert 'PyObject_HEAD' in cts.macros + +def test_include(): + cdef1 = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + char *name; + } Type; + """ + cdef2 = """ + typedef struct { + PyObject_HEAD + Py_ssize_t ob_foo; + Type *type; + } Object; + """ + cts1 = parse_source(cdef1) + Type = cts1.definitions['Type'] + assert isinstance(Type, lltype.Struct) + cts2 = parse_source(cdef2, includes=[cts1]) + assert 'Type' not in cts2.definitions + Object = cts2.definitions['Object'] + assert Object.c_type.TO is Type + +def test_multiple_sources(): + cdef1 = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + char *name; + } Type; + """ + cdef2 = """ + typedef struct { + PyObject_HEAD + Py_ssize_t ob_foo; + Type *type; + } Object; + """ + cts = CTypeSpace() + cts.parse_source(cdef1) + Type = cts.definitions['Type'] + assert isinstance(Type, lltype.Struct) + assert 
'Object' not in cts.definitions + cts.parse_source(cdef2) + Object = cts.definitions['Object'] + assert Object.c_type.TO is Type + +def test_incomplete(): + cdef = """ + typedef ssize_t Py_ssize_t; + + typedef struct { + Py_ssize_t ob_refcnt; + Py_ssize_t ob_pypy_link; + struct _typeobject *ob_type; + } Object; + + typedef struct { + void *buf; + Object *obj; + } Buffer; + + """ + cts = parse_source(cdef) + Object = cts.gettype('Object') + assert isinstance(Object, lltype.Struct) + +def test_recursive(): + cdef = """ + typedef ssize_t Py_ssize_t; + + typedef struct { + Py_ssize_t ob_refcnt; + Py_ssize_t ob_pypy_link; + struct _typeobject *ob_type; + } Object; + + typedef struct { + void *buf; + Object *obj; + } Buffer; + + typedef struct _typeobject { + Object *obj; + } Type; + """ + cts = parse_source(cdef) + Object = cts.definitions['Object'] + assert isinstance(Object, lltype.Struct) + hash(Object) + +def test_nested_struct(): + cdef = """ + typedef struct { + int x; + } foo; + typedef struct { + foo y; + } bar; + """ + cts = parse_source(cdef) + bar = cts.gettype('bar') + assert isinstance(bar, lltype.Struct) + hash(bar) # bar is hashable + +def test_const(): + cdef = """ + typedef struct { + const char * const foo; + } bar; + """ + cts = parse_source(cdef) + assert cts.definitions['bar'].c_foo == rffi.CONST_CCHARP != rffi.CCHARP + +def test_gettype(): + decl = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + PyObject_HEAD + double ob_fval; + } TestFloatObject; + """ + cts = parse_source(decl) + assert cts.gettype('Py_ssize_t') == rffi.SSIZE_T + assert cts.gettype('TestFloatObject *').TO.c_ob_refcnt == rffi.SSIZE_T + assert cts.cast('Py_ssize_t', 42) == rffi.cast(rffi.SSIZE_T, 42) + +def test_parse_funcdecl(): + decl = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + PyObject_HEAD + 
double ob_fval; + } TestFloatObject; + + typedef TestFloatObject* (*func_t)(int, int); + """ + cts = parse_source(decl) + func_decl = cts.parse_func("func_t * some_func(TestFloatObject*)") + assert func_decl.name == 'some_func' + assert func_decl.get_llresult(cts) == cts.gettype('func_t*') + assert func_decl.get_llargs(cts) == [cts.gettype('TestFloatObject *')] + +def test_write_func(): + from ..api import ApiFunction + from rpython.translator.c.database import LowLevelDatabase + db = LowLevelDatabase() + cdef = """ + typedef ssize_t Py_ssize_t; + """ + cts = parse_source(cdef) + cdecl = "Py_ssize_t * some_func(Py_ssize_t*)" + decl = cts.parse_func(cdecl) + api_function = ApiFunction( + decl.get_llargs(cts), decl.get_llresult(cts), lambda space, x: None, + cdecl=decl) + assert (api_function.get_api_decl('some_func', db) == + "PyAPI_FUNC(Py_ssize_t *) some_func(Py_ssize_t * arg0);") + + +def test_wchar_t(): + cdef = """ + typedef struct { wchar_t* x; } test; + """ + cts = parse_source(cdef, headers=['stddef.h']) + obj = lltype.malloc(cts.gettype('test'), flavor='raw') + obj.c_x = cts.cast('wchar_t*', 0) + obj.c_x = lltype.nullptr(rffi.CWCHARP.TO) + lltype.free(obj, flavor='raw') + + +def test_translate_cast(): + cdef = "typedef ssize_t Py_ssize_t;" + cts = parse_source(cdef) + + def f(): + return cts.cast('Py_ssize_t*', 0) + graph = build_flow(f) + simplify_graph(graph) + assert len(graph.startblock.operations) == 1 + op = graph.startblock.operations[0] + assert op.args[0] == const(rffi.cast) + assert op.args[1].value is cts.gettype('Py_ssize_t*') + +def test_translate_gettype(): + cdef = "typedef ssize_t Py_ssize_t;" + cts = parse_source(cdef) + + def f(): + return cts.gettype('Py_ssize_t*') + graph = build_flow(f) + simplify_graph(graph) + # Check that the result is constant-folded + assert graph.startblock.operations == [] + [link] = graph.startblock.exits + assert link.target is graph.returnblock + assert link.args[0] == const(rffi.CArrayPtr(rffi.SSIZE_T)) diff 
--git a/pypy/module/cpyext/test/test_datetime.py b/pypy/module/cpyext/test/test_datetime.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_datetime.py @@ -0,0 +1,354 @@ +import pytest + +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.cdatetime import * +from pypy.module.cpyext.cdatetime import ( + _PyDateTime_Import, _PyDateTime_FromDateAndTime, _PyDate_FromDate, + _PyTime_FromTime, _PyDelta_FromDelta) +import datetime + +class TestDatetime(BaseApiTest): + def test_date(self, space): + date_api = _PyDateTime_Import(space) + w_date = _PyDate_FromDate(space, 2010, 06, 03, date_api.c_DateType) + assert space.unwrap(space.str(w_date)) == '2010-06-03' + + assert PyDate_Check(space, w_date) + assert PyDate_CheckExact(space, w_date) + + assert PyDateTime_GET_YEAR(space, w_date) == 2010 + assert PyDateTime_GET_MONTH(space, w_date) == 6 + assert PyDateTime_GET_DAY(space, w_date) == 3 + + def test_time(self, space): + date_api = _PyDateTime_Import(space) + w_time = _PyTime_FromTime( + space, 23, 15, 40, 123456, space.w_None, date_api.c_TimeType) + assert space.unwrap(space.str(w_time)) == '23:15:40.123456' + + assert PyTime_Check(space, w_time) + assert PyTime_CheckExact(space, w_time) + + assert PyDateTime_TIME_GET_HOUR(space, w_time) == 23 + assert PyDateTime_TIME_GET_MINUTE(space, w_time) == 15 + assert PyDateTime_TIME_GET_SECOND(space, w_time) == 40 + assert PyDateTime_TIME_GET_MICROSECOND(space, w_time) == 123456 + + def test_datetime(self, space): + date_api = _PyDateTime_Import(space) + w_date = _PyDateTime_FromDateAndTime( + space, 2010, 06, 03, 23, 15, 40, 123456, space.w_None, + date_api.c_DateTimeType) + assert space.unwrap(space.str(w_date)) == '2010-06-03 23:15:40.123456' + + assert PyDateTime_Check(space, w_date) + assert PyDateTime_CheckExact(space, w_date) + assert PyDate_Check(space, w_date) + assert not 
PyDate_CheckExact(space, w_date) + + assert PyDateTime_GET_YEAR(space, w_date) == 2010 + assert PyDateTime_GET_MONTH(space, w_date) == 6 + assert PyDateTime_GET_DAY(space, w_date) == 3 + assert PyDateTime_DATE_GET_HOUR(space, w_date) == 23 + assert PyDateTime_DATE_GET_MINUTE(space, w_date) == 15 + assert PyDateTime_DATE_GET_SECOND(space, w_date) == 40 + assert PyDateTime_DATE_GET_MICROSECOND(space, w_date) == 123456 + + def test_delta(self, space): + date_api = _PyDateTime_Import(space) + w_delta = space.appexec( + [space.wrap(3), space.wrap(15)], """(days, seconds): + from datetime import timedelta + return timedelta(days, seconds) + """) + assert PyDelta_Check(space, w_delta) + assert PyDelta_CheckExact(space, w_delta) + + w_delta = _PyDelta_FromDelta(space, 10, 20, 30, True, date_api.c_DeltaType) + assert PyDelta_Check(space, w_delta) + assert PyDelta_CheckExact(space, w_delta) + + assert PyDateTime_DELTA_GET_DAYS(space, w_delta) == 10 + assert PyDateTime_DELTA_GET_SECONDS(space, w_delta) == 20 + assert PyDateTime_DELTA_GET_MICROSECONDS(space, w_delta) == 30 + + def test_fromtimestamp(self, space): + w_args = space.wrap((0,)) + w_date = PyDate_FromTimestamp(space, w_args) + date = datetime.date.fromtimestamp(0) + assert space.unwrap(space.str(w_date)) == str(date) + From pypy.commits at gmail.com Wed Sep 12 08:34:43 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 12 Sep 2018 05:34:43 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: remove unicode_w from cpyext tests Message-ID: <5b9907e3.1c69fb81.3c5cc.7e8a@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95107:2a457f223c9a Date: 2018-09-12 15:33 +0300 http://bitbucket.org/pypy/pypy/changeset/2a457f223c9a/ Log: remove unicode_w from cpyext tests diff --git a/pypy/module/cpyext/test/buffer_test.c b/pypy/module/cpyext/test/buffer_test.c --- a/pypy/module/cpyext/test/buffer_test.c +++ b/pypy/module/cpyext/test/buffer_test.c @@ -192,6 +192,10 @@ */ #define GET_PYBUF_FLAG(FLAG) \ 
buf_flag = PyUnicode_FromString(#FLAG); \ + if (buf_flag == NULL) { \ + Py_DECREF(tmp); \ + return NULL; \ + } \ flag_matches = PyObject_RichCompareBool(buf_flag, tmp, Py_EQ); \ Py_DECREF(buf_flag); \ if (flag_matches == 1) { \ diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py --- a/pypy/module/cpyext/test/test_bytesobject.py +++ b/pypy/module/cpyext/test/test_bytesobject.py @@ -596,7 +596,7 @@ encoding = rffi.str2charp("latin-1") w_res = PyString_AsDecodedObject(space, w_str, encoding, None) rffi.free_charp(encoding) - assert space.unwrap(w_res) == u"caf\xe9" + assert space.utf8_w(w_res) == u"caf\xe9".encode('utf8') def test_eq(self, space): assert 1 == _PyString_Eq( diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py --- a/pypy/module/cpyext/test/test_codecs.py +++ b/pypy/module/cpyext/test/test_codecs.py @@ -11,5 +11,5 @@ w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) w_decoded = space.call_method(w_decoder, 'decode', w_encoded) - assert space.unicode_w(w_decoded) == u'späm' + assert space.utf8_w(w_decoded) == u'späm'.encode('utf8') rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -132,7 +132,7 @@ finally: rffi.free_charp(buf) w_a = space.getitem(w_globals, space.wrap("a")) - assert space.unicode_w(w_a) == u'caf\xe9' + assert space.utf8_w(w_a) == u'caf\xe9'.encode('utf8') lltype.free(flags, flavor='raw') def test_run_file(self, space): diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -214,9 +214,9 @@ PyObject_Cmp(space, w(u"\xe9"), w("\xe9"), ptr) def test_unicode(self, space, api): - assert 
space.unicode_w(api.PyObject_Unicode(None)) == u"" - assert space.unicode_w(api.PyObject_Unicode(space.wrap([]))) == u"[]" - assert space.unicode_w(api.PyObject_Unicode(space.wrap("e"))) == u"e" + assert space.utf8_w(api.PyObject_Unicode(None)) == u"" + assert space.utf8_w(api.PyObject_Unicode(space.wrap([]))) == u"[]" + assert space.utf8_w(api.PyObject_Unicode(space.wrap("e"))) == u"e" with raises_w(space, UnicodeDecodeError): PyObject_Unicode(space, space.wrap("\xe9")) diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -189,12 +189,12 @@ array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word)) array2 = PyUnicode_AS_UNICODE(space, word) array3 = PyUnicode_AsUnicode(space, word) - for (i, char) in enumerate(space.unicode_w(word)): + for (i, char) in enumerate(space.utf8_w(word)): assert array[i] == char assert array2[i] == char assert array3[i] == char with raises_w(space, TypeError): - PyUnicode_AsUnicode(space, space.wrap('spam')) + PyUnicode_AsUnicode(space, space.newbytes('spam')) utf_8 = rffi.str2charp('utf-8') encoded = PyUnicode_AsEncodedString(space, space.wrap(u'sp�m'), @@ -227,12 +227,12 @@ def test_fromstring(self, space): s = rffi.str2charp(u'sp\x09m'.encode("utf-8")) w_res = PyUnicode_FromString(space, s) - assert space.unicode_w(w_res) == u'sp\x09m' + assert space.utf8_w(w_res) == u'sp\x09m' res = PyUnicode_FromStringAndSize(space, s, 4) w_res = from_ref(space, res) decref(space, res) - assert space.unicode_w(w_res) == u'sp\x09m' + assert space.utf8_w(w_res) == u'sp\x09m' rffi.free_charp(s) def test_unicode_resize(self, space): @@ -281,17 +281,18 @@ u = rffi.str2charp(u'sp\x134m'.encode("utf-8")) w_u = PyUnicode_DecodeUTF8(space, u, 5, None) assert space.type(w_u) is space.w_unicode - assert space.unicode_w(w_u) == u'sp\x134m' + assert space.utf8_w(w_u) == u'sp\x134m' w_u = 
PyUnicode_DecodeUTF8(space, u, 2, None) assert space.type(w_u) is space.w_unicode - assert space.unicode_w(w_u) == 'sp' + assert space.utf8_w(w_u) == 'sp' rffi.free_charp(u) def test_encode_utf8(self, space): u = rffi.unicode2wcharp(u'sp\x09m') - w_s = PyUnicode_EncodeUTF8(space, u, 4, None) - assert space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8') + w_b = PyUnicode_EncodeUTF8(space, u, 4, None) + assert space.type(w_b) is space.w_bytes + assert space.bytes_w(w_b) == u'sp\x09m'.encode('utf-8') rffi.free_wcharp(u) def test_encode_decimal(self, space): @@ -389,18 +390,18 @@ def test_fromobject(self, space): w_u = space.wrap(u'a') assert PyUnicode_FromObject(space, w_u) is w_u - assert space.unicode_w( + assert space.utf8_w( PyUnicode_FromObject(space, space.wrap('test'))) == 'test' def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unicode_w( - PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' + assert space.utf8_w( + PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9'.encode('utf8') w_text = PyUnicode_FromEncodedObject(space, space.wrap("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) - assert space.unicode_w(w_text) == "test" + assert space.utf8_w(w_text) == "test" with raises_w(space, TypeError): PyUnicode_FromEncodedObject(space, space.wrap(u"test"), @@ -416,7 +417,7 @@ u_text = u'abcdefg' s_text = space.str_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) b_text = rffi.str2charp(s_text) - assert space.unicode_w(PyUnicode_Decode( + assert space.utf8_w(PyUnicode_Decode( space, b_text, len(s_text), null_charp, null_charp)) == u_text with raises_w(space, TypeError): PyUnicode_FromEncodedObject( @@ -433,7 +434,7 @@ w_bytes = PyUnicode_EncodeMBCS(space, wbuf, 4, None) rffi.free_wcharp(wbuf) assert space.type(w_bytes) is space.w_bytes - assert space.str_w(w_bytes) == "abc?" 
+ assert space.text_w(w_bytes) == "abc?" def test_escape(self, space): def test(ustr): @@ -534,7 +535,7 @@ def test_concat(self, space): w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b')) - assert space.unicode_w(w_res) == u'ab' + assert space.utf8_w(w_res) == u'ab' def test_copy(self, space): w_x = space.wrap(u"abcd\u0660") @@ -605,30 +606,30 @@ w_format = space.wrap(u'hi %s') w_args = space.wrap((u'test',)) w_formated = PyUnicode_Format(space, w_format, w_args) - assert (space.unicode_w(w_formated) == - space.unicode_w(space.mod(w_format, w_args))) + assert (space.utf8_w(w_formated) == + space.utf8_w(space.mod(w_format, w_args))) def test_join(self, space): w_sep = space.wrap(u'') w_seq = space.wrap([u'a', u'b']) w_joined = PyUnicode_Join(space, w_sep, w_seq) - assert space.unicode_w(w_joined) == u'ab' + assert space.utf8_w(w_joined) == u'ab' def test_fromordinal(self, space): w_char = PyUnicode_FromOrdinal(space, 65) - assert space.unicode_w(w_char) == u'A' + assert space.utf8_w(w_char) == u'A' w_char = PyUnicode_FromOrdinal(space, 0) - assert space.unicode_w(w_char) == u'\0' + assert space.utf8_w(w_char) == u'\0' w_char = PyUnicode_FromOrdinal(space, 0xFFFF) - assert space.unicode_w(w_char) == u'\uFFFF' + assert space.utf8_w(w_char) == u'\uFFFF'.encode('utf8') def test_replace(self, space): w_str = space.wrap(u"abababab") w_substr = space.wrap(u"a") w_replstr = space.wrap(u"z") - assert u"zbzbabab" == space.unicode_w( + assert u"zbzbabab" == space.utf8_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2)) - assert u"zbzbzbzb" == space.unicode_w( + assert u"zbzbzbzb" == space.utf8_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1)) def test_tailmatch(self, space): From pypy.commits at gmail.com Wed Sep 12 08:34:45 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 12 Sep 2018 05:34:45 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: fix recursive call to decode_object, set errorhandler Message-ID: 
<5b9907e5.1c69fb81.be974.96fe@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95108:f33f0bb4a73e Date: 2018-09-12 15:33 +0300 http://bitbucket.org/pypy/pypy/changeset/f33f0bb4a73e/ Log: fix recursive call to decode_object, set errorhandler diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -628,8 +628,10 @@ else: errors = None + state = space.fromcache(CodecState) result, _, length, byteorder = unicodehelper.str_decode_utf_32_helper( - string, errors, final=True, errorhandler=None, byteorder=byteorder) + string, errors, final=True, errorhandler=state.decode_error_handler, + byteorder=byteorder) if pbyteorder is not None: pbyteorder[0] = rffi.cast(rffi.INT_real, byteorder) return space.newutf8(result, length) diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -452,9 +452,7 @@ return self._value def utf8_w(self, space): - # Use the default encoding. 
- encoding = getdefaultencoding(space) - return space.utf8_w(decode_object(space, self, encoding, None)) + return self._value def buffer_w(self, space, flags): space.check_buf_flags(flags, True) From pypy.commits at gmail.com Wed Sep 12 08:38:49 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 12 Sep 2018 05:38:49 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: default for allow_surrogates is False Message-ID: <5b9908d9.1c69fb81.f8e17.7220@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95109:801748ba582a Date: 2018-09-12 14:16 +0300 http://bitbucket.org/pypy/pypy/changeset/801748ba582a/ Log: default for allow_surrogates is False diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1226,21 +1226,21 @@ def utf8_encode_utf_16(s, errors, errorhandler=None, - allow_surrogates=True): + allow_surrogates=False): return utf8_encode_utf_16_helper(s, errors, errorhandler, allow_surrogates, "native", 'utf-16-' + BYTEORDER2) def utf8_encode_utf_16_be(s, errors, errorhandler=None, - allow_surrogates=True): + allow_surrogates=False): return utf8_encode_utf_16_helper(s, errors, errorhandler, allow_surrogates, "big", 'utf-16-be') def utf8_encode_utf_16_le(s, errors, errorhandler=None, - allow_surrogates=True): + allow_surrogates=False): return utf8_encode_utf_16_helper(s, errors, errorhandler, allow_surrogates, "little", 'utf-16-le') From pypy.commits at gmail.com Wed Sep 12 08:38:51 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 12 Sep 2018 05:38:51 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: merge py3.5 into branch Message-ID: <5b9908db.1c69fb81.178eb.89b3@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95110:0e9637ca2d97 Date: 2018-09-12 15:02 +0300 http://bitbucket.org/pypy/pypy/changeset/0e9637ca2d97/ Log: merge py3.5 into branch diff too long, truncating to 2000 out of 23978 
lines diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_arraymodule.py @@ -0,0 +1,190 @@ +import pytest +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.conftest import option + +class AppTestArrayModule(AppTestCpythonExtensionBase): + enable_leak_checking = True + + def test_basic(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3]) + assert arr.typecode == 'i' + assert arr.itemsize == 4 + assert arr[2] == 3 + assert len(arr.buffer_info()) == 2 + exc = raises(TypeError, module.array.append) + errstr = str(exc.value) + assert errstr.startswith("descriptor 'append' of") + arr.append(4) + assert arr.tolist() == [1, 2, 3, 4] + assert len(arr) == 4 + + def test_iter(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3]) + sum = 0 + for i in arr: + sum += i + assert sum == 6 + + def test_index(self): + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 4]) + assert arr[3] == 4 + raises(IndexError, arr.__getitem__, 10) + del arr[2] + assert arr.tolist() == [1, 2, 4] + arr[2] = 99 + assert arr.tolist() == [1, 2, 99] + + def test_slice_get(self): + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 4]) + assert arr[:].tolist() == [1, 2, 3, 4] + assert arr[1:].tolist() == [2, 3, 4] + assert arr[:2].tolist() == [1, 2] + assert arr[1:3].tolist() == [2, 3] + + def test_slice_object(self): + module = self.import_module(name='array') + arr = module.array('i', [1, 2, 3, 4]) + assert arr[slice(1, 3)].tolist() == [2,3] + arr[slice(1, 3)] = module.array('i', [21, 22, 23]) + assert arr.tolist() == [1, 21, 22, 23, 4] + del arr[slice(1, 3)] + assert arr.tolist() == [1, 23, 4] + raises(TypeError, 'arr[slice(1, 3)] = "abc"') + + def test_buffer(self): + import sys + module = self.import_module(name='array') + arr 
= module.array('i', [1, 2, 3, 4]) + buf = memoryview(arr) + exc = raises(TypeError, "buf[1] = 1") + assert str(exc.value) == "cannot modify read-only memory" + if sys.byteorder == 'big': + expected = b'\0\0\0\x01\0\0\0\x02\0\0\0\x03\0\0\0\x04' + else: + expected = b'\x01\0\0\0\x02\0\0\0\x03\0\0\0\x04\0\0\0' + assert bytes(buf) == expected + + def test_releasebuffer(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3,4]) + assert module.get_releasebuffer_cnt() == 0 + module.create_and_release_buffer(arr) + assert module.get_releasebuffer_cnt() == 1 + + def test_Py_buffer(self): + module = self.import_module(name='array') + arr = module.array('i', [1,2,3,4]) + assert module.get_releasebuffer_cnt() == 0 + m = memoryview(arr) + assert module.get_releasebuffer_cnt() == 0 + del m + self.debug_collect() + assert module.get_releasebuffer_cnt() == 1 + + def test_0d_view(self): + module = self.import_module(name='array') + arr = module.array('B', b'\0\0\0\x01') + buf = memoryview(arr).cast('i', shape=()) + assert bytes(buf) == b'\0\0\0\x01' + assert buf.shape == () + assert buf.strides == () + + def test_binop_mul_impl(self): + # check that rmul is called + module = self.import_module(name='array') + arr = module.array('i', [2]) + res = [1, 2, 3] * arr + assert res == [1, 2, 3, 1, 2, 3] + module.switch_multiply() + res = [1, 2, 3] * arr + assert res == [2, 4, 6] + + @pytest.mark.xfail + def test_subclass_dealloc(self): + module = self.import_module(name='array') + class Sub(module.array): + pass + + arr = Sub('i', [2]) + module.readbuffer_as_string(arr) + class A(object): + pass + assert not module.same_dealloc(arr, module.array('i', [2])) + assert module.same_dealloc(arr, A()) + + def test_string_buf(self): + module = self.import_module(name='array') + arr = module.array('u', '123') + view = memoryview(arr) + assert view.itemsize == 4 + assert module.write_buffer_len(arr) == 12 + assert len(module.readbuffer_as_string(arr)) == 12 + assert 
len(module.readbuffer_as_string(view)) == 12 + + def test_subclass(self): + import struct + module = self.import_module(name='array') + class Sub(module.array): + pass + + arr = Sub('i', [2]) + res = [1, 2, 3] * arr + assert res == [1, 2, 3, 1, 2, 3] + + val = module.readbuffer_as_string(arr) + assert val == struct.pack('i', 2) + + def test_unicode_readbuffer(self): + # Not really part of array, refactor + import struct + module = self.import_module(name='array') + val = module.readbuffer_as_string(b'abcd') + assert val == b'abcd' + + def test_readinto(self): + module = self.import_module(name='array') + a = module.array('B') + a.fromstring(b'0123456789') + filename = self.udir + "/_test_file" + f = open(filename, 'w+b') + f.write(b'foobar') + f.seek(0) + n = f.readinto(a) + f.close() + assert n == 6 + assert len(a) == 10 + assert a.tostring() == b'foobar6789' + + def test_iowrite(self): + module = self.import_module(name='array') + from io import BytesIO + a = module.array('B') + a.fromstring(b'0123456789') + fd = BytesIO() + # only test that it works + fd.write(a) + + def test_getitem_via_PySequence_GetItem(self): + module = self.import_module(name='array') + a = module.array('i', range(10)) + # call via tp_as_mapping.mp_subscript + assert 5 == a[-5] + # PySequence_ITEM used to call space.getitem() which + # prefers tp_as_mapping.mp_subscript over tp_as_sequence.sq_item + # Now fixed so this test raises (array_item does not add len(a), + # array_subscr does) + raises(IndexError, module.getitem, a, -5) + + def test_subclass_with_attribute(self): + module = self.import_module(name='array') + class Sub(module.array): + def addattrib(self): + print('called addattrib') + self.attrib = True + import gc + module.subclass_with_attribute(Sub, "addattrib", "attrib", gc.collect) + assert Sub.__module__ == __name__ diff --git a/pypy/module/cpyext/test/test_boolobject.py b/pypy/module/cpyext/test/test_boolobject.py new file mode 100644 --- /dev/null +++ 
b/pypy/module/cpyext/test/test_boolobject.py @@ -0,0 +1,48 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.boolobject import PyBool_FromLong + +class TestBoolObject(BaseApiTest): + def test_fromlong(self, space): + for i in range(-3, 3): + obj = PyBool_FromLong(space, i) + if i: + assert obj is space.w_True + else: + assert obj is space.w_False + +class AppTestBoolMacros(AppTestCpythonExtensionBase): + def test_macros(self): + module = self.import_extension('foo', [ + ("get_true", "METH_NOARGS", "Py_RETURN_TRUE;"), + ("get_false", "METH_NOARGS", "Py_RETURN_FALSE;"), + ]) + assert module.get_true() == True + assert module.get_false() == False + + def test_toint(self): + module = self.import_extension('foo', [ + ("to_int", "METH_O", + ''' + if (args->ob_type->tp_as_number && args->ob_type->tp_as_number->nb_int) { + return args->ob_type->tp_as_number->nb_int(args); + } + else { + PyErr_SetString(PyExc_TypeError,"cannot convert bool to int"); + return NULL; + } + '''), ]) + assert module.to_int(False) == 0 + assert module.to_int(True) == 1 + + def test_check(self): + module = self.import_extension('foo', [ + ("type_check", "METH_O", + ''' + return PyLong_FromLong(PyBool_Check(args)); + ''')]) + assert module.type_check(True) + assert module.type_check(False) + assert not module.type_check(None) + assert not module.type_check(1.0) + diff --git a/pypy/module/cpyext/test/test_borrow.py b/pypy/module/cpyext/test/test_borrow.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_borrow.py @@ -0,0 +1,69 @@ +import py +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.pyobject import make_ref + + +class AppTestBorrow(AppTestCpythonExtensionBase): + def test_tuple_borrowing(self): + module = self.import_extension('foo', [ + 
("test_borrowing", "METH_NOARGS", + """ + PyObject *t = PyTuple_New(1); + PyObject *f = PyFloat_FromDouble(42.0); + PyObject *g = NULL; + printf("Refcnt1: %ld\\n", f->ob_refcnt); + PyTuple_SetItem(t, 0, f); // steals reference + printf("Refcnt2: %ld\\n", f->ob_refcnt); + f = PyTuple_GetItem(t, 0); // borrows reference + printf("Refcnt3: %ld\\n", f->ob_refcnt); + g = PyTuple_GetItem(t, 0); // borrows reference again + printf("Refcnt4: %ld\\n", f->ob_refcnt); + printf("COMPARE: %i\\n", f == g); + fflush(stdout); + Py_DECREF(t); + Py_RETURN_TRUE; + """), + ]) + assert module.test_borrowing() # the test should not leak + + def test_borrow_destroy(self): + module = self.import_extension('foo', [ + ("test_borrow_destroy", "METH_NOARGS", + """ + PyObject *i = PyLong_FromLong(42); + PyObject *j; + PyObject *t1 = PyTuple_Pack(1, i); + PyObject *t2 = PyTuple_Pack(1, i); + Py_DECREF(i); + + i = PyTuple_GetItem(t1, 0); + PyTuple_GetItem(t2, 0); + Py_DECREF(t2); + + j = PyLong_FromLong(PyLong_AsLong(i)); + Py_DECREF(t1); + return j; + """), + ]) + assert module.test_borrow_destroy() == 42 + + def test_double_borrow(self): + module = self.import_extension('foo', [ + ("run", "METH_NOARGS", + """ + PyObject *t = PyTuple_New(1); + PyObject *s = PyRun_String("set()", Py_eval_input, + Py_None, Py_None); + PyObject *w = PyWeakref_NewRef(s, Py_None); + PyTuple_SetItem(t, 0, s); + PyTuple_GetItem(t, 0); + PyTuple_GetItem(t, 0); + Py_DECREF(t); + return w; + """), + ]) + wr = module.run() + # check that the set() object was deallocated + self.debug_collect() + assert wr() is None diff --git a/pypy/module/cpyext/test/test_bytearrayobject.py b/pypy/module/cpyext/test/test_bytearrayobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_bytearrayobject.py @@ -0,0 +1,188 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase + + +class AppTestStringObject(AppTestCpythonExtensionBase): + def test_basic(self): + module = 
self.import_extension('foo', [ + ("get_hello1", "METH_NOARGS", + """ + return PyByteArray_FromStringAndSize( + "Hello world", 11); + """), + ("get_hello2", "METH_NOARGS", + """ + return PyByteArray_FromStringAndSize("Hello world", 12); + """), + ("test_Size", "METH_NOARGS", + """ + PyObject* s = PyByteArray_FromStringAndSize("Hello world", 12); + int result = 0; + + if(PyByteArray_Size(s) == 12) { + result = 1; + } + Py_DECREF(s); + return PyBool_FromLong(result); + """), + ("test_is_bytearray", "METH_VARARGS", + """ + return PyBool_FromLong(PyByteArray_Check(PyTuple_GetItem(args, 0))); + """)], prologue='#include ') + assert module.get_hello1() == b'Hello world' + assert module.get_hello2() == b'Hello world\x00' + assert module.test_Size() + assert module.test_is_bytearray(bytearray(b"")) + assert not module.test_is_bytearray(()) + + def test_bytearray_buffer_init(self): + module = self.import_extension('foo', [ + ("getbytearray", "METH_NOARGS", + """ + PyObject *s, *t; + char* c; + + s = PyByteArray_FromStringAndSize(NULL, 4); + if (s == NULL) + return NULL; + t = PyByteArray_FromStringAndSize(NULL, 3); + if (t == NULL) + return NULL; + Py_DECREF(t); + c = PyByteArray_AsString(s); + if (c == NULL) + { + PyErr_SetString(PyExc_ValueError, "non-null bytearray object expected"); + return NULL; + } + c[0] = 'a'; + c[1] = 'b'; + c[2] = 0; + c[3] = 'c'; + return s; + """), + ]) + s = module.getbytearray() + assert len(s) == 4 + assert s == b'ab\x00c' + + def test_bytearray_mutable(self): + module = self.import_extension('foo', [ + ("mutable", "METH_NOARGS", + """ + PyObject *base; + base = PyByteArray_FromStringAndSize("test", 10); + if (PyByteArray_GET_SIZE(base) != 10) + return PyLong_FromLong(-PyByteArray_GET_SIZE(base)); + memcpy(PyByteArray_AS_STRING(base), "works", 6); + Py_INCREF(base); + return base; + """), + ]) + s = module.mutable() + if s == b'\x00' * 10: + assert False, "no RW access to bytearray" + assert s[:6] == b'works\x00' + + def 
test_AsByteArray(self): + module = self.import_extension('foo', [ + ("getbytearray", "METH_NOARGS", + """ + const char *c; + PyObject *s2, *s1 = PyByteArray_FromStringAndSize("test", 4); + if (s1 == NULL) + return NULL; + c = PyByteArray_AsString(s1); + s2 = PyByteArray_FromStringAndSize(c, 4); + Py_DECREF(s1); + return s2; + """), + ]) + s = module.getbytearray() + assert s == b'test' + + def test_manipulations(self): + import sys + module = self.import_extension('foo', [ + ("bytearray_from_bytes", "METH_VARARGS", + ''' + return PyByteArray_FromStringAndSize(PyBytes_AsString( + PyTuple_GetItem(args, 0)), 4); + ''' + ), + ("bytes_from_bytearray", "METH_VARARGS", + ''' + char * buf; + int n; + PyObject * obj; + obj = PyTuple_GetItem(args, 0); + buf = PyByteArray_AsString(obj); + if (buf == NULL) + { + PyErr_SetString(PyExc_ValueError, "non-null bytearray object expected"); + return NULL; + } + n = PyByteArray_Size(obj); + return PyBytes_FromStringAndSize(buf, n); + ''' + ), + ("concat", "METH_VARARGS", + """ + PyObject * ret, *right, *left; + PyObject *ba1, *ba2; + if (!PyArg_ParseTuple(args, "OO", &left, &right)) { + return PyUnicode_FromString("parse failed"); + } + ba1 = PyByteArray_FromObject(left); + ba2 = PyByteArray_FromObject(right); + if (ba1 == NULL || ba2 == NULL) + { + /* exception should be set */ + return NULL; + } + ret = PyByteArray_Concat(ba1, ba2); + return ret; + """)]) + assert module.bytearray_from_bytes(b"huheduwe") == b"huhe" + assert module.bytes_from_bytearray(bytearray(b'abc')) == b'abc' + if '__pypy__' in sys.builtin_module_names: + # CPython only makes an assert. 
+ raises(ValueError, module.bytes_from_bytearray, 4.0) + ret = module.concat(b'abc', b'def') + assert ret == b'abcdef' + assert not isinstance(ret, str) + assert isinstance(ret, bytearray) + raises(TypeError, module.concat, b'abc', u'def') + + def test_bytearray_resize(self): + module = self.import_extension('foo', [ + ("bytearray_resize", "METH_VARARGS", + ''' + PyObject *obj, *ba; + int newsize, oldsize, ret; + if (!PyArg_ParseTuple(args, "Oi", &obj, &newsize)) { + return PyUnicode_FromString("parse failed"); + } + + ba = PyByteArray_FromObject(obj); + if (ba == NULL) + return NULL; + oldsize = PyByteArray_Size(ba); + if (oldsize == 0) + { + return PyUnicode_FromString("oldsize is 0"); + } + ret = PyByteArray_Resize(ba, newsize); + if (ret != 0) + { + printf("ret, oldsize, newsize= %d, %d, %d\\n", ret, oldsize, newsize); + return NULL; + } + return ba; + ''' + )]) + ret = module.bytearray_resize(b'abc', 6) + assert len(ret) == 6,"%s, len=%d" % (ret, len(ret)) + assert ret[:4] == b'abc\x00' + ret = module.bytearray_resize(b'abcdefghi', 4) + assert len(ret) == 4,"%s, len=%d" % (ret, len(ret)) + assert ret == b'abcd' diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_bytesobject.py @@ -0,0 +1,611 @@ +# encoding: utf-8 +from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.bytesobject import ( + new_empty_str, PyBytesObject, _PyBytes_Resize, PyBytes_Concat, + PyBytes_ConcatAndDel, + _PyBytes_Eq, + _PyBytes_Join) +from pypy.module.cpyext.api import PyObjectP, PyObject, Py_ssize_tP +from pypy.module.cpyext.pyobject import decref, from_ref, make_ref +from pypy.module.cpyext.buffer import PyObject_AsCharBuffer + + +class AppTestBytesObject(AppTestCpythonExtensionBase): + def 
test_bytesobject(self): + module = self.import_extension('foo', [ + ("get_hello1", "METH_NOARGS", + """ + return PyBytes_FromStringAndSize( + "Hello world", 11); + """), + ("get_hello2", "METH_NOARGS", + """ + return PyBytes_FromString("Hello world"); + """), + ("test_Size", "METH_NOARGS", + """ + PyObject* s = PyBytes_FromString("Hello world"); + int result = PyBytes_Size(s); + + Py_DECREF(s); + return PyLong_FromLong(result); + """), + ("test_Size_exception", "METH_NOARGS", + """ + PyObject* f = PyFloat_FromDouble(1.0); + PyBytes_Size(f); + + Py_DECREF(f); + return NULL; + """), + ("test_is_bytes", "METH_VARARGS", + """ + return PyBool_FromLong(PyBytes_Check(PyTuple_GetItem(args, 0))); + """)], prologue='#include ') + assert module.get_hello1() == b'Hello world' + assert module.get_hello2() == b'Hello world' + assert module.test_Size() + raises(TypeError, module.test_Size_exception) + + assert module.test_is_bytes(b"") + assert not module.test_is_bytes(()) + + def test_bytes_buffer_init(self): + module = self.import_extension('foo', [ + ("getbytes", "METH_NOARGS", + """ + PyObject *s, *t; + char* c; + + s = PyBytes_FromStringAndSize(NULL, 4); + if (s == NULL) + return NULL; + t = PyBytes_FromStringAndSize(NULL, 3); + if (t == NULL) + return NULL; + Py_DECREF(t); + c = PyBytes_AS_STRING(s); + c[0] = 'a'; + c[1] = 'b'; + c[2] = 0; + c[3] = 'c'; + return s; + """), + ]) + s = module.getbytes() + assert len(s) == 4 + assert s == b'ab\x00c' + + def test_bytes_tp_alloc(self): + module = self.import_extension('foo', [ + ("tpalloc", "METH_NOARGS", + """ + PyObject *base; + PyTypeObject * type; + PyObject *obj; + base = PyBytes_FromString("test"); + if (PyBytes_GET_SIZE(base) != 4) + return PyLong_FromLong(-PyBytes_GET_SIZE(base)); + type = base->ob_type; + if (type->tp_itemsize != 1) + return PyLong_FromLong(type->tp_itemsize); + obj = type->tp_alloc(type, 10); + if (PyBytes_GET_SIZE(obj) != 10) + return PyLong_FromLong(PyBytes_GET_SIZE(obj)); + /* cannot work, there is 
only RO access + memcpy(PyBytes_AS_STRING(obj), "works", 6); */ + Py_INCREF(obj); + return obj; + """), + ('alloc_rw', "METH_NOARGS", + ''' + PyObject *obj = (PyObject*)_PyObject_NewVar(&PyBytes_Type, 10); + memcpy(PyBytes_AS_STRING(obj), "works", 6); + return (PyObject*)obj; + '''), + ]) + s = module.alloc_rw() + assert s[:6] == b'works\0' # s[6:10] contains random garbage + s = module.tpalloc() + assert s == b'\x00' * 10 + + def test_AsString(self): + module = self.import_extension('foo', [ + ("getbytes", "METH_NOARGS", + """ + char *c; + PyObject* s2, *s1 = PyBytes_FromStringAndSize("test", 4); + c = PyBytes_AsString(s1); + s2 = PyBytes_FromStringAndSize(c, 4); + Py_DECREF(s1); + return s2; + """), + ]) + s = module.getbytes() + assert s == b'test' + + def test_manipulations(self): + module = self.import_extension('foo', [ + ("bytes_as_string", "METH_VARARGS", + ''' + return PyBytes_FromStringAndSize(PyBytes_AsString( + PyTuple_GetItem(args, 0)), 4); + ''' + ), + ("concat", "METH_VARARGS", + """ + PyObject ** v; + PyObject * left = PyTuple_GetItem(args, 0); + Py_INCREF(left); /* the reference will be stolen! 
*/ + v = &left; + PyBytes_Concat(v, PyTuple_GetItem(args, 1)); + return *v; + """)]) + assert module.bytes_as_string(b"huheduwe") == b"huhe" + ret = module.concat(b'abc', b'def') + assert ret == b'abcdef' + + def test_py_bytes_as_string_None(self): + module = self.import_extension('foo', [ + ("string_None", "METH_VARARGS", + ''' + if (PyBytes_AsString(Py_None)) { + Py_RETURN_NONE; + } + return NULL; + ''' + )]) + raises(TypeError, module.string_None) + + def test_AsStringAndSize(self): + module = self.import_extension('foo', [ + ("getbytes", "METH_NOARGS", + """ + PyObject* s1 = PyBytes_FromStringAndSize("te\\0st", 5); + char *buf; + Py_ssize_t len; + if (PyBytes_AsStringAndSize(s1, &buf, &len) < 0) + return NULL; + if (len != 5) { + PyErr_SetString(PyExc_AssertionError, "Bad Length"); + return NULL; + } + if (PyBytes_AsStringAndSize(s1, &buf, NULL) >= 0) { + PyErr_SetString(PyExc_AssertionError, "Should Have failed"); + return NULL; + } + PyErr_Clear(); + Py_DECREF(s1); + Py_INCREF(Py_None); + return Py_None; + """), + ("c_only", "METH_NOARGS", + """ + int ret; + char * buf2; + PyObject * obj = PyBytes_FromStringAndSize(NULL, 1024); + if (!obj) + return NULL; + buf2 = PyBytes_AsString(obj); + if (!buf2) + return NULL; + /* buf should not have been forced, issue #2395 */ + ret = _PyBytes_Resize(&obj, 512); + if (ret < 0) + return NULL; + Py_DECREF(obj); + Py_INCREF(Py_None); + return Py_None; + """), + ]) + module.getbytes() + module.c_only() + + def test_FromFormat(self): + module = self.import_extension('foo', [ + ("fmt", "METH_VARARGS", + """ + PyObject* fmt = PyTuple_GetItem(args, 0); + int n = PyLong_AsLong(PyTuple_GetItem(args, 1)); + PyObject* result = PyBytes_FromFormat(PyBytes_AsString(fmt), n); + return result; + """), + ]) + print(module.fmt(b'd:%d', 10)) + assert module.fmt(b'd:%d', 10) == b'd:10' + + def test_suboffsets(self): + module = self.import_extension('foo', [ + ("check_suboffsets", "METH_O", + """ + Py_buffer view; + PyObject_GetBuffer(args, 
&view, 0); + return PyLong_FromLong(view.suboffsets == NULL); + """)]) + assert module.check_suboffsets(b'1234') == 1 + +class TestBytes(BaseApiTest): + def test_bytes_resize(self, space): + py_str = new_empty_str(space, 10) + ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + py_str.c_ob_sval[0] = 'a' + py_str.c_ob_sval[1] = 'b' + py_str.c_ob_sval[2] = 'c' + ar[0] = rffi.cast(PyObject, py_str) + _PyBytes_Resize(space, ar, 3) + py_str = rffi.cast(PyBytesObject, ar[0]) + assert py_str.c_ob_size == 3 + assert py_str.c_ob_sval[1] == 'b' + assert py_str.c_ob_sval[3] == '\x00' + # the same for growing + ar[0] = rffi.cast(PyObject, py_str) + _PyBytes_Resize(space, ar, 10) + py_str = rffi.cast(PyBytesObject, ar[0]) + assert py_str.c_ob_size == 10 + assert py_str.c_ob_sval[1] == 'b' + assert py_str.c_ob_sval[10] == '\x00' + decref(space, ar[0]) + lltype.free(ar, flavor='raw') + + def test_Concat(self, space): + ref = make_ref(space, space.newbytes('abc')) + ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + ptr[0] = ref + prev_refcnt = ref.c_ob_refcnt + PyBytes_Concat(space, ptr, space.newbytes('def')) + assert ref.c_ob_refcnt == prev_refcnt - 1 + assert space.bytes_w(from_ref(space, ptr[0])) == 'abcdef' + with raises_w(space, TypeError): + PyBytes_Concat(space, ptr, space.w_None) + assert not ptr[0] + ptr[0] = lltype.nullptr(PyObject.TO) + PyBytes_Concat(space, ptr, space.newbytes('def')) # should not crash + lltype.free(ptr, flavor='raw') + + def test_ConcatAndDel(self, space): + ref1 = make_ref(space, space.newbytes('abc')) + ref2 = make_ref(space, space.newbytes('def')) + ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + ptr[0] = ref1 + prev_refcnf = ref2.c_ob_refcnt + PyBytes_ConcatAndDel(space, ptr, ref2) + assert space.bytes_w(from_ref(space, ptr[0])) == 'abcdef' + assert ref2.c_ob_refcnt == prev_refcnf - 1 + decref(space, ptr[0]) + ptr[0] = lltype.nullptr(PyObject.TO) + ref2 = make_ref(space, space.newbytes('foo')) + prev_refcnf = ref2.c_ob_refcnt + 
PyBytes_ConcatAndDel(space, ptr, ref2) # should not crash + assert ref2.c_ob_refcnt == prev_refcnf - 1 + lltype.free(ptr, flavor='raw') + + def test_asbuffer(self, space): + bufp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') + lenp = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') + + w_text = space.newbytes("text") + ref = make_ref(space, w_text) + prev_refcnt = ref.c_ob_refcnt + assert PyObject_AsCharBuffer(space, ref, bufp, lenp) == 0 + assert ref.c_ob_refcnt == prev_refcnt + assert lenp[0] == 4 + assert rffi.charp2str(bufp[0]) == 'text' + lltype.free(bufp, flavor='raw') + lltype.free(lenp, flavor='raw') + decref(space, ref) + + def test_eq(self, space): + assert 1 == _PyBytes_Eq(space, space.newbytes("hello"), space.newbytes("hello")) + assert 0 == _PyBytes_Eq(space, space.newbytes("hello"), space.newbytes("world")) + + def test_join(self, space): + w_sep = space.newbytes('') + w_seq = space.newtuple([space.newbytes('a'), space.newbytes('b')]) + w_joined = _PyBytes_Join(space, w_sep, w_seq) + assert space.bytes_w(w_joined) == 'ab' + + def test_FromObject(self, space, api): + w_obj = space.newbytes("test") + assert space.eq_w(w_obj, api.PyBytes_FromObject(w_obj)) + w_obj = space.call_function(space.w_bytearray, w_obj) + assert space.eq_w(w_obj, api.PyBytes_FromObject(w_obj)) + w_obj = space.wrap(u"test") + with raises_w(space, TypeError): + api.PyBytes_FromObject(w_obj) + PyString_AS_STRING(o); + PyString_AS_STRING(u); + + return o; + """)]) + assert module.test_macro_invocations() == '' + + def test_hash_and_state(self): + module = self.import_extension('foo', [ + ("test_hash", "METH_VARARGS", + ''' + PyObject* obj = (PyTuple_GetItem(args, 0)); + long hash = ((PyBytesObject*)obj)->ob_shash; + return PyLong_FromLong(hash); + ''' + ), + ("test_sstate", "METH_NOARGS", + ''' + PyObject *s = PyString_FromString("xyz"); + /*int sstate = ((PyBytesObject*)s)->ob_sstate; + printf("sstate now %d\\n", sstate);*/ + PyString_InternInPlace(&s); + /*sstate = 
((PyBytesObject*)s)->ob_sstate; + printf("sstate now %d\\n", sstate);*/ + Py_DECREF(s); + return PyBool_FromLong(1); + '''), + ], prologue='#include ') + res = module.test_hash("xyz") + assert res == hash('xyz') + # doesn't really test, but if printf is enabled will prove sstate + assert module.test_sstate() + + def test_subclass(self): + # taken from PyStringArrType_Type in numpy's scalartypes.c.src + module = self.import_extension('bar', [ + ("newsubstr", "METH_O", + """ + PyObject * obj; + char * data; + int len; + + data = PyString_AS_STRING(args); + len = PyString_GET_SIZE(args); + if (data == NULL) + Py_RETURN_NONE; + obj = PyArray_Scalar(data, len); + return obj; + """), + ("get_len", "METH_O", + """ + return PyLong_FromLong(PyObject_Size(args)); + """), + ('has_nb_add', "METH_O", + ''' + if (args->ob_type->tp_as_number == NULL) { + Py_RETURN_FALSE; + } + if (args->ob_type->tp_as_number->nb_add == NULL) { + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; + '''), + ], prologue=""" + #include + PyTypeObject PyStringArrType_Type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "bar.string_", /* tp_name*/ + sizeof(PyBytesObject), /* tp_basicsize*/ + 0 /* tp_itemsize */ + }; + + static PyObject * + stringtype_repr(PyObject *self) + { + const char *dptr, *ip; + int len; + PyObject *new; + + ip = dptr = PyString_AS_STRING(self); + len = PyString_GET_SIZE(self); + dptr += len-1; + while(len > 0 && *dptr-- == 0) { + len--; + } + new = PyString_FromStringAndSize(ip, len); + if (new == NULL) { + return PyString_FromString(""); + } + return new; + } + + static PyObject * + stringtype_str(PyObject *self) + { + const char *dptr, *ip; + int len; + PyObject *new; + + ip = dptr = PyString_AS_STRING(self); + len = PyString_GET_SIZE(self); + dptr += len-1; + while(len > 0 && *dptr-- == 0) { + len--; + } + new = PyString_FromStringAndSize(ip, len); + if (new == NULL) { + return PyString_FromString(""); + } + return new; + } + + PyObject * + PyArray_Scalar(char *data, int n) + { + 
PyTypeObject *type = &PyStringArrType_Type; + PyObject *obj; + void *destptr; + int itemsize = n; + obj = type->tp_alloc(type, itemsize); + if (obj == NULL) { + return NULL; + } + destptr = PyString_AS_STRING(obj); + ((PyBytesObject *)obj)->ob_shash = -1; + memcpy(destptr, data, itemsize); + return obj; + } + """, more_init = ''' + PyStringArrType_Type.tp_alloc = NULL; + PyStringArrType_Type.tp_free = NULL; + + PyStringArrType_Type.tp_repr = stringtype_repr; + PyStringArrType_Type.tp_str = stringtype_str; + PyStringArrType_Type.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE; + PyStringArrType_Type.tp_itemsize = sizeof(char); + PyStringArrType_Type.tp_base = &PyString_Type; + PyStringArrType_Type.tp_hash = PyString_Type.tp_hash; + if (PyType_Ready(&PyStringArrType_Type) < 0) INITERROR; + ''') + + a = module.newsubstr('abc') + assert module.has_nb_add('a') is False + assert module.has_nb_add(a) is False + assert type(a).__name__ == 'string_' + assert a == 'abc' + assert 3 == module.get_len(a) + b = module.newsubstr('') + assert 0 == module.get_len(b) + +class TestBytes(BaseApiTest): + def test_bytes_resize(self, space): + py_str = new_empty_str(space, 10) + ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + py_str.c_ob_sval[0] = 'a' + py_str.c_ob_sval[1] = 'b' + py_str.c_ob_sval[2] = 'c' + ar[0] = rffi.cast(PyObject, py_str) + _PyString_Resize(space, ar, 3) + py_str = rffi.cast(PyBytesObject, ar[0]) + assert py_str.c_ob_size == 3 + assert py_str.c_ob_sval[1] == 'b' + assert py_str.c_ob_sval[3] == '\x00' + # the same for growing + ar[0] = rffi.cast(PyObject, py_str) + _PyString_Resize(space, ar, 10) + py_str = rffi.cast(PyBytesObject, ar[0]) + assert py_str.c_ob_size == 10 + assert py_str.c_ob_sval[1] == 'b' + assert py_str.c_ob_sval[10] == '\x00' + decref(space, ar[0]) + lltype.free(ar, flavor='raw') + + def test_string_buffer(self, space): + py_str = new_empty_str(space, 10) + c_buf = py_str.c_ob_type.c_tp_as_buffer + assert c_buf + py_obj = rffi.cast(PyObject, 
py_str) + assert generic_cpy_call(space, c_buf.c_bf_getsegcount, + py_obj, lltype.nullptr(Py_ssize_tP.TO)) == 1 + ref = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') + assert generic_cpy_call(space, c_buf.c_bf_getsegcount, + py_obj, ref) == 1 + assert ref[0] == 10 + lltype.free(ref, flavor='raw') + ref = lltype.malloc(rffi.VOIDPP.TO, 1, flavor='raw') + assert generic_cpy_call(space, c_buf.c_bf_getreadbuffer, + py_obj, 0, ref) == 10 + lltype.free(ref, flavor='raw') + decref(space, py_obj) + + def test_Concat(self, space): + ref = make_ref(space, space.wrap('abc')) + ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + ptr[0] = ref + prev_refcnt = ref.c_ob_refcnt + PyString_Concat(space, ptr, space.wrap('def')) + assert ref.c_ob_refcnt == prev_refcnt - 1 + assert space.str_w(from_ref(space, ptr[0])) == 'abcdef' + with pytest.raises(OperationError): + PyString_Concat(space, ptr, space.w_None) + assert not ptr[0] + ptr[0] = lltype.nullptr(PyObject.TO) + PyString_Concat(space, ptr, space.wrap('def')) # should not crash + lltype.free(ptr, flavor='raw') + + def test_ConcatAndDel(self, space): + ref1 = make_ref(space, space.wrap('abc')) + ref2 = make_ref(space, space.wrap('def')) + ptr = lltype.malloc(PyObjectP.TO, 1, flavor='raw') + ptr[0] = ref1 + prev_refcnf = ref2.c_ob_refcnt + PyString_ConcatAndDel(space, ptr, ref2) + assert space.str_w(from_ref(space, ptr[0])) == 'abcdef' + assert ref2.c_ob_refcnt == prev_refcnf - 1 + decref(space, ptr[0]) + ptr[0] = lltype.nullptr(PyObject.TO) + ref2 = make_ref(space, space.wrap('foo')) + prev_refcnf = ref2.c_ob_refcnt + PyString_ConcatAndDel(space, ptr, ref2) # should not crash + assert ref2.c_ob_refcnt == prev_refcnf - 1 + lltype.free(ptr, flavor='raw') + + def test_format(self, space): + assert "1 2" == space.unwrap( + PyString_Format(space, space.wrap('%s %d'), space.wrap((1, 2)))) + + def test_asbuffer(self, space): + bufp = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') + lenp = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw') 
+ + w_text = space.wrap("text") + ref = make_ref(space, w_text) + prev_refcnt = ref.c_ob_refcnt + assert PyObject_AsCharBuffer(space, ref, bufp, lenp) == 0 + assert ref.c_ob_refcnt == prev_refcnt + assert lenp[0] == 4 + assert rffi.charp2str(bufp[0]) == 'text' + lltype.free(bufp, flavor='raw') + lltype.free(lenp, flavor='raw') + decref(space, ref) + + def test_intern(self, space): + buf = rffi.str2charp("test") + w_s1 = PyString_InternFromString(space, buf) + w_s2 = PyString_InternFromString(space, buf) + rffi.free_charp(buf) + assert w_s1 is w_s2 + + def test_AsEncodedObject(self, space): + ptr = space.wrap('abc') + + errors = rffi.str2charp("strict") + + encoding = rffi.str2charp("hex") + res = PyString_AsEncodedObject(space, ptr, encoding, errors) + assert space.unwrap(res) == "616263" + + res = PyString_AsEncodedObject(space, + ptr, encoding, lltype.nullptr(rffi.CCHARP.TO)) + assert space.unwrap(res) == "616263" + rffi.free_charp(encoding) + + encoding = rffi.str2charp("unknown_encoding") + with raises_w(space, LookupError): + PyString_AsEncodedObject(space, ptr, encoding, errors) + rffi.free_charp(encoding) + + rffi.free_charp(errors) + + NULL = lltype.nullptr(rffi.CCHARP.TO) + res = PyString_AsEncodedObject(space, ptr, NULL, NULL) + assert space.unwrap(res) == "abc" + with raises_w(space, TypeError): + PyString_AsEncodedObject(space, space.wrap(2), NULL, NULL) + + def test_AsDecodedObject(self, space): + w_str = space.wrap('caf\xe9') + encoding = rffi.str2charp("latin-1") + w_res = PyString_AsDecodedObject(space, w_str, encoding, None) + rffi.free_charp(encoding) + assert w_res._utf8 == u"caf\xe9".encode('utf8') + + def test_eq(self, space): + assert 1 == _PyString_Eq( + space, space.wrap("hello"), space.wrap("hello")) + assert 0 == _PyString_Eq( + space, space.wrap("hello"), space.wrap("world")) + + def test_join(self, space): + w_sep = space.wrap('') + w_seq = space.wrap(['a', 'b']) + w_joined = _PyString_Join(space, w_sep, w_seq) + assert 
space.unwrap(w_joined) == 'ab' diff --git a/pypy/module/cpyext/test/test_capsule.py b/pypy/module/cpyext/test/test_capsule.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_capsule.py @@ -0,0 +1,29 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase + +class AppTestCapsule(AppTestCpythonExtensionBase): + def test_capsule_import(self): + module = self.import_extension('foo', [ + ("set_ptr", "METH_O", + """ + PyObject *capsule, *module; + void *ptr = PyLong_AsVoidPtr(args); + if (PyErr_Occurred()) return NULL; + capsule = PyCapsule_New(ptr, "foo._ptr", NULL); + if (PyErr_Occurred()) return NULL; + module = PyImport_ImportModule("foo"); + PyModule_AddObject(module, "_ptr", capsule); + Py_DECREF(module); + if (PyErr_Occurred()) return NULL; + Py_RETURN_NONE; + """), + ("get_ptr", "METH_NOARGS", + """ + void *ptr = PyCapsule_Import("foo._ptr", 0); + if (PyErr_Occurred()) return NULL; + return PyLong_FromVoidPtr(ptr); + """)]) + module.set_ptr(1234) + assert 'capsule object "foo._ptr" at ' in str(module._ptr) + import gc; gc.collect() + assert module.get_ptr() == 1234 + del module._ptr diff --git a/pypy/module/cpyext/test/test_cell.py b/pypy/module/cpyext/test/test_cell.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_cell.py @@ -0,0 +1,20 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase + + +class AppTestCell(AppTestCpythonExtensionBase): + def test_cell_type(self): + module = self.import_extension('foo', [ + ("cell_type", "METH_O", + """ + PyDict_SetItemString(args, "cell", (PyObject*)&PyCell_Type); + Py_RETURN_NONE; + """)]) + d = {} + module.cell_type(d) + def f(o): + def g(): + return o + return g + + cell_type = type(f(0).__closure__[0]) + assert d["cell"] is cell_type diff --git a/pypy/module/cpyext/test/test_classobject.py b/pypy/module/cpyext/test/test_classobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_classobject.py @@ -0,0 
+1,56 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase + + +class AppTestInstanceMethod(AppTestCpythonExtensionBase): + def test_instancemethod(self): + module = self.import_extension('foo', [ + ("instancemethod", "METH_O", + """ + return PyInstanceMethod_New(args); + """)]) + + def testfunction(self): + """some doc""" + return self + + class InstanceMethod: + id = module.instancemethod(id) + testmethod = module.instancemethod(testfunction) + + inst = InstanceMethod() + assert id(inst) == inst.id() + assert inst.testmethod() is inst + assert InstanceMethod.testmethod(inst) is inst + assert InstanceMethod.__dict__['testmethod'](inst) is inst + assert inst.testmethod.__doc__ == testfunction.__doc__ + assert InstanceMethod.testmethod.__doc__ == testfunction.__doc__ + + InstanceMethod.testmethod.attribute = "test" + assert testfunction.attribute == "test" + raises(AttributeError, setattr, inst.testmethod, "attribute", "test") + + def test_instancemethod_cpyext_attributes(self): + module = self.import_extension('foo', [ + ("instancemethod_get_doc", "METH_O", + """ + PyObject* instancemethod = PyInstanceMethod_New(args); + return PyObject_GetAttrString(instancemethod, "__doc__"); + """), + ("instancemethod_get_name", "METH_O", + """ + PyObject* instancemethod = PyInstanceMethod_New(args); + return PyObject_GetAttrString(instancemethod, "__name__"); + """), + ("instancemethod_get_module", "METH_O", + """ + PyObject* instancemethod = PyInstanceMethod_New(args); + return PyObject_GetAttrString(instancemethod, "__module__"); + """) + ]) + + def testfunction(self): + """some doc""" + return self + assert(module.instancemethod_get_doc(testfunction) == testfunction.__doc__) + assert(module.instancemethod_get_module(testfunction) == testfunction.__module__) + assert(module.instancemethod_get_name(testfunction) == testfunction.__name__) \ No newline at end of file diff --git a/pypy/module/cpyext/test/test_codecs.py 
b/pypy/module/cpyext/test/test_codecs.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_codecs.py @@ -0,0 +1,15 @@ +# encoding: iso-8859-15 +from pypy.module.cpyext.test.test_api import BaseApiTest +from rpython.rtyper.lltypesystem import rffi +from pypy.module.cpyext.codecs import ( + PyCodec_IncrementalEncoder, PyCodec_IncrementalDecoder) + +class TestCodecs(BaseApiTest): + def test_incremental(self, space): + utf8 = rffi.str2charp('utf-8') + w_encoder = PyCodec_IncrementalEncoder(space, utf8, None) + w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) + w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) + w_decoded = space.call_method(w_decoder, 'decode', w_encoded) + assert space.utf8_w(w_decoded) == u'späm'.encode("utf-8") + rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_complexobject.py b/pypy/module/cpyext/test/test_complexobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_complexobject.py @@ -0,0 +1,64 @@ +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w +from pypy.module.cpyext.complexobject import ( + PyComplex_FromDoubles, PyComplex_RealAsDouble, PyComplex_ImagAsDouble) + +class TestComplexObject(BaseApiTest): + def test_complexobject(self, space): + w_value = PyComplex_FromDoubles(space, 1.2, 3.4) + assert space.unwrap(w_value) == 1.2+3.4j + assert PyComplex_RealAsDouble(space, w_value) == 1.2 + assert PyComplex_ImagAsDouble(space, w_value) == 3.4 + + assert PyComplex_RealAsDouble(space, space.wrap(42)) == 42 + assert PyComplex_RealAsDouble(space, space.wrap(1.5)) == 1.5 + assert PyComplex_ImagAsDouble(space, space.wrap(1.5)) == 0.0 + + # cpython accepts anything for PyComplex_ImagAsDouble + assert PyComplex_ImagAsDouble(space, space.w_None) == 0.0 + with raises_w(space, TypeError): + PyComplex_RealAsDouble(space, space.w_None) + +class 
AppTestCComplex(AppTestCpythonExtensionBase): + def test_AsCComplex(self): + module = self.import_extension('foo', [ + ("as_tuple", "METH_O", + """ + Py_complex c = PyComplex_AsCComplex(args); + if (PyErr_Occurred()) return NULL; + return Py_BuildValue("dd", c.real, c.imag); + """)]) + assert module.as_tuple(12-34j) == (12, -34) + assert module.as_tuple(-3.14) == (-3.14, 0.0) + raises(TypeError, module.as_tuple, "12") + + def test_FromCComplex(self): + module = self.import_extension('foo', [ + ("test", "METH_NOARGS", + """ + Py_complex c = {1.2, 3.4}; + return PyComplex_FromCComplex(c); + """)]) + assert module.test() == 1.2 + 3.4j + + def test_PyComplex_to_WComplex(self): + module = self.import_extension('foo', [ + ("test", "METH_NOARGS", + """ + Py_complex c = {1.2, 3.4}; + PyObject *obj = PyObject_Malloc(sizeof(PyComplexObject)); + obj = PyObject_Init(obj, &PyComplex_Type); + assert(obj != NULL); + ((PyComplexObject *)obj)->cval = c; + return obj; + """)]) + assert module.test() == 1.2 + 3.4j + + def test_WComplex_to_PyComplex(self): + module = self.import_extension('foo', [ + ("test", "METH_O", + """ + Py_complex c = ((PyComplexObject *)args)->cval; + return Py_BuildValue("dd", c.real, c.imag); + """)]) + assert module.test(1.2 + 3.4j) == (1.2, 3.4) diff --git a/pypy/module/cpyext/test/test_cparser.py b/pypy/module/cpyext/test/test_cparser.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_cparser.py @@ -0,0 +1,260 @@ +from rpython.flowspace.model import const +from rpython.flowspace.objspace import build_flow +from rpython.translator.simplify import simplify_graph +from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.module.cpyext.cparser import parse_source, CTypeSpace + +def test_configure(): + decl = """ + typedef ssize_t Py_ssize_t; + + typedef struct { + Py_ssize_t ob_refcnt; + Py_ssize_t ob_pypy_link; + double ob_fval; + } TestFloatObject; + """ + cts = parse_source(decl) + TestFloatObject = 
cts.definitions['TestFloatObject'] + assert isinstance(TestFloatObject, lltype.Struct) + assert TestFloatObject.c_ob_refcnt == rffi.SSIZE_T + assert TestFloatObject.c_ob_pypy_link == rffi.SSIZE_T + assert TestFloatObject.c_ob_fval == rffi.DOUBLE + +def test_simple(): + decl = "typedef ssize_t Py_ssize_t;" + cts = parse_source(decl) + assert cts.definitions == {'Py_ssize_t': rffi.SSIZE_T} + +def test_macro(): + decl = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + PyObject_HEAD + double ob_fval; + } PyFloatObject; + """ + cts = parse_source(decl) + assert 'PyFloatObject' in cts.definitions + assert 'PyObject_HEAD' in cts.macros + +def test_include(): + cdef1 = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + char *name; + } Type; + """ + cdef2 = """ + typedef struct { + PyObject_HEAD + Py_ssize_t ob_foo; + Type *type; + } Object; + """ + cts1 = parse_source(cdef1) + Type = cts1.definitions['Type'] + assert isinstance(Type, lltype.Struct) + cts2 = parse_source(cdef2, includes=[cts1]) + assert 'Type' not in cts2.definitions + Object = cts2.definitions['Object'] + assert Object.c_type.TO is Type + +def test_multiple_sources(): + cdef1 = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + char *name; + } Type; + """ + cdef2 = """ + typedef struct { + PyObject_HEAD + Py_ssize_t ob_foo; + Type *type; + } Object; + """ + cts = CTypeSpace() + cts.parse_source(cdef1) + Type = cts.definitions['Type'] + assert isinstance(Type, lltype.Struct) + assert 'Object' not in cts.definitions + cts.parse_source(cdef2) + Object = cts.definitions['Object'] + assert Object.c_type.TO is Type + +def test_incomplete(): + cdef = """ + typedef ssize_t Py_ssize_t; + + typedef struct { + Py_ssize_t ob_refcnt; + Py_ssize_t 
ob_pypy_link; + struct _typeobject *ob_type; + } Object; + + typedef struct { + void *buf; + Object *obj; + } Buffer; + + """ + cts = parse_source(cdef) + Object = cts.gettype('Object') + assert isinstance(Object, lltype.Struct) + +def test_recursive(): + cdef = """ + typedef ssize_t Py_ssize_t; + + typedef struct { + Py_ssize_t ob_refcnt; + Py_ssize_t ob_pypy_link; + struct _typeobject *ob_type; + } Object; + + typedef struct { + void *buf; + Object *obj; + } Buffer; + + typedef struct _typeobject { + Object *obj; + } Type; + """ + cts = parse_source(cdef) + Object = cts.definitions['Object'] + assert isinstance(Object, lltype.Struct) + hash(Object) + +def test_nested_struct(): + cdef = """ + typedef struct { + int x; + } foo; + typedef struct { + foo y; + } bar; + """ + cts = parse_source(cdef) + bar = cts.gettype('bar') + assert isinstance(bar, lltype.Struct) + hash(bar) # bar is hashable + +def test_const(): + cdef = """ + typedef struct { + const char * const foo; + } bar; + """ + cts = parse_source(cdef) + assert cts.definitions['bar'].c_foo == rffi.CONST_CCHARP != rffi.CCHARP + +def test_gettype(): + decl = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + PyObject_HEAD + double ob_fval; + } TestFloatObject; + """ + cts = parse_source(decl) + assert cts.gettype('Py_ssize_t') == rffi.SSIZE_T + assert cts.gettype('TestFloatObject *').TO.c_ob_refcnt == rffi.SSIZE_T + assert cts.cast('Py_ssize_t', 42) == rffi.cast(rffi.SSIZE_T, 42) + +def test_parse_funcdecl(): + decl = """ + typedef ssize_t Py_ssize_t; + + #define PyObject_HEAD \ + Py_ssize_t ob_refcnt; \ + Py_ssize_t ob_pypy_link; \ + + typedef struct { + PyObject_HEAD + double ob_fval; + } TestFloatObject; + + typedef TestFloatObject* (*func_t)(int, int); + """ + cts = parse_source(decl) + func_decl = cts.parse_func("func_t * some_func(TestFloatObject*)") + assert func_decl.name == 'some_func' + assert 
func_decl.get_llresult(cts) == cts.gettype('func_t*') + assert func_decl.get_llargs(cts) == [cts.gettype('TestFloatObject *')] + +def test_write_func(): + from ..api import ApiFunction + from rpython.translator.c.database import LowLevelDatabase + db = LowLevelDatabase() + cdef = """ + typedef ssize_t Py_ssize_t; + """ + cts = parse_source(cdef) + cdecl = "Py_ssize_t * some_func(Py_ssize_t*)" + decl = cts.parse_func(cdecl) + api_function = ApiFunction( + decl.get_llargs(cts), decl.get_llresult(cts), lambda space, x: None, + cdecl=decl) + assert (api_function.get_api_decl('some_func', db) == + "PyAPI_FUNC(Py_ssize_t *) some_func(Py_ssize_t * arg0);") + + +def test_wchar_t(): + cdef = """ + typedef struct { wchar_t* x; } test; + """ + cts = parse_source(cdef, headers=['stddef.h']) + obj = lltype.malloc(cts.gettype('test'), flavor='raw') + obj.c_x = cts.cast('wchar_t*', 0) + obj.c_x = lltype.nullptr(rffi.CWCHARP.TO) + lltype.free(obj, flavor='raw') + + +def test_translate_cast(): + cdef = "typedef ssize_t Py_ssize_t;" + cts = parse_source(cdef) + + def f(): + return cts.cast('Py_ssize_t*', 0) + graph = build_flow(f) + simplify_graph(graph) + assert len(graph.startblock.operations) == 1 + op = graph.startblock.operations[0] + assert op.args[0] == const(rffi.cast) + assert op.args[1].value is cts.gettype('Py_ssize_t*') + +def test_translate_gettype(): + cdef = "typedef ssize_t Py_ssize_t;" + cts = parse_source(cdef) + + def f(): + return cts.gettype('Py_ssize_t*') + graph = build_flow(f) + simplify_graph(graph) + # Check that the result is constant-folded + assert graph.startblock.operations == [] + [link] = graph.startblock.exits + assert link.target is graph.returnblock + assert link.args[0] == const(rffi.CArrayPtr(rffi.SSIZE_T)) diff --git a/pypy/module/cpyext/test/test_datetime.py b/pypy/module/cpyext/test/test_datetime.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_datetime.py @@ -0,0 +1,354 @@ +import pytest + +from 
pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.test.test_api import BaseApiTest +from pypy.module.cpyext.cdatetime import * +from pypy.module.cpyext.cdatetime import ( + _PyDateTime_Import, _PyDateTime_FromDateAndTime, _PyDate_FromDate, + _PyTime_FromTime, _PyDelta_FromDelta) +import datetime + +class TestDatetime(BaseApiTest): + def test_date(self, space): + date_api = _PyDateTime_Import(space) + w_date = _PyDate_FromDate(space, 2010, 06, 03, date_api.c_DateType) + assert space.unwrap(space.str(w_date)) == '2010-06-03' + + assert PyDate_Check(space, w_date) + assert PyDate_CheckExact(space, w_date) + + assert PyDateTime_GET_YEAR(space, w_date) == 2010 + assert PyDateTime_GET_MONTH(space, w_date) == 6 + assert PyDateTime_GET_DAY(space, w_date) == 3 + + def test_time(self, space): + date_api = _PyDateTime_Import(space) + w_time = _PyTime_FromTime( + space, 23, 15, 40, 123456, space.w_None, date_api.c_TimeType) + assert space.unwrap(space.str(w_time)) == '23:15:40.123456' + + assert PyTime_Check(space, w_time) + assert PyTime_CheckExact(space, w_time) + + assert PyDateTime_TIME_GET_HOUR(space, w_time) == 23 + assert PyDateTime_TIME_GET_MINUTE(space, w_time) == 15 + assert PyDateTime_TIME_GET_SECOND(space, w_time) == 40 + assert PyDateTime_TIME_GET_MICROSECOND(space, w_time) == 123456 + + def test_datetime(self, space): + date_api = _PyDateTime_Import(space) + w_date = _PyDateTime_FromDateAndTime( + space, 2010, 06, 03, 23, 15, 40, 123456, space.w_None, + date_api.c_DateTimeType) + assert space.unwrap(space.str(w_date)) == '2010-06-03 23:15:40.123456' + + assert PyDateTime_Check(space, w_date) + assert PyDateTime_CheckExact(space, w_date) + assert PyDate_Check(space, w_date) + assert not PyDate_CheckExact(space, w_date) + + assert PyDateTime_GET_YEAR(space, w_date) == 2010 + assert PyDateTime_GET_MONTH(space, w_date) == 6 + assert PyDateTime_GET_DAY(space, w_date) == 3 + assert PyDateTime_DATE_GET_HOUR(space, w_date) == 
23 + assert PyDateTime_DATE_GET_MINUTE(space, w_date) == 15 + assert PyDateTime_DATE_GET_SECOND(space, w_date) == 40 + assert PyDateTime_DATE_GET_MICROSECOND(space, w_date) == 123456 + + def test_delta(self, space): + date_api = _PyDateTime_Import(space) + w_delta = space.appexec( + [space.wrap(3), space.wrap(15)], """(days, seconds): + from datetime import timedelta + return timedelta(days, seconds) + """) + assert PyDelta_Check(space, w_delta) + assert PyDelta_CheckExact(space, w_delta) + + w_delta = _PyDelta_FromDelta(space, 10, 20, 30, True, date_api.c_DeltaType) + assert PyDelta_Check(space, w_delta) + assert PyDelta_CheckExact(space, w_delta) + + assert PyDateTime_DELTA_GET_DAYS(space, w_delta) == 10 + assert PyDateTime_DELTA_GET_SECONDS(space, w_delta) == 20 + assert PyDateTime_DELTA_GET_MICROSECONDS(space, w_delta) == 30 + + def test_fromtimestamp(self, space): + w_args = space.wrap((0,)) + w_date = PyDate_FromTimestamp(space, w_args) + date = datetime.date.fromtimestamp(0) + assert space.unwrap(space.str(w_date)) == str(date) + + w_args = space.wrap((0,)) + w_date = PyDateTime_FromTimestamp(space, w_args) + date = datetime.datetime.fromtimestamp(0) + assert space.unwrap(space.str(w_date)) == str(date) + + @pytest.mark.parametrize('name', ['Time', 'DateTime', 'Date', 'Delta']) + def test_basicsize(self, space, name): + datetime = _PyDateTime_Import(space) + py_size = getattr(datetime, "c_%sType" % name).c_tp_basicsize + c_size = rffi.sizeof(cts.gettype("PyDateTime_%s" % name)) + assert py_size == c_size + + +class AppTestDatetime(AppTestCpythonExtensionBase): + def test_CAPI(self): + module = self.import_extension('foo', [ + ("get_types", "METH_NOARGS", + """ + PyDateTime_IMPORT; + if (!PyDateTimeAPI) { + PyErr_SetString(PyExc_RuntimeError, "No PyDateTimeAPI"); + return NULL; + } + return PyTuple_Pack(5, + PyDateTimeAPI->DateType, + PyDateTimeAPI->DateTimeType, + PyDateTimeAPI->TimeType, + PyDateTimeAPI->DeltaType, + PyDateTimeAPI->TZInfoType); + """), + 
("clear_types", "METH_NOARGS", + """ + Py_DECREF(PyDateTimeAPI->DateType); + Py_DECREF(PyDateTimeAPI->DateTimeType); + Py_DECREF(PyDateTimeAPI->TimeType); + Py_DECREF(PyDateTimeAPI->DeltaType); + Py_DECREF(PyDateTimeAPI->TZInfoType); + Py_RETURN_NONE; + """ + ) + ], prologue='#include "datetime.h"\n') + import datetime + assert module.get_types() == (datetime.date, + datetime.datetime, + datetime.time, + datetime.timedelta, + datetime.tzinfo) + module.clear_types() + + def test_constructors(self): + module = self.import_extension('foo', [ + ("new_date", "METH_NOARGS", + """ PyDateTime_IMPORT; + return PyDateTimeAPI->Date_FromDate( + 2000, 6, 6, PyDateTimeAPI->DateType); + """), + ("new_time", "METH_NOARGS", + """ PyDateTime_IMPORT; + return PyDateTimeAPI->Time_FromTime( + 6, 6, 6, 6, Py_None, PyDateTimeAPI->TimeType); + """), + ("new_datetime", "METH_NOARGS", + """ PyDateTime_IMPORT; + return PyDateTimeAPI->DateTime_FromDateAndTime( + 2000, 6, 6, 6, 6, 6, 6, Py_None, + PyDateTimeAPI->DateTimeType); + """), + ], prologue='#include "datetime.h"\n') + import datetime + assert module.new_date() == datetime.date(2000, 6, 6) + assert module.new_time() == datetime.time(6, 6, 6, 6) + assert module.new_datetime() == datetime.datetime( + 2000, 6, 6, 6, 6, 6, 6) + + def test_macros(self): + module = self.import_extension('foo', [ + ("test_date_macros", "METH_NOARGS", + """ + PyObject* obj; + PyDateTime_Date* d; + PyDateTime_IMPORT; + if (!PyDateTimeAPI) { + PyErr_SetString(PyExc_RuntimeError, "No PyDateTimeAPI"); + return NULL; + } + obj = PyDate_FromDate(2000, 6, 6); + d = (PyDateTime_Date*)obj; + + PyDateTime_GET_YEAR(obj); + PyDateTime_GET_YEAR(d); + + PyDateTime_GET_MONTH(obj); + PyDateTime_GET_MONTH(d); + + PyDateTime_GET_DAY(obj); + PyDateTime_GET_DAY(d); + + return obj; + """), + ("test_datetime_macros", "METH_NOARGS", + """ + PyObject* obj; + PyDateTime_DateTime *dt; + PyDateTime_IMPORT; + if (!PyDateTimeAPI) { + PyErr_SetString(PyExc_RuntimeError, "No 
PyDateTimeAPI"); + return NULL; + } + obj = PyDateTime_FromDateAndTime(2000, 6, 6, 6, 6, 6, 6); + dt = (PyDateTime_DateTime*)obj; + + PyDateTime_GET_YEAR(obj); + PyDateTime_GET_YEAR(dt); + + PyDateTime_GET_MONTH(obj); + PyDateTime_GET_MONTH(dt); + + PyDateTime_GET_DAY(obj); + PyDateTime_GET_DAY(dt); + + PyDateTime_DATE_GET_HOUR(obj); + PyDateTime_DATE_GET_HOUR(dt); + + PyDateTime_DATE_GET_MINUTE(obj); + PyDateTime_DATE_GET_MINUTE(dt); + + PyDateTime_DATE_GET_SECOND(obj); + PyDateTime_DATE_GET_SECOND(dt); + + PyDateTime_DATE_GET_MICROSECOND(obj); + PyDateTime_DATE_GET_MICROSECOND(dt); + + return obj; + """), + ("test_time_macros", "METH_NOARGS", + """ + PyObject* obj; + PyDateTime_Time* t; + PyDateTime_IMPORT; + if (!PyDateTimeAPI) { + PyErr_SetString(PyExc_RuntimeError, "No PyDateTimeAPI"); + return NULL; + } + obj = PyTime_FromTime(6, 6, 6, 6); + t = (PyDateTime_Time*)obj; + + PyDateTime_TIME_GET_HOUR(obj); + PyDateTime_TIME_GET_HOUR(t); + + PyDateTime_TIME_GET_MINUTE(obj); + PyDateTime_TIME_GET_MINUTE(t); + + PyDateTime_TIME_GET_SECOND(obj); + PyDateTime_TIME_GET_SECOND(t); + + PyDateTime_TIME_GET_MICROSECOND(obj); + PyDateTime_TIME_GET_MICROSECOND(t); + + return obj; + """), + ("test_delta_macros", "METH_NOARGS", + """ + PyObject* obj; + PyDateTime_Delta* delta; + PyDateTime_IMPORT; + if (!PyDateTimeAPI) { + PyErr_SetString(PyExc_RuntimeError, "No PyDateTimeAPI"); + return NULL; + } + obj = PyDelta_FromDSU(6, 6, 6); + delta = (PyDateTime_Delta*)obj; + +#if defined(PYPY_VERSION) || PY_VERSION_HEX >= 0x03030000 + // These macros are only defined in CPython 3.x and PyPy. 
+ // See: http://bugs.python.org/issue13727 + PyDateTime_DELTA_GET_DAYS(obj); + PyDateTime_DELTA_GET_DAYS(delta); + + PyDateTime_DELTA_GET_SECONDS(obj); + PyDateTime_DELTA_GET_SECONDS(delta); + + PyDateTime_DELTA_GET_MICROSECONDS(obj); + PyDateTime_DELTA_GET_MICROSECONDS(delta); +#endif + return obj; + """), + ], prologue='#include "datetime.h"\n') + import datetime + assert module.test_date_macros() == datetime.date(2000, 6, 6) + assert module.test_datetime_macros() == datetime.datetime( + 2000, 6, 6, 6, 6, 6, 6) + assert module.test_time_macros() == datetime.time(6, 6, 6, 6) + assert module.test_delta_macros() == datetime.timedelta(6, 6, 6) + + def test_tzinfo(self): + module = self.import_extension('foo', [ + ("time_with_tzinfo", "METH_O", + """ PyDateTime_IMPORT; + return PyDateTimeAPI->Time_FromTime( + 6, 6, 6, 6, args, PyDateTimeAPI->TimeType); + """), + ("datetime_with_tzinfo", "METH_O", + """ + PyObject * obj; + int tzrefcnt = args->ob_refcnt; + PyDateTime_IMPORT; + obj = PyDateTimeAPI->DateTime_FromDateAndTime( + 2000, 6, 6, 6, 6, 6, 6, args, + PyDateTimeAPI->DateTimeType); + if (!((PyDateTime_DateTime*)obj)->hastzinfo) + { + Py_DECREF(obj); + PyErr_SetString(PyExc_ValueError, "missing tzinfo"); + return NULL; + } + if (((PyDateTime_DateTime*)obj)->tzinfo->ob_refcnt <= tzrefcnt) + { + Py_DECREF(obj); + PyErr_SetString(PyExc_ValueError, "tzinfo refcnt not incremented"); + return NULL; + } + return obj; + + """), + ], prologue='#include "datetime.h"\n') + from datetime import tzinfo, datetime, timedelta, time + # copied from datetime documentation + class GMT1(tzinfo): + def __del__(self): + print('deleting GMT1') + def utcoffset(self, dt): + return timedelta(hours=1) + self.dst(dt) + def dst(self, dt): + return timedelta(0) + def tzname(self,dt): + return "GMT +1" + gmt1 = GMT1() + dt1 = module.time_with_tzinfo(gmt1) + assert dt1 == time(6, 6, 6, 6, gmt1) + assert '+01' in str(dt1) + dt_tz = module.datetime_with_tzinfo(gmt1) + assert dt_tz == datetime(2000, 
6, 6, 6, 6, 6, 6, gmt1) + + def test_checks(self): + module = self.import_extension('foo', [ + ("checks", "METH_O", + """ PyDateTime_IMPORT; + return PyTuple_Pack(10, + PyBool_FromLong(PyDateTime_Check(args)), + PyBool_FromLong(PyDateTime_CheckExact(args)), + PyBool_FromLong(PyDate_Check(args)), + PyBool_FromLong(PyDate_CheckExact(args)), + PyBool_FromLong(PyTime_Check(args)), + PyBool_FromLong(PyTime_CheckExact(args)), + PyBool_FromLong(PyDelta_Check(args)), + PyBool_FromLong(PyDelta_CheckExact(args)), + PyBool_FromLong(PyTZInfo_Check(args)), + PyBool_FromLong(PyTZInfo_CheckExact(args)) + ); + """), + ], prologue='#include "datetime.h"\n') + from datetime import tzinfo, datetime, timedelta, time, date + o = date(1, 1, 1) + assert module.checks(o) == (False,) * 2 + (True,) * 2 + (False,) * 6 + o = time(1, 1, 1) + assert module.checks(o) == (False,) * 4 + (True,) * 2 + (False,) * 4 + o = timedelta(1, 1, 1) + assert module.checks(o) == (False,) * 6 + (True,) * 2 + (False,) * 2 + o = datetime(1, 1, 1) + assert module.checks(o) == (True,) * 3 + (False,) * 7 # isinstance(datetime, date) + o = tzinfo() + assert module.checks(o) == (False,) * 8 + (True,) * 2 + diff --git a/pypy/module/cpyext/test/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/test_dictobject.py @@ -0,0 +1,334 @@ +import py +from pytest import raises +from rpython.rtyper.lltypesystem import rffi, lltype +from pypy.module.cpyext.test.test_api import BaseApiTest, raises_w +from pypy.module.cpyext.api import Py_ssize_tP, PyObjectP, PyTypeObjectPtr +from pypy.module.cpyext.pyobject import make_ref, from_ref +from pypy.interpreter.error import OperationError +from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +from pypy.module.cpyext.dictproxyobject import * +from pypy.module.cpyext.dictobject import * +from pypy.module.cpyext.pyobject import decref + +class TestDictObject(BaseApiTest): + def 
test_dict(self, space): + d = PyDict_New(space) + assert space.eq_w(d, space.newdict()) + + assert space.eq_w(PyDict_GetItem(space, space.wrap({"a": 72}), + space.wrap("a")), + space.wrap(72)) + + PyDict_SetItem(space, d, space.wrap("c"), space.wrap(42)) + assert space.eq_w(space.getitem(d, space.wrap("c")), + space.wrap(42)) + + space.setitem(d, space.wrap("name"), space.wrap(3)) + assert space.eq_w(PyDict_GetItem(space, d, space.wrap("name")), + space.wrap(3)) + + space.delitem(d, space.wrap("name")) From pypy.commits at gmail.com Wed Sep 12 08:38:54 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 12 Sep 2018 05:38:54 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: fix indent Message-ID: <5b9908de.1c69fb81.75ea5.77c6@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95111:19e3f6195d1c Date: 2018-09-12 15:35 +0300 http://bitbucket.org/pypy/pypy/changeset/19e3f6195d1c/ Log: fix indent diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -399,7 +399,7 @@ assert space.eq_w(encoded, encoded_obj) with raises_w(space, TypeError): PyUnicode_AsEncodedString( - space, space.newtuple([1, 2, 3]), None, None) + space, space.newtuple([1, 2, 3]), None, None) with raises_w(space, TypeError): PyUnicode_AsEncodedString(space, space.newbytes(''), None, None) ascii = rffi.str2charp('ascii') From pypy.commits at gmail.com Wed Sep 12 08:38:56 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 12 Sep 2018 05:38:56 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: remove cruft from reverting 943b0266d564 Message-ID: <5b9908e0.1c69fb81.a0b22.850a@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r95112:3cb557b6599e Date: 2018-09-12 15:37 +0300 http://bitbucket.org/pypy/pypy/changeset/3cb557b6599e/ Log: remove cruft from reverting 943b0266d564 diff --git 
a/pypy/module/cpyext/test0/__init__.py b/pypy/module/cpyext/test0/__init__.py deleted file mode 100644 diff --git a/pypy/module/cpyext/test0/conftest.py b/pypy/module/cpyext/test0/conftest.py deleted file mode 100644 --- a/pypy/module/cpyext/test0/conftest.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import pytest - -def pytest_configure(config): - if config.option.runappdirect: - import sys - import py - from pypy import pypydir - sys.path.append(str(py.path.local(pypydir) / 'tool' / 'cpyext')) - return - from pypy.tool.pytest.objspace import gettestobjspace - # For some reason (probably a ll2ctypes cache issue on linux64) - # it's necessary to run "import time" at least once before any - # other cpyext test, otherwise the same statement will fail in - # test_datetime.py. - space = gettestobjspace(usemodules=['time']) - space.getbuiltinmodule("time") - -def pytest_ignore_collect(path, config): - # ensure additional functions are registered - import pypy.module.cpyext.test.test_cpyext - return False - -def pytest_funcarg__api(request): - return request.cls.api - -if os.name == 'nt': - @pytest.yield_fixture(autouse=True, scope='session') - def prevent_dialog_box(): - """Do not open dreaded dialog box on segfault on Windows""" - import ctypes - SEM_NOGPFAULTERRORBOX = 0x0002 # From MSDN - old_err_mode = ctypes.windll.kernel32.GetErrorMode() - new_err_mode = old_err_mode | SEM_NOGPFAULTERRORBOX - ctypes.windll.kernel32.SetErrorMode(new_err_mode) - yield - ctypes.windll.kernel32.SetErrorMode(old_err_mode) diff --git a/pypy/module/cpyext/test1/__init__.py b/pypy/module/cpyext/test1/__init__.py deleted file mode 100644 diff --git a/pypy/module/cpyext/test1/conftest.py b/pypy/module/cpyext/test1/conftest.py deleted file mode 100644 --- a/pypy/module/cpyext/test1/conftest.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import pytest - -def pytest_configure(config): - if config.option.runappdirect: - import sys - import py - from pypy import pypydir - 
sys.path.append(str(py.path.local(pypydir) / 'tool' / 'cpyext')) - return - from pypy.tool.pytest.objspace import gettestobjspace - # For some reason (probably a ll2ctypes cache issue on linux64) - # it's necessary to run "import time" at least once before any - # other cpyext test, otherwise the same statement will fail in - # test_datetime.py. - space = gettestobjspace(usemodules=['time']) - space.getbuiltinmodule("time") - -def pytest_ignore_collect(path, config): - # ensure additional functions are registered - import pypy.module.cpyext.test.test_cpyext - return False - -def pytest_funcarg__api(request): - return request.cls.api - -if os.name == 'nt': - @pytest.yield_fixture(autouse=True, scope='session') - def prevent_dialog_box(): - """Do not open dreaded dialog box on segfault on Windows""" - import ctypes - SEM_NOGPFAULTERRORBOX = 0x0002 # From MSDN - old_err_mode = ctypes.windll.kernel32.GetErrorMode() - new_err_mode = old_err_mode | SEM_NOGPFAULTERRORBOX - ctypes.windll.kernel32.SetErrorMode(new_err_mode) - yield - ctypes.windll.kernel32.SetErrorMode(old_err_mode) From pypy.commits at gmail.com Wed Sep 12 10:17:17 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 12 Sep 2018 07:17:17 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: merge py3.5 into branch Message-ID: <5b991fed.1c69fb81.53720.ff8c@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95113:53b278863286 Date: 2018-09-12 17:16 +0300 http://bitbucket.org/pypy/pypy/changeset/53b278863286/ Log: merge py3.5 into branch diff --git a/pypy/module/cpyext/test0/__init__.py b/pypy/module/cpyext/test0/__init__.py deleted file mode 100644 diff --git a/pypy/module/cpyext/test0/conftest.py b/pypy/module/cpyext/test0/conftest.py deleted file mode 100644 --- a/pypy/module/cpyext/test0/conftest.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import pytest - -def pytest_configure(config): - if config.option.runappdirect: - import sys - import py - from pypy import pypydir - 
sys.path.append(str(py.path.local(pypydir) / 'tool' / 'cpyext')) - return - from pypy.tool.pytest.objspace import gettestobjspace - # For some reason (probably a ll2ctypes cache issue on linux64) - # it's necessary to run "import time" at least once before any - # other cpyext test, otherwise the same statement will fail in - # test_datetime.py. - space = gettestobjspace(usemodules=['time']) - space.getbuiltinmodule("time") - -def pytest_ignore_collect(path, config): - # ensure additional functions are registered - import pypy.module.cpyext.test.test_cpyext - return False - -def pytest_funcarg__api(request): - return request.cls.api - -if os.name == 'nt': - @pytest.yield_fixture(autouse=True, scope='session') - def prevent_dialog_box(): - """Do not open dreaded dialog box on segfault on Windows""" - import ctypes - SEM_NOGPFAULTERRORBOX = 0x0002 # From MSDN - old_err_mode = ctypes.windll.kernel32.GetErrorMode() - new_err_mode = old_err_mode | SEM_NOGPFAULTERRORBOX - ctypes.windll.kernel32.SetErrorMode(new_err_mode) - yield - ctypes.windll.kernel32.SetErrorMode(old_err_mode) diff --git a/pypy/module/cpyext/test1/__init__.py b/pypy/module/cpyext/test1/__init__.py deleted file mode 100644 diff --git a/pypy/module/cpyext/test1/conftest.py b/pypy/module/cpyext/test1/conftest.py deleted file mode 100644 --- a/pypy/module/cpyext/test1/conftest.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import pytest - -def pytest_configure(config): - if config.option.runappdirect: - import sys - import py - from pypy import pypydir - sys.path.append(str(py.path.local(pypydir) / 'tool' / 'cpyext')) - return - from pypy.tool.pytest.objspace import gettestobjspace - # For some reason (probably a ll2ctypes cache issue on linux64) - # it's necessary to run "import time" at least once before any - # other cpyext test, otherwise the same statement will fail in - # test_datetime.py. 
- space = gettestobjspace(usemodules=['time']) - space.getbuiltinmodule("time") - -def pytest_ignore_collect(path, config): - # ensure additional functions are registered - import pypy.module.cpyext.test.test_cpyext - return False - -def pytest_funcarg__api(request): - return request.cls.api - -if os.name == 'nt': - @pytest.yield_fixture(autouse=True, scope='session') - def prevent_dialog_box(): - """Do not open dreaded dialog box on segfault on Windows""" - import ctypes - SEM_NOGPFAULTERRORBOX = 0x0002 # From MSDN - old_err_mode = ctypes.windll.kernel32.GetErrorMode() - new_err_mode = old_err_mode | SEM_NOGPFAULTERRORBOX - ctypes.windll.kernel32.SetErrorMode(new_err_mode) - yield - ctypes.windll.kernel32.SetErrorMode(old_err_mode) From pypy.commits at gmail.com Wed Sep 12 14:55:35 2018 From: pypy.commits at gmail.com (mattip) Date: Wed, 12 Sep 2018 11:55:35 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: fix translation Message-ID: <5b996127.1c69fb81.91d67.07de@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95114:9d1232e7d075 Date: 2018-09-12 21:54 +0300 http://bitbucket.org/pypy/pypy/changeset/9d1232e7d075/ Log: fix translation diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -138,6 +138,8 @@ @staticmethod def convert_arg_to_w_unicode(space, w_other, strict=None): if space.is_w(space.type(w_other), space.w_unicode): + # XXX why do we need this for translation??? 
+ assert isinstance(w_other, W_UnicodeObject) return w_other if space.isinstance_w(w_other, space.w_bytes): return unicode_from_string(space, w_other) From pypy.commits at gmail.com Thu Sep 13 07:51:33 2018 From: pypy.commits at gmail.com (arigo) Date: Thu, 13 Sep 2018 04:51:33 -0700 (PDT) Subject: [pypy-commit] cffi default: Issue 378 Message-ID: <5b9a4f45.1c69fb81.34f03.dcd2@mx.google.com> Author: Armin Rigo Branch: Changeset: r3160:3184b0a675fc Date: 2018-09-13 13:51 +0200 http://bitbucket.org/cffi/cffi/changeset/3184b0a675fc/ Log: Issue 378 Workaround for a GCC bug diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c --- a/c/_cffi_backend.c +++ b/c/_cffi_backend.c @@ -892,11 +892,21 @@ return 0; } +#ifdef __GNUC__ +/* This is a workaround for what I think is a GCC bug on several + platforms. See issue #378. */ +__attribute__((noinline)) +#endif +void _cffi_memcpy(char *target, const void *src, size_t size) +{ + memcpy(target, src, size); +} + #define _write_raw_data(type) \ do { \ if (size == sizeof(type)) { \ type r = (type)source; \ - memcpy(target, &r, sizeof(type)); \ + _cffi_memcpy(target, &r, sizeof(type)); \ return; \ } \ } while(0) @@ -970,8 +980,8 @@ if (size == 2*sizeof(type)) { \ type r = (type)source.real; \ type i = (type)source.imag; \ - memcpy(target, &r, sizeof(type)); \ - memcpy(target+sizeof(type), &i, sizeof(type)); \ + _cffi_memcpy(target, &r, sizeof(type)); \ + _cffi_memcpy(target+sizeof(type), &i, sizeof(type)); \ return; \ } \ } while(0) From pypy.commits at gmail.com Sat Sep 15 07:38:20 2018 From: pypy.commits at gmail.com (cfbolz) Date: Sat, 15 Sep 2018 04:38:20 -0700 (PDT) Subject: [pypy-commit] pypy default: Merged in carlbordum/pypy/fix-readme-typo (pull request #623) Message-ID: <5b9cef2c.1c69fb81.d7d60.6c93@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r95117:2dbe9a39e6c6 Date: 2018-09-15 11:37 +0000 http://bitbucket.org/pypy/pypy/changeset/2dbe9a39e6c6/ Log: Merged in carlbordum/pypy/fix-readme-typo (pull 
request #623) Fix typo in README.rst and make it slightly more grammarly consistent diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Welcome to PyPy! -PyPy is an interperter that implements the Python programming language, based +PyPy is an interpreter that implements the Python programming language, based on the RPython compiler framework for dynamic language implementations. The home page for the interpreter is: @@ -15,29 +15,29 @@ http://doc.pypy.org/ -More documentation about the RPython framework can be found here +More documentation about the RPython framework can be found here: - http://rpython.readthedocs.io + http://rpython.readthedocs.io/ -The source for the documentation is in the pypy/doc directory +The source for the documentation is in the pypy/doc directory. + Using PyPy instead of CPython -============================= +----------------------------- -Please read the information at http://pypy.org to find the correct way to +Please read the information at http://pypy.org/ to find the correct way to download and use PyPy as an alternative to CPython. + Building -======== +-------- Building PyPy is not the recommended way to obtain the PyPy alternative python interpreter. It is time-consuming and requires significant computing resources. -More information can be found here +More information can be found here: http://doc.pypy.org/en/latest/build.html Enjoy and send us feedback! the pypy-dev team - - From pypy.commits at gmail.com Sat Sep 15 07:38:24 2018 From: pypy.commits at gmail.com (carlbordum) Date: Sat, 15 Sep 2018 04:38:24 -0700 (PDT) Subject: [pypy-commit] pypy fix-readme-typo: Fix typo in README.rst: interperter => interpreter. Message-ID: <5b9cef30.1c69fb81.c8c5.3e73@mx.google.com> Author: Carl Bordum Hansen Branch: fix-readme-typo Changeset: r95115:2538f91b44b3 Date: 2018-09-15 12:35 +0200 http://bitbucket.org/pypy/pypy/changeset/2538f91b44b3/ Log: Fix typo in README.rst: interperter => interpreter. 
diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Welcome to PyPy! -PyPy is an interperter that implements the Python programming language, based +PyPy is an interpreter that implements the Python programming language, based on the RPython compiler framework for dynamic language implementations. The home page for the interpreter is: From pypy.commits at gmail.com Sat Sep 15 07:38:26 2018 From: pypy.commits at gmail.com (carlbordum) Date: Sat, 15 Sep 2018 04:38:26 -0700 (PDT) Subject: [pypy-commit] pypy fix-readme-typo: Add punctuation and link consistency to README.rst Message-ID: <5b9cef32.1c69fb81.fd5f8.b191@mx.google.com> Author: Carl Bordum Hansen Branch: fix-readme-typo Changeset: r95116:f4645467b64b Date: 2018-09-15 12:36 +0200 http://bitbucket.org/pypy/pypy/changeset/f4645467b64b/ Log: Add punctuation and link consistency to README.rst diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -15,29 +15,29 @@ http://doc.pypy.org/ -More documentation about the RPython framework can be found here +More documentation about the RPython framework can be found here: - http://rpython.readthedocs.io + http://rpython.readthedocs.io/ -The source for the documentation is in the pypy/doc directory +The source for the documentation is in the pypy/doc directory. + Using PyPy instead of CPython -============================= +----------------------------- -Please read the information at http://pypy.org to find the correct way to +Please read the information at http://pypy.org/ to find the correct way to download and use PyPy as an alternative to CPython. + Building -======== +-------- Building PyPy is not the recommended way to obtain the PyPy alternative python interpreter. It is time-consuming and requires significant computing resources. -More information can be found here +More information can be found here: http://doc.pypy.org/en/latest/build.html Enjoy and send us feedback! 
the pypy-dev team - - From pypy.commits at gmail.com Sat Sep 15 16:34:41 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 13:34:41 -0700 (PDT) Subject: [pypy-commit] pypy default: Issue #2887 Message-ID: <5b9d6ce1.1c69fb81.b4590.2edd@mx.google.com> Author: Armin Rigo Branch: Changeset: r95118:2136f08b127c Date: 2018-09-13 13:18 +0200 http://bitbucket.org/pypy/pypy/changeset/2136f08b127c/ Log: Issue #2887 Performance improvement for converting Python bools to C int/bool. diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -3,7 +3,7 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib import jit -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, we_are_translated from rpython.rlib.rarithmetic import r_uint, r_ulonglong from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.lltypesystem import lltype, llmemory, rffi @@ -128,7 +128,7 @@ # (possibly) convert and cast a Python object to a long long. # This version accepts a Python int too, and does convertions from # other types of objects. It refuses floats. - if space.is_w(space.type(w_ob), space.w_int): # shortcut + if space.isinstance_w(w_ob, space.w_int): # shortcut return space.int_w(w_ob) try: bigint = space.bigint_w(w_ob, allow_conversion=False) @@ -145,7 +145,7 @@ def as_long(space, w_ob): # Same as as_long_long(), but returning an int instead. - if space.is_w(space.type(w_ob), space.w_int): # shortcut + if space.isinstance_w(w_ob, space.w_int): # shortcut return space.int_w(w_ob) try: bigint = space.bigint_w(w_ob, allow_conversion=False) @@ -165,7 +165,7 @@ # This accepts a Python int too, and does convertions from other types of # objects. If 'strict', complains with OverflowError; if 'not strict', # mask the result and round floats. 
- if space.is_w(space.type(w_ob), space.w_int): # shortcut + if space.isinstance_w(w_ob, space.w_int): # shortcut value = space.int_w(w_ob) if strict and value < 0: raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) @@ -190,8 +190,10 @@ def as_unsigned_long(space, w_ob, strict): # same as as_unsigned_long_long(), but returning just an Unsigned - if space.is_w(space.type(w_ob), space.w_int): # shortcut + if space.isinstance_w(w_ob, space.w_int): # shortcut value = space.int_w(w_ob) + if not we_are_translated(): + value = getattr(value, 'constant', value) # for NonConstant if strict and value < 0: raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) return r_uint(value) From pypy.commits at gmail.com Sat Sep 15 16:34:43 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 13:34:43 -0700 (PDT) Subject: [pypy-commit] pypy default: Issue #2889 Message-ID: <5b9d6ce3.1c69fb81.cd607.6a08@mx.google.com> Author: Armin Rigo Branch: Changeset: r95119:83dd3becaa5f Date: 2018-09-15 22:34 +0200 http://bitbucket.org/pypy/pypy/changeset/83dd3becaa5f/ Log: Issue #2889 Move jit.dont_look_inside around a few more instructions. In this way, the JIT residual call is not going to invoke a function that returns a tuple. Instead, the tuple-returning calls are all done as C code, inlined into each other. 
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -315,9 +315,7 @@ if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool): self._must_be_string_of_zero_or_one(value) keepalives[i] = value - buf, buf_flag = rffi.get_nonmovingbuffer_final_null(value) - rffi.cast(rffi.CCHARPP, cdata)[0] = buf - return ord(buf_flag) # 4, 5 or 6 + return misc.write_string_as_charp(cdata, value) # if (space.isinstance_w(w_init, space.w_list) or space.isinstance_w(w_init, space.w_tuple)): diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -102,6 +102,12 @@ def write_raw_longdouble_data(target, source): rffi.cast(rffi.LONGDOUBLEP, target)[0] = source + at jit.dont_look_inside # lets get_nonmovingbuffer_final_null be inlined +def write_string_as_charp(target, string): + buf, buf_flag = rffi.get_nonmovingbuffer_final_null(string) + rffi.cast(rffi.CCHARPP, target)[0] = buf + return ord(buf_flag) # 4, 5 or 6 + # ____________________________________________________________ sprintf_longdouble = rffi.llexternal( From pypy.commits at gmail.com Sat Sep 15 16:34:45 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 13:34:45 -0700 (PDT) Subject: [pypy-commit] pypy default: merge heads Message-ID: <5b9d6ce5.1c69fb81.1024b.8ca3@mx.google.com> Author: Armin Rigo Branch: Changeset: r95120:476eea9a87f7 Date: 2018-09-15 22:34 +0200 http://bitbucket.org/pypy/pypy/changeset/476eea9a87f7/ Log: merge heads diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Welcome to PyPy! -PyPy is an interperter that implements the Python programming language, based +PyPy is an interpreter that implements the Python programming language, based on the RPython compiler framework for dynamic language implementations. 
The home page for the interpreter is: @@ -15,29 +15,29 @@ http://doc.pypy.org/ -More documentation about the RPython framework can be found here +More documentation about the RPython framework can be found here: - http://rpython.readthedocs.io + http://rpython.readthedocs.io/ -The source for the documentation is in the pypy/doc directory +The source for the documentation is in the pypy/doc directory. + Using PyPy instead of CPython -============================= +----------------------------- -Please read the information at http://pypy.org to find the correct way to +Please read the information at http://pypy.org/ to find the correct way to download and use PyPy as an alternative to CPython. + Building -======== +-------- Building PyPy is not the recommended way to obtain the PyPy alternative python interpreter. It is time-consuming and requires significant computing resources. -More information can be found here +More information can be found here: http://doc.pypy.org/en/latest/build.html Enjoy and send us feedback! 
the pypy-dev team - - From pypy.commits at gmail.com Sat Sep 15 17:38:41 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 14:38:41 -0700 (PDT) Subject: [pypy-commit] cffi default: Extra tests Message-ID: <5b9d7be1.1c69fb81.dc2e8.af33@mx.google.com> Author: Armin Rigo Branch: Changeset: r3161:5fc6cdbc1cf6 Date: 2018-09-15 23:38 +0200 http://bitbucket.org/cffi/cffi/changeset/5fc6cdbc1cf6/ Log: Extra tests diff --git a/c/test_c.py b/c/test_c.py --- a/c/test_c.py +++ b/c/test_c.py @@ -347,6 +347,16 @@ assert newp(pp, max)[0] == max py.test.raises(OverflowError, newp, pp, min - 1) py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64) + py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64) + py.test.raises(TypeError, newp, pp, 1.0) for name in ['char', 'short', 'int', 'long', 'long long']: p = new_primitive_type('unsigned ' + name) pp = new_pointer_type(p) From pypy.commits at gmail.com Sat Sep 15 17:41:39 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 14:41:39 -0700 (PDT) Subject: [pypy-commit] pypy default: Simplify misc.as_long() Message-ID: <5b9d7c93.1c69fb81.a769.92eb@mx.google.com> Author: Armin Rigo Branch: Changeset: r95121:048ec4801682 Date: 2018-09-15 23:02 +0200 http://bitbucket.org/pypy/pypy/changeset/048ec4801682/ Log: Simplify misc.as_long() diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -153,18 +153,9 @@ # Same as as_long_long(), 
but returning an int instead. if space.isinstance_w(w_ob, space.w_int): # shortcut return space.int_w(w_ob) - try: - bigint = space.bigint_w(w_ob, allow_conversion=False) - except OperationError as e: - if not e.match(space, space.w_TypeError): - raise - if _is_a_float(space, w_ob): - raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) - try: - return bigint.toint() - except OverflowError: - raise OperationError(space.w_OverflowError, space.newtext(ovf_msg)) + if _is_a_float(space, w_ob): + space.bigint_w(w_ob, allow_conversion=False) # raise the right error + return space.int_w(space.int(w_ob)) def as_unsigned_long_long(space, w_ob, strict): # (possibly) convert and cast a Python object to an unsigned long long. From pypy.commits at gmail.com Sat Sep 15 17:41:41 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 14:41:41 -0700 (PDT) Subject: [pypy-commit] pypy default: Extra tests from cffi Message-ID: <5b9d7c95.1c69fb81.9e2ce.b892@mx.google.com> Author: Armin Rigo Branch: Changeset: r95122:158efead80b8 Date: 2018-09-15 23:40 +0200 http://bitbucket.org/pypy/pypy/changeset/158efead80b8/ Log: Extra tests from cffi diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -336,6 +336,16 @@ assert newp(pp, max)[0] == max py.test.raises(OverflowError, newp, pp, min - 1) py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64) + py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 64) + py.test.raises(OverflowError, 
newp, pp, max + 1 + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64) + py.test.raises(TypeError, newp, pp, 1.0) for name in ['char', 'short', 'int', 'long', 'long long']: p = new_primitive_type('unsigned ' + name) pp = new_pointer_type(p) From pypy.commits at gmail.com Sat Sep 15 17:47:34 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 14:47:34 -0700 (PDT) Subject: [pypy-commit] cffi default: More tests Message-ID: <5b9d7df6.1c69fb81.11250.e64c@mx.google.com> Author: Armin Rigo Branch: Changeset: r3162:4422c1cad114 Date: 2018-09-15 23:47 +0200 http://bitbucket.org/cffi/cffi/changeset/4422c1cad114/ Log: More tests diff --git a/c/test_c.py b/c/test_c.py --- a/c/test_c.py +++ b/c/test_c.py @@ -345,6 +345,10 @@ max = (1 << (8*size-1)) - 1 assert newp(pp, min)[0] == min assert newp(pp, max)[0] == max + py.test.raises(OverflowError, newp, pp, min - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 2 ** 64) py.test.raises(OverflowError, newp, pp, min - 1) py.test.raises(OverflowError, newp, pp, max + 1) py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) From pypy.commits at gmail.com Sat Sep 15 17:53:51 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 14:53:51 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5b9d7f6f.1c69fb81.dde2a.7dd2@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r95123:84f081c9bba4 Date: 2018-09-15 22:53 +0200 http://bitbucket.org/pypy/pypy/changeset/84f081c9bba4/ Log: hg merge default diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Welcome to PyPy! -PyPy is an interperter that implements the Python programming language, based +PyPy is an interpreter that implements the Python programming language, based on the RPython compiler framework for dynamic language implementations. 
The home page for the interpreter is: @@ -15,29 +15,29 @@ http://doc.pypy.org/ -More documentation about the RPython framework can be found here +More documentation about the RPython framework can be found here: - http://rpython.readthedocs.io + http://rpython.readthedocs.io/ -The source for the documentation is in the pypy/doc directory +The source for the documentation is in the pypy/doc directory. + Using PyPy instead of CPython -============================= +----------------------------- -Please read the information at http://pypy.org to find the correct way to +Please read the information at http://pypy.org/ to find the correct way to download and use PyPy as an alternative to CPython. + Building -======== +-------- Building PyPy is not the recommended way to obtain the PyPy alternative python interpreter. It is time-consuming and requires significant computing resources. -More information can be found here +More information can be found here: http://doc.pypy.org/en/latest/build.html Enjoy and send us feedback! 
the pypy-dev team - - diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -317,9 +317,7 @@ if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool): self._must_be_string_of_zero_or_one(value) keepalives[i] = value - buf, buf_flag = rffi.get_nonmovingbuffer_final_null(value) - rffi.cast(rffi.CCHARPP, cdata)[0] = buf - return ord(buf_flag) # 4, 5 or 6 + return misc.write_string_as_charp(cdata, value) # if (space.isinstance_w(w_init, space.w_list) or space.isinstance_w(w_init, space.w_tuple)): diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -3,7 +3,7 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib import jit -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, we_are_translated from rpython.rlib.rarithmetic import r_uint, r_ulonglong from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.lltypesystem import lltype, llmemory, rffi @@ -102,6 +102,12 @@ def write_raw_longdouble_data(target, source): rffi.cast(rffi.LONGDOUBLEP, target)[0] = source + at jit.dont_look_inside # lets get_nonmovingbuffer_final_null be inlined +def write_string_as_charp(target, string): + buf, buf_flag = rffi.get_nonmovingbuffer_final_null(string) + rffi.cast(rffi.CCHARPP, target)[0] = buf + return ord(buf_flag) # 4, 5 or 6 + # ____________________________________________________________ sprintf_longdouble = rffi.llexternal( @@ -151,7 +157,7 @@ def as_long(space, w_ob): # Same as as_long_long(), but returning an int instead. 
- if space.is_w(space.type(w_ob), space.w_int): # shortcut + if space.isinstance_w(w_ob, space.w_int): # shortcut return space.int_w(w_ob) try: bigint = space.bigint_w(w_ob, allow_conversion=False) From pypy.commits at gmail.com Sat Sep 15 17:53:53 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 14:53:53 -0700 (PDT) Subject: [pypy-commit] pypy default: More tests from cffi Message-ID: <5b9d7f71.1c69fb81.34f03.425a@mx.google.com> Author: Armin Rigo Branch: Changeset: r95124:20239d9e5ce9 Date: 2018-09-15 23:49 +0200 http://bitbucket.org/pypy/pypy/changeset/20239d9e5ce9/ Log: More tests from cffi diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -334,6 +334,10 @@ max = (1 << (8*size-1)) - 1 assert newp(pp, min)[0] == min assert newp(pp, max)[0] == max + py.test.raises(OverflowError, newp, pp, min - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 2 ** 64) py.test.raises(OverflowError, newp, pp, min - 1) py.test.raises(OverflowError, newp, pp, max + 1) py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) From pypy.commits at gmail.com Sat Sep 15 17:53:55 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 14:53:55 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5b9d7f73.1c69fb81.ef55b.cfe6@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r95125:3b0f38d2f3e8 Date: 2018-09-15 23:53 +0200 http://bitbucket.org/pypy/pypy/changeset/3b0f38d2f3e8/ Log: hg merge default diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -135,21 +135,14 @@ # This version accepts a Python int too, and does 
convertions from # other types of objects. It refuses floats. try: - value = space.int_w(w_ob) + return space.int_w(w_ob, allow_conversion=False) except OperationError as e: if not (e.match(space, space.w_OverflowError) or e.match(space, space.w_TypeError)): raise - else: - return value - try: - bigint = space.bigint_w(w_ob, allow_conversion=False) - except OperationError as e: - if not e.match(space, space.w_TypeError): - raise if _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + bigint = space.bigint_w(w_ob, allow_conversion=True) try: return bigint.tolonglong() except OverflowError: @@ -157,20 +150,15 @@ def as_long(space, w_ob): # Same as as_long_long(), but returning an int instead. - if space.isinstance_w(w_ob, space.w_int): # shortcut - return space.int_w(w_ob) try: - bigint = space.bigint_w(w_ob, allow_conversion=False) + return space.int_w(w_ob, allow_conversion=False) except OperationError as e: - if not e.match(space, space.w_TypeError): + if not (e.match(space, space.w_OverflowError) or + e.match(space, space.w_TypeError)): raise if _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) - try: - return bigint.toint() - except OverflowError: - raise OperationError(space.w_OverflowError, space.newtext(ovf_msg)) + return space.int_w(w_ob, allow_conversion=True) def as_unsigned_long_long(space, w_ob, strict): # (possibly) convert and cast a Python object to an unsigned long long. @@ -178,23 +166,19 @@ # objects. If 'strict', complains with OverflowError; if 'not strict', # mask the result and round floats. 
try: - value = space.int_w(w_ob) + value = space.int_w(w_ob, allow_conversion=False) except OperationError as e: if not (e.match(space, space.w_OverflowError) or e.match(space, space.w_TypeError)): raise + if strict and _is_a_float(space, w_ob): + raise else: if strict and value < 0: raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) return r_ulonglong(value) - try: - bigint = space.bigint_w(w_ob, allow_conversion=False) - except OperationError as e: - if not e.match(space, space.w_TypeError): - raise - if strict and _is_a_float(space, w_ob): - raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + # note that if not 'strict', then space.int() will round down floats + bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) if strict: try: return bigint.toulonglong() @@ -208,13 +192,19 @@ def as_unsigned_long(space, w_ob, strict): # same as as_unsigned_long_long(), but returning just an Unsigned try: - bigint = space.bigint_w(w_ob, allow_conversion=False) + value = space.int_w(w_ob, allow_conversion=False) except OperationError as e: - if not e.match(space, space.w_TypeError): + if not (e.match(space, space.w_OverflowError) or + e.match(space, space.w_TypeError)): raise if strict and _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + else: + if strict and value < 0: + raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) + return r_uint(value) + # note that if not 'strict', then space.int() will round down floats + bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) if strict: try: return bigint.touint() @@ -247,7 +237,12 @@ def _standard_object_as_bool(space, w_ob): if space.isinstance_w(w_ob, space.w_int): - return space.bigint_w(w_ob).tobool() + try: + return space.int_w(w_ob) != 0 + except OperationError as e: + if not e.match(space, space.w_OverflowError): + raise + return space.bigint_w(w_ob).tobool() if space.isinstance_w(w_ob, 
space.w_float): return space.float_w(w_ob) != 0.0 raise _NotStandardObject diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -334,8 +334,22 @@ max = (1 << (8*size-1)) - 1 assert newp(pp, min)[0] == min assert newp(pp, max)[0] == max + py.test.raises(OverflowError, newp, pp, min - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 2 ** 64) py.test.raises(OverflowError, newp, pp, min - 1) py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64) + py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64) + py.test.raises(TypeError, newp, pp, 1.0) for name in ['char', 'short', 'int', 'long', 'long long']: p = new_primitive_type('unsigned ' + name) pp = new_pointer_type(p) From pypy.commits at gmail.com Sat Sep 15 18:00:02 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 15:00:02 -0700 (PDT) Subject: [pypy-commit] cffi default: Argh, bogus tests Message-ID: <5b9d80e2.1c69fb81.3237.96ff@mx.google.com> Author: Armin Rigo Branch: Changeset: r3163:8076f2ac1bd2 Date: 2018-09-15 23:59 +0200 http://bitbucket.org/cffi/cffi/changeset/8076f2ac1bd2/ Log: Argh, bogus tests diff --git a/c/test_c.py b/c/test_c.py --- a/c/test_c.py +++ b/c/test_c.py @@ -353,11 +353,7 @@ py.test.raises(OverflowError, newp, pp, 
max + 1) py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64) - py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 32) - py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 64) py.test.raises(OverflowError, newp, pp, max + 1) - py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 32) - py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 64) py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 32) py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64) py.test.raises(TypeError, newp, pp, 1.0) From pypy.commits at gmail.com Sat Sep 15 18:10:44 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 15:10:44 -0700 (PDT) Subject: [pypy-commit] pypy default: Argh, bogus tests Message-ID: <5b9d8364.1c69fb81.dc2e8.b344@mx.google.com> Author: Armin Rigo Branch: Changeset: r95126:314ddd2d83c5 Date: 2018-09-16 00:03 +0200 http://bitbucket.org/pypy/pypy/changeset/314ddd2d83c5/ Log: Argh, bogus tests diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -342,11 +342,7 @@ py.test.raises(OverflowError, newp, pp, max + 1) py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64) - py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 32) - py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 64) py.test.raises(OverflowError, newp, pp, max + 1) - py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 32) - py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 64) py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 32) py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64) py.test.raises(TypeError, newp, pp, 1.0) From pypy.commits at gmail.com Sat Sep 15 18:10:46 2018 From: pypy.commits at gmail.com (arigo) Date: Sat, 15 Sep 2018 15:10:46 -0700 
(PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5b9d8366.1c69fb81.a3b0d.8535@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r95127:d9a97ab80c60 Date: 2018-09-16 00:03 +0200 http://bitbucket.org/pypy/pypy/changeset/d9a97ab80c60/ Log: hg merge default diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -342,11 +342,7 @@ py.test.raises(OverflowError, newp, pp, max + 1) py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64) - py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 32) - py.test.raises(OverflowError, newp, pp, min - 1 + 2 ** 64) py.test.raises(OverflowError, newp, pp, max + 1) - py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 32) - py.test.raises(OverflowError, newp, pp, max + 1 - 2 ** 64) py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 32) py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64) py.test.raises(TypeError, newp, pp, 1.0) From pypy.commits at gmail.com Sun Sep 16 14:24:34 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 16 Sep 2018 11:24:34 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: fix off-by-one Message-ID: <5b9e9fe2.1c69fb81.777c7.028d@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95128:5437a5a4f8de Date: 2018-09-16 21:22 +0300 http://bitbucket.org/pypy/pypy/changeset/5437a5a4f8de/ Log: fix off-by-one diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1070,14 +1070,14 @@ else: res_8, newindex = errorhandler( errors, public_encoding_name, 'surrogates not allowed', - s, pos - 1, pos) + s, pos, pos+1) for cp in rutf8.Utf8StringIterator(res_8): if cp < 0xD800: _STORECHAR(result, cp, byteorder) else: 
errorhandler('strict', public_encoding_name, 'surrogates not allowed', - s, pos-1, pos) + s, pos, pos+1) if index != newindex: # Should be uncommon index = newindex pos = rutf8._pos_at_index(s, newindex) From pypy.commits at gmail.com Sun Sep 16 14:24:36 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 16 Sep 2018 11:24:36 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: fix some tests Message-ID: <5b9e9fe4.1c69fb81.51826.59a6@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95129:3e13faab9525 Date: 2018-09-16 21:22 +0300 http://bitbucket.org/pypy/pypy/changeset/3e13faab9525/ Log: fix some tests diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -87,8 +87,7 @@ return space.newint(uid) def str_w(self, space): - # Returns ascii-encoded str - return space.text_w(encode_object(space, self, 'ascii', 'strict')) + return space.text_w(encode_object(space, self, 'utf8', 'strict')) def utf8_w(self, space): return self._utf8 @@ -110,7 +109,9 @@ raise oefmt(space.w_TypeError, "cannot use unicode as modifiable buffer") - charbuf_w = str_w + def charbuf_w(self, space): + # Returns ascii-encoded str + return space.text_w(encode_object(space, self, 'ascii', 'strict')) def listview_utf8(self): assert self.is_ascii() @@ -1106,11 +1107,11 @@ encoding = getdefaultencoding(space) if errors is None or errors == 'strict': if encoding == 'ascii': - s = space.utf8_w(w_obj) + s = space.charbuf_w(w_obj) unicodehelper.check_ascii_or_raise(space, s) return space.newutf8(s, len(s)) if encoding == 'utf-8' or encoding == 'utf8': - s = space.utf8_w(w_obj) + s = space.charbuf_w(w_obj) lgt = unicodehelper.check_utf8_or_raise(space, s) return space.newutf8(s, lgt) w_codecs = space.getbuiltinmodule("_codecs") From pypy.commits at gmail.com Sun Sep 16 14:24:38 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 16 Sep 2018 11:24:38 -0700 (PDT) 
Subject: [pypy-commit] pypy unicode-utf8: merge default into branch Message-ID: <5b9e9fe6.1c69fb81.57692.9cc4@mx.google.com> Author: Matti Picus Branch: unicode-utf8 Changeset: r95130:fa230d2cf3b6 Date: 2018-09-16 21:23 +0300 http://bitbucket.org/pypy/pypy/changeset/fa230d2cf3b6/ Log: merge default into branch diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Welcome to PyPy! -PyPy is an interperter that implements the Python programming language, based +PyPy is an interpreter that implements the Python programming language, based on the RPython compiler framework for dynamic language implementations. The home page for the interpreter is: @@ -15,29 +15,29 @@ http://doc.pypy.org/ -More documentation about the RPython framework can be found here +More documentation about the RPython framework can be found here: - http://rpython.readthedocs.io + http://rpython.readthedocs.io/ -The source for the documentation is in the pypy/doc directory +The source for the documentation is in the pypy/doc directory. + Using PyPy instead of CPython -============================= +----------------------------- -Please read the information at http://pypy.org to find the correct way to +Please read the information at http://pypy.org/ to find the correct way to download and use PyPy as an alternative to CPython. + Building -======== +-------- Building PyPy is not the recommended way to obtain the PyPy alternative python interpreter. It is time-consuming and requires significant computing resources. -More information can be found here +More information can be found here: http://doc.pypy.org/en/latest/build.html Enjoy and send us feedback! 
the pypy-dev team - - diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -311,9 +311,7 @@ if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool): self._must_be_string_of_zero_or_one(value) keepalives[i] = value - buf, buf_flag = rffi.get_nonmovingbuffer_final_null(value) - rffi.cast(rffi.CCHARPP, cdata)[0] = buf - return ord(buf_flag) # 4, 5 or 6 + return misc.write_string_as_charp(cdata, value) # if (space.isinstance_w(w_init, space.w_list) or space.isinstance_w(w_init, space.w_tuple)): diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -3,7 +3,7 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib import jit -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, we_are_translated from rpython.rlib.rarithmetic import r_uint, r_ulonglong from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.lltypesystem import lltype, llmemory, rffi @@ -102,6 +102,12 @@ def write_raw_longdouble_data(target, source): rffi.cast(rffi.LONGDOUBLEP, target)[0] = source + at jit.dont_look_inside # lets get_nonmovingbuffer_final_null be inlined +def write_string_as_charp(target, string): + buf, buf_flag = rffi.get_nonmovingbuffer_final_null(string) + rffi.cast(rffi.CCHARPP, target)[0] = buf + return ord(buf_flag) # 4, 5 or 6 + # ____________________________________________________________ sprintf_longdouble = rffi.llexternal( @@ -128,7 +134,7 @@ # (possibly) convert and cast a Python object to a long long. # This version accepts a Python int too, and does convertions from # other types of objects. It refuses floats. 
- if space.is_w(space.type(w_ob), space.w_int): # shortcut + if space.isinstance_w(w_ob, space.w_int): # shortcut return space.int_w(w_ob) try: bigint = space.bigint_w(w_ob, allow_conversion=False) @@ -145,27 +151,18 @@ def as_long(space, w_ob): # Same as as_long_long(), but returning an int instead. - if space.is_w(space.type(w_ob), space.w_int): # shortcut + if space.isinstance_w(w_ob, space.w_int): # shortcut return space.int_w(w_ob) - try: - bigint = space.bigint_w(w_ob, allow_conversion=False) - except OperationError as e: - if not e.match(space, space.w_TypeError): - raise - if _is_a_float(space, w_ob): - raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) - try: - return bigint.toint() - except OverflowError: - raise OperationError(space.w_OverflowError, space.newtext(ovf_msg)) + if _is_a_float(space, w_ob): + space.bigint_w(w_ob, allow_conversion=False) # raise the right error + return space.int_w(space.int(w_ob)) def as_unsigned_long_long(space, w_ob, strict): # (possibly) convert and cast a Python object to an unsigned long long. # This accepts a Python int too, and does convertions from other types of # objects. If 'strict', complains with OverflowError; if 'not strict', # mask the result and round floats. 
- if space.is_w(space.type(w_ob), space.w_int): # shortcut + if space.isinstance_w(w_ob, space.w_int): # shortcut value = space.int_w(w_ob) if strict and value < 0: raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) @@ -190,8 +187,10 @@ def as_unsigned_long(space, w_ob, strict): # same as as_unsigned_long_long(), but returning just an Unsigned - if space.is_w(space.type(w_ob), space.w_int): # shortcut + if space.isinstance_w(w_ob, space.w_int): # shortcut value = space.int_w(w_ob) + if not we_are_translated(): + value = getattr(value, 'constant', value) # for NonConstant if strict and value < 0: raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) return r_uint(value) diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -334,8 +334,18 @@ max = (1 << (8*size-1)) - 1 assert newp(pp, min)[0] == min assert newp(pp, max)[0] == max + py.test.raises(OverflowError, newp, pp, min - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 2 ** 64) py.test.raises(OverflowError, newp, pp, min - 1) py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64) + py.test.raises(TypeError, newp, pp, 1.0) for name in ['char', 'short', 'int', 'long', 'long long']: p = new_primitive_type('unsigned ' + name) pp = new_pointer_type(p) From pypy.commits at gmail.com Sun Sep 16 15:57:58 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 16 Sep 2018 12:57:58 -0700 (PDT) Subject: 
[pypy-commit] pypy unicode-utf8-py3: store utf8 and fix off-by-one Message-ID: <5b9eb5c6.1c69fb81.990cf.712d@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95131:cdcddb46fce5 Date: 2018-09-16 21:26 +0300 http://bitbucket.org/pypy/pypy/changeset/cdcddb46fce5/ Log: store utf8 and fix off-by-one diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1206,14 +1206,15 @@ else: res_8, newindex = errorhandler( errors, public_encoding_name, 'surrogates not allowed', - s, pos - 1, pos) - for cp in rutf8.Utf8StringIterator(res_8): - if cp < 0xD800: + s, pos, pos+1) + #for cp in rutf8.Utf8StringIterator(res_8): + for cp in res_8: + if cp < 0xD800 or allow_surrogates: _STORECHAR(result, cp, byteorder) else: errorhandler('strict', public_encoding_name, 'surrogates not allowed', - s, pos-1, pos) + s, pos, pos+1) if index != newindex: # Should be uncommon index = newindex pos = rutf8._pos_at_index(s, newindex) From pypy.commits at gmail.com Sun Sep 16 15:58:01 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 16 Sep 2018 12:58:01 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: allow surrogates=True if error is 'surrogatespass', percolate kwarg Message-ID: <5b9eb5c9.1c69fb81.dc2e8.4453@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95132:fa345e6d0bd7 Date: 2018-09-16 21:27 +0300 http://bitbucket.org/pypy/pypy/changeset/fa345e6d0bd7/ Log: allow surrogates=True if error is 'surrogatespass', percolate kwarg diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -284,7 +284,7 @@ pos = rutf8._pos_at_index(s, newindex) return result.build() -def utf8_encode_ascii(s, errors, errorhandler): +def utf8_encode_ascii(s, errors, errorhandler, allow_surrogates=False): """ Don't be confused - this is a slowpath 
for errors e.g. "ignore" or an obscure errorhandler """ diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -649,9 +649,13 @@ w_arg = space.convert_arg_to_w_unicode(w_arg, errors) if errors is None: errors = 'strict' + allow_surrogates = False + if errors in ('surrogatepass',): + allow_surrogates = True state = space.fromcache(CodecState) ulen = w_arg._length - result = func(w_arg._utf8, errors, state.encode_error_handler) + result = func(w_arg._utf8, errors, state.encode_error_handler, + allow_surrogates=allow_surrogates) return space.newtuple([space.newbytes(result), space.newint(ulen)]) wrap_encoder.__name__ = func.__name__ globals()[name] = wrap_encoder From pypy.commits at gmail.com Sun Sep 16 15:58:03 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 16 Sep 2018 12:58:03 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: first is utf8, not unicode Message-ID: <5b9eb5cb.1c69fb81.f017d.4fce@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95133:b87b3bf9ecaf Date: 2018-09-16 21:27 +0300 http://bitbucket.org/pypy/pypy/changeset/b87b3bf9ecaf/ Log: first is utf8, not unicode diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1170,7 +1170,7 @@ first = u[0] it = rutf8.Utf8StringIterator(u) code = it.next() - if not (unicodedb.isxidstart(code) or first == u'_'): + if not (unicodedb.isxidstart(code) or first == '_'): return False for ch in it: From pypy.commits at gmail.com Sun Sep 16 15:58:05 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 16 Sep 2018 12:58:05 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: merge py3.5 into branch Message-ID: <5b9eb5cd.1c69fb81.c8c5.568b@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95134:9f8dd0062596 Date: 
2018-09-16 22:47 +0300 http://bitbucket.org/pypy/pypy/changeset/9f8dd0062596/ Log: merge py3.5 into branch diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Welcome to PyPy! -PyPy is an interperter that implements the Python programming language, based +PyPy is an interpreter that implements the Python programming language, based on the RPython compiler framework for dynamic language implementations. The home page for the interpreter is: @@ -15,29 +15,29 @@ http://doc.pypy.org/ -More documentation about the RPython framework can be found here +More documentation about the RPython framework can be found here: - http://rpython.readthedocs.io + http://rpython.readthedocs.io/ -The source for the documentation is in the pypy/doc directory +The source for the documentation is in the pypy/doc directory. + Using PyPy instead of CPython -============================= +----------------------------- -Please read the information at http://pypy.org to find the correct way to +Please read the information at http://pypy.org/ to find the correct way to download and use PyPy as an alternative to CPython. + Building -======== +-------- Building PyPy is not the recommended way to obtain the PyPy alternative python interpreter. It is time-consuming and requires significant computing resources. -More information can be found here +More information can be found here: http://doc.pypy.org/en/latest/build.html Enjoy and send us feedback! 
the pypy-dev team - - diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -313,9 +313,7 @@ if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool): self._must_be_string_of_zero_or_one(value) keepalives[i] = value - buf, buf_flag = rffi.get_nonmovingbuffer_final_null(value) - rffi.cast(rffi.CCHARPP, cdata)[0] = buf - return ord(buf_flag) # 4, 5 or 6 + return misc.write_string_as_charp(cdata, value) # if (space.isinstance_w(w_init, space.w_list) or space.isinstance_w(w_init, space.w_tuple)): diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -3,7 +3,7 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib import jit -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, we_are_translated from rpython.rlib.rarithmetic import r_uint, r_ulonglong from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.lltypesystem import lltype, llmemory, rffi @@ -102,6 +102,12 @@ def write_raw_longdouble_data(target, source): rffi.cast(rffi.LONGDOUBLEP, target)[0] = source +@jit.dont_look_inside # lets get_nonmovingbuffer_final_null be inlined +def write_string_as_charp(target, string): + buf, buf_flag = rffi.get_nonmovingbuffer_final_null(string) + rffi.cast(rffi.CCHARPP, target)[0] = buf + return ord(buf_flag) # 4, 5 or 6 + # ____________________________________________________________ sprintf_longdouble = rffi.llexternal( @@ -129,21 +135,14 @@ # This version accepts a Python int too, and does convertions from # other types of objects. It refuses floats.
try: - value = space.int_w(w_ob) + return space.int_w(w_ob, allow_conversion=False) except OperationError as e: if not (e.match(space, space.w_OverflowError) or e.match(space, space.w_TypeError)): raise - else: - return value - try: - bigint = space.bigint_w(w_ob, allow_conversion=False) - except OperationError as e: - if not e.match(space, space.w_TypeError): - raise if _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + bigint = space.bigint_w(w_ob, allow_conversion=True) try: return bigint.tolonglong() except OverflowError: @@ -151,20 +150,15 @@ def as_long(space, w_ob): # Same as as_long_long(), but returning an int instead. - if space.is_w(space.type(w_ob), space.w_int): # shortcut - return space.int_w(w_ob) try: - bigint = space.bigint_w(w_ob, allow_conversion=False) + return space.int_w(w_ob, allow_conversion=False) except OperationError as e: - if not e.match(space, space.w_TypeError): + if not (e.match(space, space.w_OverflowError) or + e.match(space, space.w_TypeError)): raise if _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) - try: - return bigint.toint() - except OverflowError: - raise OperationError(space.w_OverflowError, space.newtext(ovf_msg)) + return space.int_w(w_ob, allow_conversion=True) def as_unsigned_long_long(space, w_ob, strict): # (possibly) convert and cast a Python object to an unsigned long long. @@ -172,23 +166,19 @@ # objects. If 'strict', complains with OverflowError; if 'not strict', # mask the result and round floats. 
try: - value = space.int_w(w_ob) + value = space.int_w(w_ob, allow_conversion=False) except OperationError as e: if not (e.match(space, space.w_OverflowError) or e.match(space, space.w_TypeError)): raise + if strict and _is_a_float(space, w_ob): + raise else: if strict and value < 0: raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) return r_ulonglong(value) - try: - bigint = space.bigint_w(w_ob, allow_conversion=False) - except OperationError as e: - if not e.match(space, space.w_TypeError): - raise - if strict and _is_a_float(space, w_ob): - raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + # note that if not 'strict', then space.int() will round down floats + bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) if strict: try: return bigint.toulonglong() @@ -202,13 +192,19 @@ def as_unsigned_long(space, w_ob, strict): # same as as_unsigned_long_long(), but returning just an Unsigned try: - bigint = space.bigint_w(w_ob, allow_conversion=False) + value = space.int_w(w_ob, allow_conversion=False) except OperationError as e: - if not e.match(space, space.w_TypeError): + if not (e.match(space, space.w_OverflowError) or + e.match(space, space.w_TypeError)): raise if strict and _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + else: + if strict and value < 0: + raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) + return r_uint(value) + # note that if not 'strict', then space.int() will round down floats + bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) if strict: try: return bigint.touint() @@ -241,7 +237,12 @@ def _standard_object_as_bool(space, w_ob): if space.isinstance_w(w_ob, space.w_int): - return space.bigint_w(w_ob).tobool() + try: + return space.int_w(w_ob) != 0 + except OperationError as e: + if not e.match(space, space.w_OverflowError): + raise + return space.bigint_w(w_ob).tobool() if space.isinstance_w(w_ob, 
space.w_float): return space.float_w(w_ob) != 0.0 raise _NotStandardObject diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -334,8 +334,18 @@ max = (1 << (8*size-1)) - 1 assert newp(pp, min)[0] == min assert newp(pp, max)[0] == max + py.test.raises(OverflowError, newp, pp, min - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 2 ** 64) py.test.raises(OverflowError, newp, pp, min - 1) py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64) + py.test.raises(TypeError, newp, pp, 1.0) for name in ['char', 'short', 'int', 'long', 'long long']: p = new_primitive_type('unsigned ' + name) pp = new_pointer_type(p) From pypy.commits at gmail.com Mon Sep 17 01:45:52 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 16 Sep 2018 22:45:52 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: win32 fixes Message-ID: <5b9f3f90.1c69fb81.c71e1.88cf@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95135:8a769610ff91 Date: 2018-09-17 08:02 +0300 http://bitbucket.org/pypy/pypy/changeset/8a769610ff91/ Log: win32 fixes diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -6,3 +6,7 @@ * make sure we review all the places that call ord(unichr) to check for ValueErrors * rewrite unicodeobject.unicode_to_decimal_w to only use utf8 encoded bytes * revisit why runicode import str_decode_utf_8_impl needed instead of runicode import str_decode_utf_8 +* revisit all places where we do 
utf8.decode('utf-8'), they should work directly with utf8 + - rutf8.utf8_encode_mbcs + - unicodehelper.fsencode + - interp_posix.FileEncoder.as_unicode (used in win32) diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -77,16 +77,15 @@ def fsdecode(space, w_string): from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) + errorhandler=state.decode_error_handler, if _WIN32: bytes = space.bytes_w(w_string) - uni = str_decode_mbcs(bytes, 'strict', - errorhandler=decode_error_handler(space), + uni = str_decode_mbcs(bytes, 'strict', True, errorhandler, force_ignore=False)[0] elif _MACOSX: bytes = space.bytes_w(w_string) uni = str_decode_utf8( bytes, 'surrogateescape', final=True, - errorhandler=state.decode_error_handler, allow_surrogates=False)[0] elif space.sys.filesystemencoding is None or state.codec_need_encodings: # bootstrap check: if the filesystemencoding isn't initialized @@ -109,15 +108,14 @@ from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) if _WIN32: - uni = space.utf8_w(w_uni) - bytes = unicode_encode_mbcs(uni, len(uni), 'strict', - errorhandler=encode_error_handler(space), - force_replace=False) + errorhandler=state.encode_error_handler, + utf8 = space.utf8_w(w_uni) + bytes = utf8_encode_mbcs(utf8, 'strict', errorhandler) elif _MACOSX: - uni = space.utf8_w(w_uni) + utf8 = space.utf8_w(w_uni) + errorhandler=state.encode_error_handler, bytes = unicodehelper.utf8_encode_utf_8( - uni, 'surrogateescape', - errorhandler=state.encode_error_handler, + utf8, 'surrogateescape', allow_surrogates=False) elif space.sys.filesystemencoding is None or state.codec_need_encodings: # bootstrap check: if the filesystemencoding isn't initialized @@ -314,16 +312,12 @@ if _WIN32: def utf8_encode_mbcs(s, errors, errorhandler): - s = s.decode('utf-8') - if errorhandler is None: - 
errorhandler = encode_error_handler(space) - res = unicode_encode_mbcs(s, slen, errors, errorhandler) + res = rutf8.utf8_encode_mbcs(s, errors, errorhandler, + force_replace=False) return res def str_decode_mbcs(s, errors, final, errorhandler, force_ignore=True): slen = len(s) - if errorhandler is None: - errorhandler = decode_error_handler(space) res, size = runicode.str_decode_mbcs(s, slen, errors, final=final, errorhandler=errorhandler, force_ignore=force_ignore) res_utf8 = runicode.unicode_encode_utf_8(res, len(res), 'strict') diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -56,7 +56,7 @@ return self.space.fsencode_w(self.w_obj) def as_unicode(self): - return self.space.unicode0_w(self.w_obj) + return self.space.utf8_w(self.w_obj).decode('utf8') class FileDecoder(object): is_unicode = False diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -810,3 +810,63 @@ res.append_slice(s, start, end) i = end return res.build() + +# ____________________________________________________________ +# MBCS codecs for Windows + +if sys.platform == 'win32': + from rpython.rtyper.lltypesystem import lltype, rffi + from rpython.rlib.runicode import CP_ACP, BOOLP, WideCharToMultiByte + from rpython.rlib import rwin32 + + def utf8_encode_mbcs(s, errors, errorhandler, + force_replace=True): + # TODO: do the encoding without decoding utf8 -> unicode + uni = s.decode('utf8') + lgt = len(uni) + if not force_replace and errors not in ('strict', 'replace'): + msg = "mbcs encoding does not support errors='%s'" % errors + errorhandler('strict', 'mbcs', msg, s, 0, 0) + + if lgt == 0: + return '' + + if force_replace or errors == 'replace': + flags = 0 + used_default_p = lltype.nullptr(BOOLP.TO) + else: + # strict + flags = rwin32.WC_NO_BEST_FIT_CHARS + used_default_p = lltype.malloc(BOOLP.TO, 1, flavor='raw') + 
used_default_p[0] = rffi.cast(rwin32.BOOL, False) + + try: + with rffi.scoped_nonmoving_unicodebuffer(uni) as dataptr: + # first get the size of the result + mbcssize = WideCharToMultiByte(CP_ACP, flags, + dataptr, lgt, None, 0, + None, used_default_p) + if mbcssize == 0: + raise rwin32.lastSavedWindowsError() + # If we used a default char, then we failed! + if (used_default_p and + rffi.cast(lltype.Bool, used_default_p[0])): + errorhandler('strict', 'mbcs', "invalid character", + s, 0, 0) + + with rffi.scoped_alloc_buffer(mbcssize) as buf: + # do the conversion + if WideCharToMultiByte(CP_ACP, flags, + dataptr, lgt, buf.raw, mbcssize, + None, used_default_p) == 0: + raise rwin32.lastSavedWindowsError() + if (used_default_p and + rffi.cast(lltype.Bool, used_default_p[0])): + errorhandler('strict', 'mbcs', "invalid character", + s, 0, 0) + result = buf.str(mbcssize) + assert result is not None + return result + finally: + if used_default_p: + lltype.free(used_default_p, flavor='raw') From pypy.commits at gmail.com Mon Sep 17 01:45:54 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 16 Sep 2018 22:45:54 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: merge heads Message-ID: <5b9f3f92.1c69fb81.2ecf6.1e88@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95136:fa0322eb4ef4 Date: 2018-09-17 08:04 +0300 http://bitbucket.org/pypy/pypy/changeset/fa0322eb4ef4/ Log: merge heads diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Welcome to PyPy! -PyPy is an interperter that implements the Python programming language, based +PyPy is an interpreter that implements the Python programming language, based on the RPython compiler framework for dynamic language implementations. 
The home page for the interpreter is: @@ -15,29 +15,29 @@ http://doc.pypy.org/ -More documentation about the RPython framework can be found here +More documentation about the RPython framework can be found here: - http://rpython.readthedocs.io + http://rpython.readthedocs.io/ -The source for the documentation is in the pypy/doc directory +The source for the documentation is in the pypy/doc directory. + Using PyPy instead of CPython -============================= +----------------------------- -Please read the information at http://pypy.org to find the correct way to +Please read the information at http://pypy.org/ to find the correct way to download and use PyPy as an alternative to CPython. + Building -======== +-------- Building PyPy is not the recommended way to obtain the PyPy alternative python interpreter. It is time-consuming and requires significant computing resources. -More information can be found here +More information can be found here: http://doc.pypy.org/en/latest/build.html Enjoy and send us feedback! the pypy-dev team - - diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -282,7 +282,7 @@ pos = rutf8._pos_at_index(s, newindex) return result.build() -def utf8_encode_ascii(s, errors, errorhandler): +def utf8_encode_ascii(s, errors, errorhandler, allow_surrogates=False): """ Don't be confused - this is a slowpath for errors e.g.
"ignore" or an obscure errorhandler """ @@ -1200,14 +1200,15 @@ else: res_8, newindex = errorhandler( errors, public_encoding_name, 'surrogates not allowed', - s, pos - 1, pos) - for cp in rutf8.Utf8StringIterator(res_8): - if cp < 0xD800: + s, pos, pos+1) + #for cp in rutf8.Utf8StringIterator(res_8): + for cp in res_8: + if cp < 0xD800 or allow_surrogates: _STORECHAR(result, cp, byteorder) else: errorhandler('strict', public_encoding_name, 'surrogates not allowed', - s, pos-1, pos) + s, pos, pos+1) if index != newindex: # Should be uncommon index = newindex pos = rutf8._pos_at_index(s, newindex) diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -313,9 +313,7 @@ if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool): self._must_be_string_of_zero_or_one(value) keepalives[i] = value - buf, buf_flag = rffi.get_nonmovingbuffer_final_null(value) - rffi.cast(rffi.CCHARPP, cdata)[0] = buf - return ord(buf_flag) # 4, 5 or 6 + return misc.write_string_as_charp(cdata, value) # if (space.isinstance_w(w_init, space.w_list) or space.isinstance_w(w_init, space.w_tuple)): diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -3,7 +3,7 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib import jit -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, we_are_translated from rpython.rlib.rarithmetic import r_uint, r_ulonglong from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.lltypesystem import lltype, llmemory, rffi @@ -102,6 +102,12 @@ def write_raw_longdouble_data(target, source): rffi.cast(rffi.LONGDOUBLEP, target)[0] = source +@jit.dont_look_inside # lets get_nonmovingbuffer_final_null be inlined +def write_string_as_charp(target, string): +
buf, buf_flag = rffi.get_nonmovingbuffer_final_null(string) + rffi.cast(rffi.CCHARPP, target)[0] = buf + return ord(buf_flag) # 4, 5 or 6 + # ____________________________________________________________ sprintf_longdouble = rffi.llexternal( @@ -129,21 +135,14 @@ # This version accepts a Python int too, and does convertions from # other types of objects. It refuses floats. try: - value = space.int_w(w_ob) + return space.int_w(w_ob, allow_conversion=False) except OperationError as e: if not (e.match(space, space.w_OverflowError) or e.match(space, space.w_TypeError)): raise - else: - return value - try: - bigint = space.bigint_w(w_ob, allow_conversion=False) - except OperationError as e: - if not e.match(space, space.w_TypeError): - raise if _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + bigint = space.bigint_w(w_ob, allow_conversion=True) try: return bigint.tolonglong() except OverflowError: @@ -151,20 +150,15 @@ def as_long(space, w_ob): # Same as as_long_long(), but returning an int instead. - if space.is_w(space.type(w_ob), space.w_int): # shortcut - return space.int_w(w_ob) try: - bigint = space.bigint_w(w_ob, allow_conversion=False) + return space.int_w(w_ob, allow_conversion=False) except OperationError as e: - if not e.match(space, space.w_TypeError): + if not (e.match(space, space.w_OverflowError) or + e.match(space, space.w_TypeError)): raise if _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) - try: - return bigint.toint() - except OverflowError: - raise OperationError(space.w_OverflowError, space.newtext(ovf_msg)) + return space.int_w(w_ob, allow_conversion=True) def as_unsigned_long_long(space, w_ob, strict): # (possibly) convert and cast a Python object to an unsigned long long. @@ -172,23 +166,19 @@ # objects. If 'strict', complains with OverflowError; if 'not strict', # mask the result and round floats. 
try: - value = space.int_w(w_ob) + value = space.int_w(w_ob, allow_conversion=False) except OperationError as e: if not (e.match(space, space.w_OverflowError) or e.match(space, space.w_TypeError)): raise + if strict and _is_a_float(space, w_ob): + raise else: if strict and value < 0: raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) return r_ulonglong(value) - try: - bigint = space.bigint_w(w_ob, allow_conversion=False) - except OperationError as e: - if not e.match(space, space.w_TypeError): - raise - if strict and _is_a_float(space, w_ob): - raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + # note that if not 'strict', then space.int() will round down floats + bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) if strict: try: return bigint.toulonglong() @@ -202,13 +192,19 @@ def as_unsigned_long(space, w_ob, strict): # same as as_unsigned_long_long(), but returning just an Unsigned try: - bigint = space.bigint_w(w_ob, allow_conversion=False) + value = space.int_w(w_ob, allow_conversion=False) except OperationError as e: - if not e.match(space, space.w_TypeError): + if not (e.match(space, space.w_OverflowError) or + e.match(space, space.w_TypeError)): raise if strict and _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + else: + if strict and value < 0: + raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) + return r_uint(value) + # note that if not 'strict', then space.int() will round down floats + bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) if strict: try: return bigint.touint() @@ -241,7 +237,12 @@ def _standard_object_as_bool(space, w_ob): if space.isinstance_w(w_ob, space.w_int): - return space.bigint_w(w_ob).tobool() + try: + return space.int_w(w_ob) != 0 + except OperationError as e: + if not e.match(space, space.w_OverflowError): + raise + return space.bigint_w(w_ob).tobool() if space.isinstance_w(w_ob, 
space.w_float): return space.float_w(w_ob) != 0.0 raise _NotStandardObject diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -334,8 +334,18 @@ max = (1 << (8*size-1)) - 1 assert newp(pp, min)[0] == min assert newp(pp, max)[0] == max + py.test.raises(OverflowError, newp, pp, min - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 2 ** 64) py.test.raises(OverflowError, newp, pp, min - 1) py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64) + py.test.raises(TypeError, newp, pp, 1.0) for name in ['char', 'short', 'int', 'long', 'long long']: p = new_primitive_type('unsigned ' + name) pp = new_pointer_type(p) diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -649,9 +649,13 @@ w_arg = space.convert_arg_to_w_unicode(w_arg, errors) if errors is None: errors = 'strict' + allow_surrogates = False + if errors in ('surrogatepass',): + allow_surrogates = True state = space.fromcache(CodecState) ulen = w_arg._length - result = func(w_arg._utf8, errors, state.encode_error_handler) + result = func(w_arg._utf8, errors, state.encode_error_handler, + allow_surrogates=allow_surrogates) return space.newtuple([space.newbytes(result), space.newint(ulen)]) wrap_encoder.__name__ = func.__name__ globals()[name] = wrap_encoder diff --git a/pypy/objspace/std/unicodeobject.py 
b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1170,7 +1170,7 @@ first = u[0] it = rutf8.Utf8StringIterator(u) code = it.next() - if not (unicodedb.isxidstart(code) or first == u'_'): + if not (unicodedb.isxidstart(code) or first == '_'): return False for ch in it: From pypy.commits at gmail.com Mon Sep 17 01:45:57 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 16 Sep 2018 22:45:57 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: add dummy allow_surrogates kwarg to all encoding functions, fix translation Message-ID: <5b9f3f95.1c69fb81.318a5.683b@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95137:613679b79386 Date: 2018-09-17 08:44 +0300 http://bitbucket.org/pypy/pypy/changeset/613679b79386/ Log: add dummy allow_surrogates kwarg to all encoding functions, fix translation diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -245,7 +245,7 @@ s = start + ru + end return s -def utf8_encode_latin_1(s, errors, errorhandler): +def utf8_encode_latin_1(s, errors, errorhandler, allow_surrogates=False): try: rutf8.check_ascii(s) return s @@ -311,7 +311,7 @@ return result.build() if _WIN32: - def utf8_encode_mbcs(s, errors, errorhandler): + def utf8_encode_mbcs(s, errors, errorhandler, allow_surrogates=False): res = rutf8.utf8_encode_mbcs(s, errors, errorhandler, force_replace=False) return res @@ -321,7 +321,7 @@ res, size = runicode.str_decode_mbcs(s, slen, errors, final=final, errorhandler=errorhandler, force_ignore=force_ignore) res_utf8 = runicode.unicode_encode_utf_8(res, len(res), 'strict') - return res_utf8, len(res) + return res_utf8, len(res), len(res) def str_decode_utf8(s, errors, final, errorhandler, allow_surrogates=False): """ Same as checking for the valid utf8, but we know the utf8 is not @@ -686,7 +686,7 @@ for i in 
range(zeros-1, -1, -1): result.append(TABLE[(char >> (4 * i)) & 0x0f]) -def utf8_encode_raw_unicode_escape(s, errors, errorhandler): +def utf8_encode_raw_unicode_escape(s, errors, errorhandler, allow_surrogates=False): # errorhandler is not used: this function cannot cause Unicode errors size = len(s) if size == 0: @@ -705,7 +705,7 @@ return result.build() -def utf8_encode_unicode_escape(s, errors, errorhandler): +def utf8_encode_unicode_escape(s, errors, errorhandler, allow_surrogates=False): return _utf8_encode_unicode_escape(s) # ____________________________________________________________ @@ -938,7 +938,7 @@ assert final_length >= 0 return result.build()[:final_length], outsize, size -def utf8_encode_utf_7(s, errors, errorhandler): +def utf8_encode_utf_7(s, errors, errorhandler, allow_surrogates=False): size = len(s) if size == 0: return '' @@ -1002,7 +1002,7 @@ errorhandler=encode_unicode_error_handler(space), allow_surrogates=allow_surrogates) -def encode_utf8sp(space, uni): +def encode_utf8sp(space, uni, allow_surrogates=True): # Surrogate-preserving utf-8 encoding. Any surrogate character # turns into its 3-bytes encoding, whether it is paired or not. 
# This should always be reversible, and the reverse is @@ -1202,7 +1202,8 @@ errors, public_encoding_name, 'surrogates not allowed', s, pos, pos+1) #for cp in rutf8.Utf8StringIterator(res_8): - for cp in res_8: + for ch in res_8: + cp = ord(ch) if cp < 0xD800 or allow_surrogates: _STORECHAR(result, cp, byteorder) else: @@ -1566,7 +1567,7 @@ lgt = rutf8.check_utf8(r, True) return r, lgt -def utf8_encode_unicode_internal(s, errors, errorhandler): +def utf8_encode_unicode_internal(s, errors, errorhandler, allow_surrogates=False): size = len(s) if size == 0: return '' @@ -1625,7 +1626,7 @@ lgt = rutf8.codepoints_in_utf8(r) return r, lgt, pos -def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None): +def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None, allow_surrogates=False): if mapping is None: return utf8_encode_latin_1(s, errors, errorhandler=errorhandler) size = len(s) @@ -1667,7 +1668,7 @@ # ____________________________________________________________ # Decimal Encoder -def unicode_encode_decimal(s, errors, errorhandler=None): +def unicode_encode_decimal(s, errors, errorhandler=None, allow_surrogates=False): """Converts whitespace to ' ', decimal characters to their corresponding ASCII digit and all other Latin-1 characters except \0 as-is. 
Characters outside this range (Unicode ordinals 1-256) From pypy.commits at gmail.com Mon Sep 17 09:55:15 2018 From: pypy.commits at gmail.com (mattip) Date: Mon, 17 Sep 2018 06:55:15 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: adapt baf31e0bca1e (issue #2866) and more space.isinstance_w -> isinstance Message-ID: <5b9fb243.1c69fb81.a403d.f6fd@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95138:94c008d088ca Date: 2018-09-17 16:54 +0300 http://bitbucket.org/pypy/pypy/changeset/94c008d088ca/ Log: adapt baf31e0bca1e (issue #2866) and more space.isinstance_w -> isinstance diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -122,7 +122,7 @@ @staticmethod def convert_arg_to_w_unicode(space, w_other, strict=None): - if space.is_w(space.type(w_other), space.w_unicode): + if isinstance(w_other, W_UnicodeObject): return w_other if space.isinstance_w(w_other, space.w_bytes): raise oefmt(space.w_TypeError, @@ -187,15 +187,20 @@ def descr_new(space, w_unicodetype, w_object=None, w_encoding=None, w_errors=None): if w_object is None: - w_value = W_UnicodeObject.EMPTY + w_object = W_UnicodeObject.EMPTY + w_obj = w_object + + encoding, errors, allow_surrogates = _get_encoding_and_errors(space, w_encoding, + w_errors) + if encoding is None and errors is None: + # this is very quick if w_obj is already a w_unicode + w_value = unicode_from_object(space, w_obj) else: - encoding, errors, allow_surrogates = _get_encoding_and_errors(space, - w_encoding, w_errors) - if encoding is None and errors is None: - w_value = unicode_from_object(space, w_object) - else: - w_value = unicode_from_encoded_object(space, w_object, encoding, - errors) + if space.isinstance_w(w_obj, space.w_unicode): + raise oefmt(space.w_TypeError, + "decoding str is not supported") + w_value = unicode_from_encoded_object(space, w_obj, + encoding, errors) if 
space.is_w(w_unicodetype, space.w_unicode): return w_value @@ -1254,7 +1259,7 @@ return space.newutf8(s, lgt) from pypy.module._codecs.interp_codecs import decode_text w_retval = decode_text(space, w_obj, encoding, errors) - if not space.isinstance_w(w_retval, space.w_unicode): + if not isinstance(w_retval, W_UnicodeObject): raise oefmt(space.w_TypeError, "'%s' decoder returned '%T' instead of 'str'; " "use codecs.decode() to decode to arbitrary types", @@ -1269,11 +1274,10 @@ if encoding is None: encoding = getdefaultencoding(space) w_retval = decode_object(space, w_obj, encoding, errors) - if not space.isinstance_w(w_retval, space.w_unicode): + if not isinstance(w_retval, W_UnicodeObject): raise oefmt(space.w_TypeError, "decoder did not return a str object (type '%T')", w_retval) - assert isinstance(w_retval, W_UnicodeObject) return w_retval From pypy.commits at gmail.com Tue Sep 18 21:27:36 2018 From: pypy.commits at gmail.com (rlamy) Date: Tue, 18 Sep 2018 18:27:36 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: hg merge py3.5 Message-ID: <5ba1a608.1c69fb81.8f04d.de72@mx.google.com> Author: Ronan Lamy Branch: py3.6 Changeset: r95139:39e4b5e92c6c Date: 2018-09-19 02:26 +0100 http://bitbucket.org/pypy/pypy/changeset/39e4b5e92c6c/ Log: hg merge py3.5 diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -330,7 +330,8 @@ ------------- * Hash randomization (``-R``) `is ignored in PyPy`_. In CPython - before 3.4 it has `little point`_. + before 3.4 it has `little point`_. Both CPython >= 3.4 and PyPy3 + implement the randomized SipHash algorithm and ignore ``-R``. * You can't store non-string keys in type objects. 
For example:: diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py --- a/pypy/module/_csv/interp_reader.py +++ b/pypy/module/_csv/interp_reader.py @@ -73,6 +73,9 @@ break raise self.line_num += 1 + if space.isinstance_w(w_line, space.w_bytes): + raise self.error(u"iterator should return strings, not bytes " + u"(did you open the file in text mode?") line = space.unicode_w(w_line) for c in line: if c == u'\0': diff --git a/pypy/module/_csv/test/test_reader.py b/pypy/module/_csv/test/test_reader.py --- a/pypy/module/_csv/test/test_reader.py +++ b/pypy/module/_csv/test/test_reader.py @@ -33,7 +33,7 @@ def test_cannot_read_bytes(self): import _csv reader = _csv.reader([b'foo']) - raises((TypeError, _csv.Error), next, reader) + raises(_csv.Error, next, reader) def test_read_oddinputs(self): self._read_test([], []) diff --git a/pypy/module/cpyext/test0/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py rename from pypy/module/cpyext/test0/test_arraymodule.py rename to pypy/module/cpyext/test/test_arraymodule.py diff --git a/pypy/module/cpyext/test0/test_boolobject.py b/pypy/module/cpyext/test/test_boolobject.py rename from pypy/module/cpyext/test0/test_boolobject.py rename to pypy/module/cpyext/test/test_boolobject.py diff --git a/pypy/module/cpyext/test0/test_borrow.py b/pypy/module/cpyext/test/test_borrow.py rename from pypy/module/cpyext/test0/test_borrow.py rename to pypy/module/cpyext/test/test_borrow.py diff --git a/pypy/module/cpyext/test0/test_bytearrayobject.py b/pypy/module/cpyext/test/test_bytearrayobject.py rename from pypy/module/cpyext/test0/test_bytearrayobject.py rename to pypy/module/cpyext/test/test_bytearrayobject.py diff --git a/pypy/module/cpyext/test0/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py rename from pypy/module/cpyext/test0/test_bytesobject.py rename to pypy/module/cpyext/test/test_bytesobject.py diff --git a/pypy/module/cpyext/test0/test_capsule.py 
b/pypy/module/cpyext/test/test_capsule.py rename from pypy/module/cpyext/test0/test_capsule.py rename to pypy/module/cpyext/test/test_capsule.py diff --git a/pypy/module/cpyext/test0/test_cell.py b/pypy/module/cpyext/test/test_cell.py rename from pypy/module/cpyext/test0/test_cell.py rename to pypy/module/cpyext/test/test_cell.py diff --git a/pypy/module/cpyext/test0/test_classobject.py b/pypy/module/cpyext/test/test_classobject.py rename from pypy/module/cpyext/test0/test_classobject.py rename to pypy/module/cpyext/test/test_classobject.py diff --git a/pypy/module/cpyext/test0/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py rename from pypy/module/cpyext/test0/test_codecs.py rename to pypy/module/cpyext/test/test_codecs.py diff --git a/pypy/module/cpyext/test0/test_complexobject.py b/pypy/module/cpyext/test/test_complexobject.py rename from pypy/module/cpyext/test0/test_complexobject.py rename to pypy/module/cpyext/test/test_complexobject.py diff --git a/pypy/module/cpyext/test0/test_cparser.py b/pypy/module/cpyext/test/test_cparser.py rename from pypy/module/cpyext/test0/test_cparser.py rename to pypy/module/cpyext/test/test_cparser.py diff --git a/pypy/module/cpyext/test0/test_datetime.py b/pypy/module/cpyext/test/test_datetime.py rename from pypy/module/cpyext/test0/test_datetime.py rename to pypy/module/cpyext/test/test_datetime.py diff --git a/pypy/module/cpyext/test0/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py rename from pypy/module/cpyext/test0/test_dictobject.py rename to pypy/module/cpyext/test/test_dictobject.py diff --git a/pypy/module/cpyext/test0/test_eval.py b/pypy/module/cpyext/test/test_eval.py rename from pypy/module/cpyext/test0/test_eval.py rename to pypy/module/cpyext/test/test_eval.py diff --git a/pypy/module/cpyext/test0/test_fileobject.py b/pypy/module/cpyext/test/test_fileobject.py rename from pypy/module/cpyext/test0/test_fileobject.py rename to pypy/module/cpyext/test/test_fileobject.py diff --git 
a/pypy/module/cpyext/test0/test_floatobject.py b/pypy/module/cpyext/test/test_floatobject.py rename from pypy/module/cpyext/test0/test_floatobject.py rename to pypy/module/cpyext/test/test_floatobject.py diff --git a/pypy/module/cpyext/test0/test_frameobject.py b/pypy/module/cpyext/test/test_frameobject.py rename from pypy/module/cpyext/test0/test_frameobject.py rename to pypy/module/cpyext/test/test_frameobject.py diff --git a/pypy/module/cpyext/test0/test_funcobject.py b/pypy/module/cpyext/test/test_funcobject.py rename from pypy/module/cpyext/test0/test_funcobject.py rename to pypy/module/cpyext/test/test_funcobject.py diff --git a/pypy/module/cpyext/test0/test_genobject.py b/pypy/module/cpyext/test/test_genobject.py rename from pypy/module/cpyext/test0/test_genobject.py rename to pypy/module/cpyext/test/test_genobject.py diff --git a/pypy/module/cpyext/test0/test_getargs.py b/pypy/module/cpyext/test/test_getargs.py rename from pypy/module/cpyext/test0/test_getargs.py rename to pypy/module/cpyext/test/test_getargs.py diff --git a/pypy/module/cpyext/test0/test_import.py b/pypy/module/cpyext/test/test_import.py rename from pypy/module/cpyext/test0/test_import.py rename to pypy/module/cpyext/test/test_import.py diff --git a/pypy/module/cpyext/test0/test_iterator.py b/pypy/module/cpyext/test/test_iterator.py rename from pypy/module/cpyext/test0/test_iterator.py rename to pypy/module/cpyext/test/test_iterator.py diff --git a/pypy/module/cpyext/test0/test_listobject.py b/pypy/module/cpyext/test/test_listobject.py rename from pypy/module/cpyext/test0/test_listobject.py rename to pypy/module/cpyext/test/test_listobject.py diff --git a/pypy/module/cpyext/test0/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py rename from pypy/module/cpyext/test0/test_longobject.py rename to pypy/module/cpyext/test/test_longobject.py diff --git a/pypy/module/cpyext/test0/test_mapping.py b/pypy/module/cpyext/test/test_mapping.py rename from 
pypy/module/cpyext/test0/test_mapping.py rename to pypy/module/cpyext/test/test_mapping.py diff --git a/pypy/module/cpyext/test0/test_marshal.py b/pypy/module/cpyext/test/test_marshal.py rename from pypy/module/cpyext/test0/test_marshal.py rename to pypy/module/cpyext/test/test_marshal.py diff --git a/pypy/module/cpyext/test0/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py rename from pypy/module/cpyext/test0/test_memoryobject.py rename to pypy/module/cpyext/test/test_memoryobject.py diff --git a/pypy/module/cpyext/test0/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py rename from pypy/module/cpyext/test0/test_methodobject.py rename to pypy/module/cpyext/test/test_methodobject.py diff --git a/pypy/module/cpyext/test0/test_module.py b/pypy/module/cpyext/test/test_module.py rename from pypy/module/cpyext/test0/test_module.py rename to pypy/module/cpyext/test/test_module.py diff --git a/pypy/module/cpyext/test0/test_ndarrayobject.py b/pypy/module/cpyext/test/test_ndarrayobject.py rename from pypy/module/cpyext/test0/test_ndarrayobject.py rename to pypy/module/cpyext/test/test_ndarrayobject.py diff --git a/pypy/module/cpyext/test0/test_number.py b/pypy/module/cpyext/test/test_number.py rename from pypy/module/cpyext/test0/test_number.py rename to pypy/module/cpyext/test/test_number.py diff --git a/pypy/module/cpyext/test1/test_object.py b/pypy/module/cpyext/test/test_object.py rename from pypy/module/cpyext/test1/test_object.py rename to pypy/module/cpyext/test/test_object.py diff --git a/pypy/module/cpyext/test1/test_pycobject.py b/pypy/module/cpyext/test/test_pycobject.py rename from pypy/module/cpyext/test1/test_pycobject.py rename to pypy/module/cpyext/test/test_pycobject.py diff --git a/pypy/module/cpyext/test1/test_pyerrors.py b/pypy/module/cpyext/test/test_pyerrors.py rename from pypy/module/cpyext/test1/test_pyerrors.py rename to pypy/module/cpyext/test/test_pyerrors.py diff --git a/pypy/module/cpyext/test1/test_pyfile.py 
b/pypy/module/cpyext/test/test_pyfile.py rename from pypy/module/cpyext/test1/test_pyfile.py rename to pypy/module/cpyext/test/test_pyfile.py diff --git a/pypy/module/cpyext/test1/test_pysignals.py b/pypy/module/cpyext/test/test_pysignals.py rename from pypy/module/cpyext/test1/test_pysignals.py rename to pypy/module/cpyext/test/test_pysignals.py diff --git a/pypy/module/cpyext/test1/test_pystate.py b/pypy/module/cpyext/test/test_pystate.py rename from pypy/module/cpyext/test1/test_pystate.py rename to pypy/module/cpyext/test/test_pystate.py diff --git a/pypy/module/cpyext/test1/test_pystrtod.py b/pypy/module/cpyext/test/test_pystrtod.py rename from pypy/module/cpyext/test1/test_pystrtod.py rename to pypy/module/cpyext/test/test_pystrtod.py diff --git a/pypy/module/cpyext/test1/test_sequence.py b/pypy/module/cpyext/test/test_sequence.py rename from pypy/module/cpyext/test1/test_sequence.py rename to pypy/module/cpyext/test/test_sequence.py diff --git a/pypy/module/cpyext/test1/test_setobject.py b/pypy/module/cpyext/test/test_setobject.py rename from pypy/module/cpyext/test1/test_setobject.py rename to pypy/module/cpyext/test/test_setobject.py diff --git a/pypy/module/cpyext/test1/test_sliceobject.py b/pypy/module/cpyext/test/test_sliceobject.py rename from pypy/module/cpyext/test1/test_sliceobject.py rename to pypy/module/cpyext/test/test_sliceobject.py diff --git a/pypy/module/cpyext/test1/test_structseq.py b/pypy/module/cpyext/test/test_structseq.py rename from pypy/module/cpyext/test1/test_structseq.py rename to pypy/module/cpyext/test/test_structseq.py diff --git a/pypy/module/cpyext/test1/test_sysmodule.py b/pypy/module/cpyext/test/test_sysmodule.py rename from pypy/module/cpyext/test1/test_sysmodule.py rename to pypy/module/cpyext/test/test_sysmodule.py diff --git a/pypy/module/cpyext/test1/test_thread.py b/pypy/module/cpyext/test/test_thread.py rename from pypy/module/cpyext/test1/test_thread.py rename to pypy/module/cpyext/test/test_thread.py diff --git 
a/pypy/module/cpyext/test1/test_traceback.py b/pypy/module/cpyext/test/test_traceback.py rename from pypy/module/cpyext/test1/test_traceback.py rename to pypy/module/cpyext/test/test_traceback.py diff --git a/pypy/module/cpyext/test1/test_translate.py b/pypy/module/cpyext/test/test_translate.py rename from pypy/module/cpyext/test1/test_translate.py rename to pypy/module/cpyext/test/test_translate.py diff --git a/pypy/module/cpyext/test1/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py rename from pypy/module/cpyext/test1/test_tupleobject.py rename to pypy/module/cpyext/test/test_tupleobject.py diff --git a/pypy/module/cpyext/test1/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py rename from pypy/module/cpyext/test1/test_typeobject.py rename to pypy/module/cpyext/test/test_typeobject.py diff --git a/pypy/module/cpyext/test1/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py rename from pypy/module/cpyext/test1/test_unicodeobject.py rename to pypy/module/cpyext/test/test_unicodeobject.py diff --git a/pypy/module/cpyext/test1/test_userslots.py b/pypy/module/cpyext/test/test_userslots.py rename from pypy/module/cpyext/test1/test_userslots.py rename to pypy/module/cpyext/test/test_userslots.py diff --git a/pypy/module/cpyext/test1/test_version.py b/pypy/module/cpyext/test/test_version.py rename from pypy/module/cpyext/test1/test_version.py rename to pypy/module/cpyext/test/test_version.py diff --git a/pypy/module/cpyext/test1/test_weakref.py b/pypy/module/cpyext/test/test_weakref.py rename from pypy/module/cpyext/test1/test_weakref.py rename to pypy/module/cpyext/test/test_weakref.py diff --git a/pypy/module/cpyext/test0/__init__.py b/pypy/module/cpyext/test0/__init__.py deleted file mode 100644 diff --git a/pypy/module/cpyext/test0/conftest.py b/pypy/module/cpyext/test0/conftest.py deleted file mode 100644 --- a/pypy/module/cpyext/test0/conftest.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import pytest - -def 
pytest_configure(config): - if config.option.runappdirect: - import sys - import py - from pypy import pypydir - sys.path.append(str(py.path.local(pypydir) / 'tool' / 'cpyext')) - return - from pypy.tool.pytest.objspace import gettestobjspace - # For some reason (probably a ll2ctypes cache issue on linux64) - # it's necessary to run "import time" at least once before any - # other cpyext test, otherwise the same statement will fail in - # test_datetime.py. - space = gettestobjspace(usemodules=['time']) - space.getbuiltinmodule("time") - -def pytest_ignore_collect(path, config): - # ensure additional functions are registered - import pypy.module.cpyext.test.test_cpyext - return False - -def pytest_funcarg__api(request): - return request.cls.api - -if os.name == 'nt': - @pytest.yield_fixture(autouse=True, scope='session') - def prevent_dialog_box(): - """Do not open dreaded dialog box on segfault on Windows""" - import ctypes - SEM_NOGPFAULTERRORBOX = 0x0002 # From MSDN - old_err_mode = ctypes.windll.kernel32.GetErrorMode() - new_err_mode = old_err_mode | SEM_NOGPFAULTERRORBOX - ctypes.windll.kernel32.SetErrorMode(new_err_mode) - yield - ctypes.windll.kernel32.SetErrorMode(old_err_mode) diff --git a/pypy/module/cpyext/test1/__init__.py b/pypy/module/cpyext/test1/__init__.py deleted file mode 100644 diff --git a/pypy/module/cpyext/test1/conftest.py b/pypy/module/cpyext/test1/conftest.py deleted file mode 100644 --- a/pypy/module/cpyext/test1/conftest.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import pytest - -def pytest_configure(config): - if config.option.runappdirect: - import sys - import py - from pypy import pypydir - sys.path.append(str(py.path.local(pypydir) / 'tool' / 'cpyext')) - return - from pypy.tool.pytest.objspace import gettestobjspace - # For some reason (probably a ll2ctypes cache issue on linux64) - # it's necessary to run "import time" at least once before any - # other cpyext test, otherwise the same statement will fail in - # test_datetime.py. 
- space = gettestobjspace(usemodules=['time']) - space.getbuiltinmodule("time") - -def pytest_ignore_collect(path, config): - # ensure additional functions are registered - import pypy.module.cpyext.test.test_cpyext - return False - -def pytest_funcarg__api(request): - return request.cls.api - -if os.name == 'nt': - @pytest.yield_fixture(autouse=True, scope='session') - def prevent_dialog_box(): - """Do not open dreaded dialog box on segfault on Windows""" - import ctypes - SEM_NOGPFAULTERRORBOX = 0x0002 # From MSDN - old_err_mode = ctypes.windll.kernel32.GetErrorMode() - new_err_mode = old_err_mode | SEM_NOGPFAULTERRORBOX - ctypes.windll.kernel32.SetErrorMode(new_err_mode) - yield - ctypes.windll.kernel32.SetErrorMode(old_err_mode) diff --git a/pypy/module/select/__init__.py b/pypy/module/select/__init__.py --- a/pypy/module/select/__init__.py +++ b/pypy/module/select/__init__.py @@ -3,6 +3,7 @@ import sys import os +from rpython.rlib import _rsocket_rffi as _c class Module(MixedModule): @@ -31,6 +32,10 @@ for symbol in symbol_map: interpleveldefs[symbol] = "space.wrap(interp_kqueue.%s)" % symbol + if _c.PIPE_BUF is not None: + interpleveldefs['PIPE_BUF'] = 'space.wrap(%r)' % _c.PIPE_BUF + + def buildloaders(cls): from rpython.rlib import rpoll for name in rpoll.eventnames: diff --git a/pypy/module/select/test/test_select.py b/pypy/module/select/test/test_select.py --- a/pypy/module/select/test/test_select.py +++ b/pypy/module/select/test/test_select.py @@ -319,6 +319,11 @@ # ^^^ CPython gives 100, PyPy gives 1. I think both are OK as # long as there is no crash. + def test_PIPE_BUF(self): + # no PIPE_BUF on Windows; this test class is skipped on Windows. + import select + assert isinstance(select.PIPE_BUF, int) + class AppTestSelectWithSockets(_AppTestSelect): """Same tests with connected sockets. 
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -397,14 +397,14 @@ assert str(123) == '123' assert str(object=123) == '123' assert str([2, 3]) == '[2, 3]' - assert str(errors='strict') == '' + #assert str(errors='strict') == '' --- obscure case, disabled for now class U(str): pass assert str(U()).__class__ is str assert U().__str__().__class__ is str assert U('test') == 'test' assert U('test').__class__ is U - assert U(errors='strict') == U('') + #assert U(errors='strict') == U('') --- obscure case, disabled for now def test_call_unicode_2(self): class X(object): @@ -1088,3 +1088,31 @@ assert u'A\u03a3\u0345'.lower() == u'a\u03c2\u0345' assert u'\u03a3\u0345 '.lower() == u'\u03c3\u0345 ' + def test_unicode_constructor_misc(self): + x = u'foo' + x += u'bar' + assert str(x) is x + # + class U(str): + def __str__(self): + return u'BOK' + u = U(x) + assert str(u) == u'BOK' + # + class U2(str): + pass + z = U2(u'foobaz') + assert type(str(z)) is str + assert str(z) == u'foobaz' + # + # two completely corner cases where we differ from CPython: + #assert unicode(encoding='supposedly_the_encoding') == u'' + #assert unicode(errors='supposedly_the_error') == u'' + e = raises(TypeError, str, u'', 'supposedly_the_encoding') + assert str(e.value) == 'decoding str is not supported' + e = raises(TypeError, str, u'', errors='supposedly_the_error') + assert str(e.value) == 'decoding str is not supported' + e = raises(TypeError, str, u, 'supposedly_the_encoding') + assert str(e.value) == 'decoding str is not supported' + e = raises(TypeError, str, z, 'supposedly_the_encoding') + assert str(e.value) == 'decoding str is not supported' diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -224,15 +224,20 @@ def 
descr_new(space, w_unicodetype, w_object=None, w_encoding=None, w_errors=None): if w_object is None: - w_value = W_UnicodeObject.EMPTY + w_object = W_UnicodeObject.EMPTY + w_obj = w_object + + encoding, errors = _get_encoding_and_errors(space, w_encoding, + w_errors) + if encoding is None and errors is None: + # this is very quick if w_obj is already a w_unicode + w_value = unicode_from_object(space, w_obj) else: - encoding, errors = _get_encoding_and_errors(space, w_encoding, - w_errors) - if encoding is None and errors is None: - w_value = unicode_from_object(space, w_object) - else: - w_value = unicode_from_encoded_object(space, w_object, - encoding, errors) + if space.isinstance_w(w_obj, space.w_unicode): + raise oefmt(space.w_TypeError, + "decoding str is not supported") + w_value = unicode_from_encoded_object(space, w_obj, + encoding, errors) if space.is_w(w_unicodetype, space.w_unicode): return w_value diff --git a/pypy/testrunner_cfg.py b/pypy/testrunner_cfg.py --- a/pypy/testrunner_cfg.py +++ b/pypy/testrunner_cfg.py @@ -5,7 +5,7 @@ 'translator/c', 'rlib', 'memory/test', 'jit/metainterp', 'jit/backend/arm', 'jit/backend/x86', - 'jit/backend/zarch', + 'jit/backend/zarch', 'module/cpyext/test', ] def collect_one_testdir(testdirs, reldir, tests): diff --git a/rpython/doc/examples.rst b/rpython/doc/examples.rst --- a/rpython/doc/examples.rst +++ b/rpython/doc/examples.rst @@ -19,7 +19,7 @@ * Typhon, 'A virtual machine for Monte', in active development, https://github.com/monte-language/typhon * Tulip, an untyped functional language, in language design mode, maintained, - https://github.com/tulip-lang/tulip/ + https://github.com/tulip-lang/tulip/ * Pycket, a Racket implementation, proof of concept, small language core working, a lot of primitives are missing. 
Slow development https://github.com/samth/pycket diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py --- a/rpython/rlib/_rsocket_rffi.py +++ b/rpython/rlib/_rsocket_rffi.py @@ -33,6 +33,7 @@ 'arpa/inet.h', 'stdint.h', 'errno.h', + 'limits.h', ) if _HAS_AF_PACKET: includes += ('netpacket/packet.h', @@ -113,6 +114,7 @@ F_GETFL = platform.DefinedConstantInteger('F_GETFL') F_SETFL = platform.DefinedConstantInteger('F_SETFL') FIONBIO = platform.DefinedConstantInteger('FIONBIO') + PIPE_BUF = platform.DefinedConstantInteger('PIPE_BUF') INVALID_SOCKET = platform.DefinedConstantInteger('INVALID_SOCKET') INET_ADDRSTRLEN = platform.DefinedConstantInteger('INET_ADDRSTRLEN') @@ -1085,6 +1087,7 @@ WSAEWOULDBLOCK = cConfig.WSAEWOULDBLOCK or cConfig.EWOULDBLOCK WSAEAFNOSUPPORT = cConfig.WSAEAFNOSUPPORT or cConfig.EAFNOSUPPORT EISCONN = cConfig.EISCONN or cConfig.WSAEISCONN +PIPE_BUF = cConfig.PIPE_BUF # may be None linux = cConfig.linux WIN32 = cConfig.WIN32 From pypy.commits at gmail.com Wed Sep 19 09:15:26 2018 From: pypy.commits at gmail.com (john-aldis-osirium) Date: Wed, 19 Sep 2018 06:15:26 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Skip test that uses pythonapi on pypy Message-ID: <5ba24bee.1c69fb81.3ac5d.b0b0@mx.google.com> Author: John Aldis Branch: py3.6 Changeset: r95140:386a109966ab Date: 2018-09-19 11:28 +0100 http://bitbucket.org/pypy/pypy/changeset/386a109966ab/ Log: Skip test that uses pythonapi on pypy diff --git a/lib-python/3/test/test_bytes.py b/lib-python/3/test/test_bytes.py --- a/lib-python/3/test/test_bytes.py +++ b/lib-python/3/test/test_bytes.py @@ -17,7 +17,7 @@ import test.support import test.string_tests import test.list_tests -from test.support import bigaddrspacetest, MAX_Py_ssize_t +from test.support import bigaddrspacetest, MAX_Py_ssize_t, cpython_only if sys.flags.bytes_warning: @@ -780,6 +780,7 @@ self.assertIs(type(BytesSubclass(A())), BytesSubclass) # Test PyBytes_FromFormat() + @cpython_only def 
test_from_format(self): ctypes = test.support.import_module('ctypes') _testcapi = test.support.import_module('_testcapi') From pypy.commits at gmail.com Wed Sep 19 10:25:35 2018 From: pypy.commits at gmail.com (rlamy) Date: Wed, 19 Sep 2018 07:25:35 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Merged in davidcellis/pypy/py3.6 (pull request #626) Message-ID: <5ba25c5f.1c69fb81.b76bb.50e3@mx.google.com> Author: Ronan Lamy Branch: py3.6 Changeset: r95143:ea84fbbcd9bd Date: 2018-09-19 13:43 +0000 http://bitbucket.org/pypy/pypy/changeset/ea84fbbcd9bd/ Log: Merged in davidcellis/pypy/py3.6 (pull request #626) Add GC collects to tests expecting resource warnings diff --git a/lib-python/3/test/lock_tests.py b/lib-python/3/test/lock_tests.py --- a/lib-python/3/test/lock_tests.py +++ b/lib-python/3/test/lock_tests.py @@ -2,6 +2,7 @@ Various tests for synchronization primitives. """ +import gc import sys import time from _thread import start_new_thread, TIMEOUT_MAX @@ -208,6 +209,7 @@ lock = self.locktype() ref = weakref.ref(lock) del lock + gc.collect() self.assertIsNone(ref()) diff --git a/lib-python/3/test/test_warnings/__init__.py b/lib-python/3/test/test_warnings/__init__.py --- a/lib-python/3/test/test_warnings/__init__.py +++ b/lib-python/3/test/test_warnings/__init__.py @@ -848,10 +848,15 @@ with open(support.TESTFN, 'w') as fp: fp.write(textwrap.dedent(""" + import gc + def func(): f = open(__file__) + # Fully initialise GC for clearer error + gc.collect() # Emit ResourceWarning f = None + gc.collect() func() """)) @@ -863,12 +868,12 @@ stderr = '\n'.join(stderr.splitlines()) stderr = re.sub('<.*>', '<...>', stderr) expected = textwrap.dedent(''' - {fname}:5: ResourceWarning: unclosed file <...> + {fname}:9: ResourceWarning: unclosed file <...> f = None Object allocated at (most recent call first): - File "{fname}", lineno 3 + File "{fname}", lineno 5 f = open(__file__) - File "{fname}", lineno 7 + File "{fname}", lineno 12 func() ''') expected = 
expected.format(fname=support.TESTFN).strip() From pypy.commits at gmail.com Wed Sep 19 10:26:26 2018 From: pypy.commits at gmail.com (David C Ellis) Date: Wed, 19 Sep 2018 07:26:26 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Add GC collect to lock_tests weakref delete test. Message-ID: <5ba25c92.1c69fb81.eca1c.e6bb@mx.google.com> Author: David C Ellis Branch: py3.6 Changeset: r95141:5f0d3bf975f8 Date: 2018-09-19 12:18 +0100 http://bitbucket.org/pypy/pypy/changeset/5f0d3bf975f8/ Log: Add GC collect to lock_tests weakref delete test. diff --git a/lib-python/3/test/lock_tests.py b/lib-python/3/test/lock_tests.py --- a/lib-python/3/test/lock_tests.py +++ b/lib-python/3/test/lock_tests.py @@ -2,6 +2,7 @@ Various tests for synchronization primitives. """ +import gc import sys import time from _thread import start_new_thread, TIMEOUT_MAX @@ -208,6 +209,7 @@ lock = self.locktype() ref = weakref.ref(lock) del lock + gc.collect() self.assertIsNone(ref()) From pypy.commits at gmail.com Wed Sep 19 10:26:28 2018 From: pypy.commits at gmail.com (David C Ellis) Date: Wed, 19 Sep 2018 07:26:28 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Add GC collects to test_tracemalloc - Now gives the expected warning Message-ID: <5ba25c94.1c69fb81.4bb10.0381@mx.google.com> Author: David C Ellis Branch: py3.6 Changeset: r95142:7eda7fcc5e0f Date: 2018-09-19 14:06 +0100 http://bitbucket.org/pypy/pypy/changeset/7eda7fcc5e0f/ Log: Add GC collects to test_tracemalloc - Now gives the expected warning Test still fails as the message is different diff --git a/lib-python/3/test/test_warnings/__init__.py b/lib-python/3/test/test_warnings/__init__.py --- a/lib-python/3/test/test_warnings/__init__.py +++ b/lib-python/3/test/test_warnings/__init__.py @@ -848,10 +848,15 @@ with open(support.TESTFN, 'w') as fp: fp.write(textwrap.dedent(""" + import gc + def func(): f = open(__file__) + # Fully initialise GC for clearer error + gc.collect() # Emit ResourceWarning f = None + gc.collect() func() """)) @@ 
-863,12 +868,12 @@ stderr = '\n'.join(stderr.splitlines()) stderr = re.sub('<.*>', '<...>', stderr) expected = textwrap.dedent(''' - {fname}:5: ResourceWarning: unclosed file <...> + {fname}:9: ResourceWarning: unclosed file <...> f = None Object allocated at (most recent call first): - File "{fname}", lineno 3 + File "{fname}", lineno 5 f = open(__file__) - File "{fname}", lineno 7 + File "{fname}", lineno 12 func() ''') expected = expected.format(fname=support.TESTFN).strip() From pypy.commits at gmail.com Wed Sep 19 11:41:11 2018 From: pypy.commits at gmail.com (arigo) Date: Wed, 19 Sep 2018 08:41:11 -0700 (PDT) Subject: [pypy-commit] cffi default: Issue #384 Message-ID: <5ba26e17.1c69fb81.c71e1.0e05@mx.google.com> Author: Armin Rigo Branch: Changeset: r3164:e2e324a2f13e Date: 2018-09-19 17:05 +0200 http://bitbucket.org/cffi/cffi/changeset/e2e324a2f13e/ Log: Issue #384 Un-ignore the warnings when testing for them, in case someone runs py.test with the PYTHONWARNINGS environment variable set diff --git a/c/test_c.py b/c/test_c.py --- a/c/test_c.py +++ b/c/test_c.py @@ -3968,6 +3968,7 @@ z3 = cast(BVoidP, 0) z4 = cast(BUCharP, 0) with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") newp(new_pointer_type(BIntP), z1) # warn assert len(w) == 1 newp(new_pointer_type(BVoidP), z1) # fine diff --git a/testing/cffi0/backend_tests.py b/testing/cffi0/backend_tests.py --- a/testing/cffi0/backend_tests.py +++ b/testing/cffi0/backend_tests.py @@ -1386,6 +1386,7 @@ ffi = FFI(backend=self.Backend()) ffi.cdef("enum foo;") with warnings.catch_warnings(record=True) as log: + warnings.simplefilter("always") n = ffi.cast("enum foo", -1) assert int(n) == 0xffffffff assert str(log[0].message) == ( From pypy.commits at gmail.com Wed Sep 19 11:43:59 2018 From: pypy.commits at gmail.com (arigo) Date: Wed, 19 Sep 2018 08:43:59 -0700 (PDT) Subject: [pypy-commit] pypy default: import cffi/e2e324a2f13e Message-ID: <5ba26ebf.1c69fb81.f4714.4207@mx.google.com> 
Author: Armin Rigo Branch: Changeset: r95144:de869e4e2609 Date: 2018-09-19 17:15 +0200 http://bitbucket.org/pypy/pypy/changeset/de869e4e2609/ Log: import cffi/e2e324a2f13e diff --git a/lib_pypy/cffi/setuptools_ext.py b/lib_pypy/cffi/setuptools_ext.py --- a/lib_pypy/cffi/setuptools_ext.py +++ b/lib_pypy/cffi/setuptools_ext.py @@ -162,6 +162,17 @@ module_path = module_name.split('.') module_path[-1] += '.py' generate_mod(os.path.join(self.build_lib, *module_path)) + def get_source_files(self): + # This is called from 'setup.py sdist' only. Exclude + # the generate .py module in this case. + saved_py_modules = self.py_modules + try: + if saved_py_modules: + self.py_modules = [m for m in saved_py_modules + if m != module_name] + return base_class.get_source_files(self) + finally: + self.py_modules = saved_py_modules dist.cmdclass['build_py'] = build_py_make_mod # distutils and setuptools have no notion I could find of a @@ -171,6 +182,7 @@ # the module. So we add it here, which gives a few apparently # harmless warnings about not finding the file outside the # build directory. + # Then we need to hack more in get_source_files(); see above. 
if dist.py_modules is None: dist.py_modules = [] dist.py_modules.append(module_name) diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -3957,6 +3957,7 @@ z3 = cast(BVoidP, 0) z4 = cast(BUCharP, 0) with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") newp(new_pointer_type(BIntP), z1) # warn assert len(w) == 1 newp(new_pointer_type(BVoidP), z1) # fine diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py @@ -1387,6 +1387,7 @@ ffi = FFI(backend=self.Backend()) ffi.cdef("enum foo;") with warnings.catch_warnings(record=True) as log: + warnings.simplefilter("always") n = ffi.cast("enum foo", -1) assert int(n) == 0xffffffff assert str(log[0].message) == ( diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py @@ -46,14 +46,14 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def test_sin_no_return_value(self): + def test_lround_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void sin(double x); + void lround(double x); """) m = ffi.dlopen(lib_m) - x = m.sin(1.23) + x = m.lround(1.23) assert x is None def test_dlopen_filename(self): From pypy.commits at gmail.com Wed Sep 19 11:46:24 2018 From: pypy.commits at gmail.com (arigo) Date: Wed, 19 Sep 2018 08:46:24 -0700 (PDT) Subject: [pypy-commit] pypy default: fix test Message-ID: 
<5ba26f50.1c69fb81.23eb2.ccdb@mx.google.com> Author: Armin Rigo Branch: Changeset: r95145:4ffcc8f9acd4 Date: 2018-09-19 17:22 +0200 http://bitbucket.org/pypy/pypy/changeset/4ffcc8f9acd4/ Log: fix test diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -36,3 +36,6 @@ .. branch: pyparser-improvements-3 Small refactorings in the Python parser. + +.. branch: fix-readme-typo + From pypy.commits at gmail.com Wed Sep 19 12:38:46 2018 From: pypy.commits at gmail.com (arigo) Date: Wed, 19 Sep 2018 09:38:46 -0700 (PDT) Subject: [pypy-commit] pypy default: oops Message-ID: <5ba27b96.1c69fb81.5e302.3ce9@mx.google.com> Author: Armin Rigo Branch: Changeset: r95147:99b6e798bdad Date: 2018-09-19 18:36 +0200 http://bitbucket.org/pypy/pypy/changeset/99b6e798bdad/ Log: oops diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py --- a/pypy/module/__builtin__/functional.py +++ b/pypy/module/__builtin__/functional.py @@ -136,7 +136,7 @@ max_jitdriver = jit.JitDriver(name='max', greens=['has_key', 'has_item', 'w_type'], reds='auto') - at specialize.arg(2) + at specialize.arg(3) def min_max_sequence(space, w_sequence, w_key, implementation_of): if implementation_of == "max": compare = space.gt @@ -172,7 +172,7 @@ raise oefmt(space.w_ValueError, "arg is an empty sequence") return w_max_item - at specialize.arg(2) + at specialize.arg(3) @jit.look_inside_iff(lambda space, args_w, w_key, implementation_of: jit.loop_unrolling_heuristic(args_w, len(args_w), 3)) def min_max_multiple_args(space, args_w, w_key, implementation_of): From pypy.commits at gmail.com Wed Sep 19 12:49:06 2018 From: pypy.commits at gmail.com (arigo) Date: Wed, 19 Sep 2018 09:49:06 -0700 (PDT) Subject: [pypy-commit] pypy default: Fix the cpython test for newer pypy versions, with a comment Message-ID: <5ba27e02.1c69fb81.cbd8b.5878@mx.google.com> Author: Armin Rigo Branch: Changeset: r95148:c8d93fa469ba 
Date: 2018-09-19 18:48 +0200 http://bitbucket.org/pypy/pypy/changeset/c8d93fa469ba/ Log: Fix the cpython test for newer pypy versions, with a comment diff --git a/lib-python/2.7/test/test_inspect.py b/lib-python/2.7/test/test_inspect.py --- a/lib-python/2.7/test/test_inspect.py +++ b/lib-python/2.7/test/test_inspect.py @@ -45,6 +45,9 @@ git = mod.StupidGit() +class ExampleClassWithSlot(object): + __slots__ = 'myslot' + class IsTestBase(unittest.TestCase): predicates = set([inspect.isbuiltin, inspect.isclass, inspect.iscode, inspect.isframe, inspect.isfunction, inspect.ismethod, @@ -96,7 +99,11 @@ else: self.assertFalse(inspect.isgetsetdescriptor(type(tb.tb_frame).f_locals)) if hasattr(types, 'MemberDescriptorType'): - self.istest(inspect.ismemberdescriptor, 'type(lambda: None).func_globals') + # App-level slots are member descriptors on both PyPy and + # CPython, but the various built-in attributes are all + # getsetdescriptors on PyPy. So check ismemberdescriptor() + # with an app-level slot. + self.istest(inspect.ismemberdescriptor, 'ExampleClassWithSlot.myslot') else: self.assertFalse(inspect.ismemberdescriptor(type(lambda: None).func_globals)) From pypy.commits at gmail.com Thu Sep 20 06:29:25 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 20 Sep 2018 03:29:25 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: add a section about bad C APIs Message-ID: <5ba37685.1c69fb81.3237.15bd@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5904:fbd92c0e1a20 Date: 2018-09-20 10:53 +0200 http://bitbucket.org/pypy/extradoc/changeset/fbd92c0e1a20/ Log: add a section about bad C APIs diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -87,10 +87,6 @@ To understand some of cpyext challenges, you need to have at least a rough idea of how the PyPy GC works. 
-XXX: maybe the following section is too detailed and not really necessary to -understand cpyext? We could simplify it by saying "PyPy uses a generational -GC, objects can move". - Contrarily to the popular belief, the "Garbage Collector" is not only about collecting garbage: instead, it is generally responsible of all memory management, including allocation and deallocation. @@ -136,7 +132,7 @@ pass them to C extensions. We surely need a way to handle that. -`PyObject*` in PyPy +``PyObject*`` in PyPy --------------------- Another challenge is that sometimes, ``PyObject*`` structs are not completely @@ -151,7 +147,7 @@ So, we have two issues so far: objects which can move, and incompatible low-level layouts. ``cpyext`` solves both by decoupling the RPython and the C representations: we have two "views" of the same entity, depending on whether -we are in the PyPy world (the moving ``W_Root`` subclass) or in the C world +we are in the PyPy world (the movable ``W_Root`` subclass) or in the C world (the non-movable ``PyObject*``). ``PyObject*`` are created lazily, only when they are actually needed: the @@ -243,7 +239,7 @@ About the GIL, we won't dig into details of `how it is handled in cpyext`_: for the purpose of this post, it is enough to know that whenever we enter the -C land, we store the current theead id into a global variable which is +C land, we store the current thread id into a global variable which is accessible also from C; conversely, whenever we go back from RPython to C, we restore this value to 0. @@ -368,8 +364,8 @@ The solution is simple: rewrite as much as we can in C instead of RPython, so to avoid unnecessary roundtrips: this was the topic of most of the Cape Town -sprint and resulted in the ``cpyext-avoid-roundtrip``, which was eventually -merged_. +sprint and resulted in the ``cpyext-avoid-roundtrip`` branch, which was +eventually merged_. 
Of course, it is not possible to move **everything** to C: there are still operations which need to be implemented in RPython. For example, think of @@ -443,8 +439,8 @@ managed, etc., and we can assume that allocation costs are the same than on CPython. -However, as soon as we return these ``PyObject*`` Python, we need to allocate -its ``W_Root`` equivalent: if you do it in a small loop like in the example +As soon as we return these ``PyObject*`` to Python, we need to allocate +their ``W_Root`` equivalent: if you do it in a small loop like in the example above, you end up allocating all these ``W_Root`` inside the nursery, which is a good thing since allocation is super fast (see the section above about the PyPy GC). @@ -471,6 +467,56 @@ C API quirks -------------------- -XXX explain why borrowed references are a problem for us; possibly link to: https://pythoncapi.readthedocs.io/bad_api.html#borrowed-references +Finally, there is another source of slowdown which is beyond our control: some +parts of the CPython C API are badly designed and expose some of the +implementation details of CPython. -the calling convention is inefficient: why do I have to allocate a PyTuple* of PyObect*, just to unwrap them immediately? +The major example is reference counting: the ``Py_INCREF`` / ``Py_DECREF`` API +is designed in a way that forces other implementations to emulate +refcounting even in presence of other GC management schemes, as explained +above. + +Another example is borrowed references: there are API functions which **do +not** incref an object before returning it, e.g. `PyList_GetItem`_. This is +done for performance reasons because we can avoid a whole incref/decref pair, +if the caller needs to handle the returned item only temporarily: the item is +kept alive because it is in the list anyway. + +For PyPy this is a challenge: thanks to `list strategies`_, often lists are +represented in a compact way: e.g.
a list containing only integers is stored +as a C array of ``long``. How to implement ``PyList_GetItem``? We cannot +simply create a ``PyObject*`` on the fly, because the caller will never decref +it and it will result in a memory leak. + +The current solution is very inefficient: basically, the first time we do a +``PyList_GetItem``, we convert_ the **whole** list to a list of +``PyObject*``. This is bad in two ways: the first is that we potentially pay a +lot of unneeded conversion cost in case we will never access the other items +of the list; the second is that by doing that we lose all the performance +benefit granted by the original list strategy, making it slower even for the +rest of pure-python code which will manipulate the list later. + +``PyList_GetItem`` is an example of a bad API because it assumes that the list +is implemented as an array of ``PyObject*``: after all, in order to return a +borrowed reference, we need a reference to borrow, don't we? + +Fortunately, (some) CPython developers are aware of these problems, and there +is an ongoing project to `design a better C API`_ which aims to fix exactly +this kind of problem. + +Nonetheless, in the meantime we still need to implement the current +half-broken APIs: there are no easy solutions for that, and it is likely that +we will always need to pay some performance penalty in order to implement them +correctly. + +However, what we could potentially do is to provide alternative functions +which do the same job but are more PyPy friendly: for example, we could think +of implementing ``PyList_GetItemNonBorrowed`` or something like that: then, C +extensions could choose to use it (possibly hidden inside some macro and +``#ifdef``) if they want to be fast on PyPy. + + +.. _`PyList_GetItem`: https://docs.python.org/2/c-api/list.html#c.PyList_GetItem +.. _`list strategies`: https://morepypy.blogspot.com/2011/10/more-compact-lists-with-list-strategies.html +..
_convert: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/listobject.py#lines-28 +.. _`design a better C API`: https://pythoncapi.readthedocs.io/ From pypy.commits at gmail.com Thu Sep 20 06:29:28 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 20 Sep 2018 03:29:28 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: write sections about benchmarks and next steps Message-ID: <5ba37688.1c69fb81.5dceb.bfa0@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5905:a3f4e0ff4d99 Date: 2018-09-20 12:10 +0200 http://bitbucket.org/pypy/extradoc/changeset/a3f4e0ff4d99/ Log: write sections about benchmarks and next steps diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -21,6 +21,10 @@ poor, meaning that a Python program which makes heavy use of cpyext extensions is likely to be slower on PyPy than on CPython. +Note: in this blog post we are talking about Python 2.7 because it is still +the default version of PyPy: however most of the implementation of cpyext is +shared with PyPy3, so everything applies to that as well. + .. _`official C API`: https://docs.python.org/2/c-api/index.html @@ -207,7 +211,7 @@ well. This means that in theory, passing an arbitrary Python object to C is -potentially costly, because it involves doing a dictionary lookup. I assume +potentially costly, because it involves doing a dictionary lookup. We assume that this cost will eventually show up in the profiler: however, at the time of writing there are other parts of cpyext which are even more costly (as we will show later), so the cost of the dict lookup is never evident in the @@ -301,7 +305,7 @@ assumptions, usually pointing at the cost of conversions between ``W_Root`` and ``PyObject*``, but we never actually measured it. 
-So, I decided to write a set of `cpyext microbenchmarks`_ to measure the +So, we decided to write a set of `cpyext microbenchmarks`_ to measure the performance of various operation. The result was somewhat surprising: the theory suggests that when you do a cpyext C call, you should pay the border-crossing costs only once, but what the profiler told us was that we @@ -520,3 +524,87 @@ .. _`list strategies`: https://morepypy.blogspot.com/2011/10/more-compact-lists-with-list-strategies.html .. _convert: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/listobject.py#lines-28 .. _`design a better C API`: https://pythoncapi.readthedocs.io/ + + +Current performance +-------------------- + +During the whole blog post we kept talking about the slowness of cpyext: how +much it is, exactly? + +We decided to concentrate on microbenchmarks_ for now: as it should be evident +by now there are simply too many issues which can slow down a cpyext +benchmark, and microbenchmarks help us to concentrate on one (or few) at a +time. + +The microbenchmarks measure very simple stuff, like calling function and +methods with the various calling conventions (no arguments, one arguments, +multiple arguments), passing various types as arguments (to measure conversion +costs), allocating objects from C, and so on. + +This was the performance of PyPy 5.8 relative and normalized to CPython 2.7, +the lower the better: + +.. image:: pypy58.png + +PyPy was horribly slow everywhere, ranging from 2.5x to 10x slower. It is +particularly interesting to compare ``simple.noargs``, which measure the cost +of calling an empty function with no arguments, and ``simple.onearg(i)``, +which measure the cost calling an empty function passing an integer argument: +the latter is ~2x slower than the former, indicating that the conversion cost +of integers is huge. 
+ +PyPy 5.8 was the last release before the famous Cape Town sprint, when we +started to look at cpyext performance seriously. This is the performance of +PyPy 6.0, the latest release at the time of writing: + +.. image:: pypy60.png + +The results are amazing! PyPy is now massively faster than before, and for +most benchmarks it is even faster than CPython: yes, you read it correctly: +PyPy is faster than CPython at doing CPython's job, even considering all the +extra work it has to do to emulate the C API. This happens thanks to the JIT, +which produces speedups high enough to counterbalance the slowdown caused by +cpyext. + +There are two microbenchmarks which are still slower though: ``allocate_int`` +and ``allocate_tuple``, for the reasons explained in the section about +`Conversion costs`_. + +.. _microbenchmarks: https://github.com/antocuni/cpyext-benchmarks + + +Next steps +----------- + +Despite the spectacular results we got so far, cpyext is still slow enough to +kill performance in most real-world code which uses C extensions extensively +(e.g., the omnipresent numpy). + +Our current approach is something along these lines: + + 1. run a real-world small benchmark which exercises cpyext + + 2. measure and find the bottleneck + + 3. write a corresponding microbenchmark + + 4. optimize it + + 5. repeat + +On one hand, this is a daunting task because the C API is huge and we need to +tackle functions one by one. On the other hand, not all the functions are +equally important, and it is enough to optimize a relatively small subset to +improve lots of different use cases. + +The biggest result is that now we have a clear picture of what are the +problems, and we developed some technical solutions to fix them. It is "only" +a matter of tackling them, one by one. Moreover, keep in mind that most of +the work was done during two sprints, for a total of 2-3 man-months.
+ +XXX: find a conclusion + + + + diff --git a/blog/draft/2018-09-cpyext/plot.py b/blog/draft/2018-09-cpyext/plot.py new file mode 100644 --- /dev/null +++ b/blog/draft/2018-09-cpyext/plot.py @@ -0,0 +1,45 @@ +def plot_benchmarks(filename, *pythons): + import numpy as np + import matplotlib + import matplotlib.pyplot as plt + + matplotlib.rcParams['figure.figsize'] = (20,15) + + data = {"CPython": {"simple.noargs": 0.43, "simple.onearg(None)": 0.45, "simple.onearg(i)": 0.44, "simple.varargs": 0.6, "simple.allocate_int": 0.46, "simple.allocate_tuple": 0.81, "obj.noargs": 0.44, "obj.onearg(None)": 0.48, "obj.onearg(i)": 0.47, "obj.varargs": 0.63, "len(obj)": 0.34, "obj[0]": 0.25}, + "PyPy 5.8": {"simple.noargs": 1.09, "simple.onearg(None)": 1.34, "simple.onearg(i)": 2.6, "simple.varargs": 2.74, "simple.allocate_int": 2.49, "simple.allocate_tuple": 8.21, "obj.noargs": 1.27, "obj.onearg(None)": 1.55, "obj.onearg(i)": 2.85, "obj.varargs": 3.06, "len(obj)": 1.36, "obj[0]": 1.53}, + "PyPy 5.9": {"simple.noargs": 0.16, "simple.onearg(None)": 0.2, "simple.onearg(i)": 1.61, "simple.varargs": 3.08, "simple.allocate_int": 1.69, "simple.allocate_tuple": 6.39, "obj.noargs": 1.17, "obj.onearg(None)": 1.74, "obj.onearg(i)": 3.03, "obj.varargs": 2.95, "len(obj)": 1.24, "obj[0]": 1.37}, + "PyPy 5.10": {"simple.noargs": 0.18, "simple.onearg(None)": 0.21, "simple.onearg(i)": 1.52, "simple.varargs": 2.59, "simple.allocate_int": 1.67, "simple.allocate_tuple": 6.44, "obj.noargs": 1.12, "obj.onearg(None)": 1.41, "obj.onearg(i)": 2.62, "obj.varargs": 2.89, "len(obj)": 1.21, "obj[0]": 1.32}, + "PyPy 6.0": {"simple.noargs": 0.18, "simple.onearg(None)": 0.2, "simple.onearg(i)": 0.22, "simple.varargs": 0.42, "simple.allocate_int": 0.89, "simple.allocate_tuple": 5.02, "obj.noargs": 0.19, "obj.onearg(None)": 0.22, "obj.onearg(i)": 0.24, "obj.varargs": 0.45, "len(obj)": 0.15, "obj[0]": 0.28}} + + + + #pythons = data.keys() + #pythons = ["CPython", "PyPy 5.10", "PyPy 6.0"] + benchmarks = 
sorted(data[pythons[0]].keys()) + + # create plot + fig, ax = plt.subplots() + index = np.arange(len(benchmarks)) + bar_width = 0.20 + opacity = 0.8 + + colors = ('blue', 'orange', 'red') #'bgryk' + + for i, python in enumerate(pythons): + values = [data[python][bench] for bench in benchmarks] + normalized = [v/data['CPython'][bench] for (v, bench) in zip(values, benchmarks)] + #print python, values + rects1 = plt.bar(index + bar_width*i, normalized, bar_width, + label=python, + color=colors[i]) + + plt.xlabel('Benchmark') + plt.ylabel('Time (normalized)') + plt.title('cpyext microbenchmarks') + plt.xticks(index + bar_width, benchmarks, rotation=45) + plt.legend() + + plt.savefig(filename) + +plot_benchmarks("pypy58.png", "CPython", "PyPy 5.8") +plot_benchmarks("pypy60.png", "CPython", "PyPy 5.8", "PyPy 6.0") diff --git a/blog/draft/2018-09-cpyext/pypy58.png b/blog/draft/2018-09-cpyext/pypy58.png new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e7636a1d81e6dd169c1abd67b283a93870191cf9 GIT binary patch [cut] diff --git a/blog/draft/2018-09-cpyext/pypy60.png b/blog/draft/2018-09-cpyext/pypy60.png new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f4bc43a7c400fe0dff54421716bb150cb5ab4458 GIT binary patch [cut] From pypy.commits at gmail.com Thu Sep 20 17:21:16 2018 From: pypy.commits at gmail.com (mattip) Date: Thu, 20 Sep 2018 14:21:16 -0700 (PDT) Subject: [pypy-commit] extradoc edits1: break up sentences, other small edits Message-ID: <5ba40f4c.1c69fb81.b74eb.a7b8@mx.google.com> Author: Matti Picus Branch: edits1 Changeset: r5906:f05b4f08b01a Date: 2018-09-21 00:20 +0300 http://bitbucket.org/pypy/extradoc/changeset/f05b4f08b01a/ Log: break up sentences, other small edits diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -1,28 +1,28 @@ -Inside cpyext: why emulating CPython C API is so hard 
-====================================================== +Inside cpyext: Why emulating CPython C API is so Hard +===================================================== -cpyext is PyPy's subsistem which is responsible to provide a compatibility -layer to compile and run CPython C extensions inside PyPy. Often people asks -why it this particular extension doesn't work or it is very slow on PyPy, but -usually it is hard to answer without going into technical details: the goal of +``cpyext`` is PyPy's subsystem which provides a compatibility +layer to compile and run CPython C extensions inside PyPy. Often people ask +why a particular C extension doesn't work or is very slow on PyPy. +Usually it is hard to answer without going into technical details. The goal of this blog post is to explain some of these technical details, so that we can simply link here instead of explaing again and again :). -From a 10.000 foot view, cpyext is PyPy's version of ``"Python.h"``: every time -you compile and extension which uses that header file, you are using cpyext: -this includes extension explicitly written in C (such as ``numpy``) and +From a 10.000 foot view, ``cpyext`` is PyPy's version of ``"Python.h"``. Every time +you compile an extension which uses that header file, you are using ``cpyext``. +This includes extension explicitly written in C (such as ``numpy``) and extensions which are generated from other compilers/preprocessors (e.g. ``Cython``). At the time of writing, the current status is that most C extensions "just -work": generally speaking, you can simply ``pip install`` all of them, +work". Generally speaking, you can simply ``pip install`` them, provided they use the public, `official C API`_ instead of poking at private -implementation details. However, the performance of cpyext are generally -poor, meaning that a Python program which makes heavy use of cpyext extensions +implementation details. However, the performance of cpyext is generally +poor. 
A Python program which makes heavy use of ``cpyext`` extensions is likely to be slower on PyPy than on CPython. Note: in this blog post we are talking about Python 2.7 because it is still -the default version of PyPy: however most of the implementation of cpyext is +the default version of PyPy: however most of the implementation of ``cpyext`` is shared with PyPy3, so everything applies to that as well. .. _`official C API`: https://docs.python.org/2/c-api/index.html @@ -31,29 +31,29 @@ C API Overview --------------- -In CPython, at the C level, Python objects are represented as ``PyObject*``, +In CPython, which is written in C, Python objects are represented as ``PyObject*``, i.e. (mostly) opaque pointers to some common "base struct". CPython uses a very simple memory management scheme: when you create an -object, you allocate a block of memory of the appropriate size on the heap; -depending on the details you might end up calling different allocators, but +object, you allocate a block of memory of the appropriate size on the heap. +Depending on the details, you might end up calling different allocators, but for the sake of simplicity, you can think that this ends up being a call to ``malloc()``. The resulting block of memory is initialized and casted to to ``PyObject*``: this address never changes during the object lifetime, and the C code can freely pass it around, store it inside containers, retrieve it later, etc. -Memory is managed using reference counting: when you create a new reference to +Memory is managed using reference counting. When you create a new reference to an object, or you discard a reference you own, you have to increment_ or -decrement_ reference counter accordingly. When the reference counter goes to -0, it means that the object is no longer used by anyone and can safely be +decrement_ the reference counter accordingly. When the reference counter goes to +0, it means that the object is no longer used and can safely be destroyed. 
Again, we can simplify and say that this results in a call to ``free()``, which finally releases the memory which was allocated by ``malloc()``. .. _increment: https://docs.python.org/2/c-api/refcounting.html#c.Py_INCREF .. _decrement: https://docs.python.org/2/c-api/refcounting.html#c.Py_DECREF -Generally speaking, the only way to operate on ``PyObject*`` is to call the +Generally speaking, the only way to operate on a ``PyObject*`` is to call the appropriate API functions. For example, to convert a given ``PyObject*`` to a C integer, you can use _`PyInt_AsLong()`; to add two objects together, you can call _`PyNumber_Add()`. @@ -61,7 +61,7 @@ .. _`PyInt_AsLong()`: https://docs.python.org/2/c-api/int.html?highlight=pyint_check#c.PyInt_AsLong .. _`PyNumber_Add()`: https://docs.python.org/2/c-api/number.html#c.PyNumber_Add -Internally, PyPy uses a similar approach: all Python objects are subclasses of +Internally, PyPy uses a similar approach. All Python objects are subclasses of the RPython ``W_Root`` class, and they are operated by calling methods on the ``space`` singleton, which represents the interpreter. @@ -86,27 +86,27 @@ The PyPy GC -------------- +----------- -To understand some of cpyext challenges, you need to have at least a rough +To understand some of ``cpyext`` challenges, you need to have at least a rough idea of how the PyPy GC works. Contrarily to the popular belief, the "Garbage Collector" is not only about -collecting garbage: instead, it is generally responsible of all memory +collecting garbage: instead, it is generally responsible for all memory management, including allocation and deallocation. Whereas CPython uses a combination of malloc/free/refcounting to manage memory, the PyPy GC uses a completely different approach. It is designed assuming that a dynamic language like Python behaves the following way: - - you create, either directly or indirectly, lots of objects; + - You create, either directly or indirectly, lots of objects. 
- - most of these objects are temporary and very short-lived: think e.g. of + - Most of these objects are temporary and very short-lived. Think e.g. of doing ``a + b + c``: you need to allocate an object to hold the temporary - result of ``a + b``, but it dies very quickly because you no longer need it - when you do the final ``+ c`` part; + result of ``a + b``, then it dies very quickly because you no longer need it + when you do the final ``+ c`` part. - - only small fraction of the objects survives and stay around for a while. + - Only small fraction of the objects survive and stay around for a while. So, the strategy is: make allocation as fast as possible; make deallocation of short-lived objects as fast as possible; find a way to handle the remaining @@ -114,23 +114,24 @@ This is done using a **Generational GC**: the basic idea is the following: - 1. we have a nursery, where we allocate "young objects" very fast; + 1. We have a nursery, where we allocate "young objects" very quickly. - 2. when the nursery is full, we start what we call a "minor collection": we - do quick scan to determine the small set of objects which survived so - far; + 2. When the nursery is full, we start what we call a "minor collection". + + - We do a quick scan to determine the small set of objects which survived so + far - 3. we **move** these objects out of the nursery, and we place them in the - area of memory which contains the "old objects"; since the address of the - objects just changed, we fix all the references to them accordingly; + - We **move** these objects out of the nursery, and we place them in the + area of memory which contains the "old objects". Since the address of the + objects changes, we fix all the references to them accordingly. - 4. now the nursery contains only objects which died young: we can simply - discard all of them very quickly, reset the nursery and use the same area + 4. now the nursery contains only objects which "died young". 
We can + discard all of them very quickly, reset the nursery, and use the same area of memory to allocate new objects from now. In practice, this scheme works very well and it is one of the reasons why PyPy is much faster than CPython. However, careful readers have surely noticed -that this is a problem for ``cpyext``: on one hand, we have PyPy objects which +that this is a problem for ``cpyext``. On one hand, we have PyPy objects which can potentially move and change their underlying memory address; on the other hand, we need a way to represent them as fixed-address ``PyObject*`` when we pass them to C extensions. We surely need a way to handle that. @@ -141,39 +142,40 @@ Another challenge is that sometimes, ``PyObject*`` structs are not completely opaque: there are parts of the public API which expose to the user specific -fields of some concrete C struct, for example the definition of PyTypeObject_: -since the low-level layout of PyPy ``W_Root`` objects is completely different +fields of some concrete C struct. For example the definition of PyTypeObject_ +which exposes many of the ``tp_*`` slots to Cython (OK - ???) +Since the low-level layout of PyPy ``W_Root`` objects is completely different than the one used by CPython, we cannot simply pass RPython objects to C; we need a way to handle the difference. .. _PyTypeObject: https://docs.python.org/2/c-api/typeobj.html -So, we have two issues so far: objects which can move, and incompatible +So, we have two issues so far: objects can move, and incompatible low-level layouts. ``cpyext`` solves both by decoupling the RPython and the C -representations: we have two "views" of the same entity, depending on whether +representations. We have two "views" of the same entity, depending on whether we are in the PyPy world (the movable ``W_Root`` subclass) or in the C world (the non-movable ``PyObject*``). 
-``PyObject*`` are created lazily, only when they are actually needed: the +``PyObject*`` are created lazily, only when they are actually needed. The vast majority of PyPy objects are never passed to any C extension, so we don't -pay any penalty in that case; however, the first time we pass a ``W_Root`` to +pay any penalty in that case. However, the first time we pass a ``W_Root`` to C, we allocate and initialize its ``PyObject*`` counterpart. The same idea applies also to objects which are created in C, e.g. by calling -_`PyObject_New`: at first, only the ``PyObject*`` exists and it is -exclusively managed by reference counting: as soon as we pass it to the PyPy +_`PyObject_New`. At first, only the ``PyObject*`` exists and it is +exclusively managed by reference counting. As soon as we pass it to the PyPy world (e.g. as a return value of a function call), we create its ``W_Root`` counterpart, which is managed by the GC as usual. .. _`PyObject_New`: https://docs.python.org/2/c-api/allocation.html#c.PyObject_New Here we start to see why calling cpyext modules is more costly in PyPy than in -CPython: we need to pay some penalty for all the conversions between +CPython. We need to pay some penalty for all the conversions between ``W_Root`` and ``PyObject*``. Moreover, the first time we pass a ``W_Root`` to C we also need to allocate the memory for the ``PyObject*`` using a slowish "CPython-style" memory -allocator: in practice, for all the objects which are passed to C we pay more +allocator. In practice, for all the objects which are passed to C we pay more or less the same costs as CPython, thus effectively "undoing" the speedup guaranteed by PyPy's Generational GC under normal circumstances. 
@@ -181,7 +183,7 @@ Maintaining the link between ``W_Root`` and ``PyObject*`` ----------------------------------------------------------- -So, we need a way to convert between ``W_Root`` and ``PyObject*`` and +We now need a way to convert between ``W_Root`` and ``PyObject*`` and vice-versa; also, we need to to ensure that the lifetime of the two entities are in sync. In particular: @@ -192,7 +194,7 @@ make sure that the GC does not collect the ``W_Root``. The ``PyObject*`` ==> ``W_Root`` link is maintained by the special field -_`ob_pypy_link` which is added to all ``PyObject*``: on a 64 bit machine this +_`ob_pypy_link` which is added to all ``PyObject*``. On a 64 bit machine this means that all ``PyObject*`` have 8 bytes of overhead, but then the conversion is very quick, just reading the field. @@ -205,7 +207,7 @@ However, for a _`few selected` ``W_Root`` subclasses we **do** maintain a direct link using the special ``_cpy_ref`` field to improve performance. In particular, we use it for ``W_TypeObject`` (which is big anyway, so a 8 bytes -overhead is negligible) and ``W_NoneObject``: ``None`` is passed around very +overhead is negligible) and ``W_NoneObject``. ``None`` is passed around very often, so we want to ensure that the conversion to ``PyObject*`` is very fast. Moreover it's a singleton, so the 8 bytes overhead is negligible as well. @@ -213,7 +215,7 @@ This means that in theory, passing an arbitrary Python object to C is potentially costly, because it involves doing a dictionary lookup. We assume that this cost will eventually show up in the profiler: however, at the time -of writing there are other parts of cpyext which are even more costly (as we +of writing there are other parts of ``cpyext`` which are even more costly (as we will show later), so the cost of the dict lookup is never evident in the profiler. 
@@ -224,25 +226,25 @@ Crossing the border between RPython and C ------------------------------------------- +----------------------------------------- There are two other things we need to care about whenever we cross the border between RPython and C, and vice-versa: exception handling and the GIL. In the C API, exceptions are raised by calling `PyErr_SetString()`_ (or one of `many other functions`_ which have a similar effect), which basically works by -creating an exception value and storing it in some global variable; then, the -function signals that an exception has occurred by returning an error value, +creating an exception value and storing it in some global variable. The +function then signals that an exception has occurred by returning an error value, usually ``NULL``. -On the other hand, in the PyPy interpreter they are propagated by raising the +On the other hand, in the PyPy interpreter, exceptions are propagated by raising the RPython-level OperationError_ exception, which wraps the actual app-level -exception values: to harmonize the two worlds, whenever we return from C to -RPython, we need to check whether a C API exception was raised and turn it -into an ``OperationError`` if needed. +exception values. To harmonize the two worlds, whenever we return from C to +RPython, we need to check whether a C API exception was raised and if so turn it +into an ``OperationError``. -About the GIL, we won't dig into details of `how it is handled in cpyext`_: -for the purpose of this post, it is enough to know that whenever we enter the +We won't dig into details of `how the GIL is handled in cpyext`_. +For the purpose of this post, it is enough to know that whenever we enter C land, we store the current thread id into a global variable which is accessible also from C; conversely, whenever we go back from RPython to C, we restore this value to 0. @@ -251,80 +253,80 @@ border between C and RPython, e.g. by calling a Python callback from C code.
All this complexity is automatically handled by the RPython function -`generic_cpy_call`_: if you look at the code you see that it takes care of 4 +`generic_cpy_call`_. If you look at the code you see that it takes care of 4 things: - 1. handling the GIL as explained above + 1. Handling the GIL as explained above. - 2. handling exceptions, if they are raised + 2. Handling exceptions, if they are raised. - 3. converting arguments from ``W_Root`` to ``PyObject*`` + 3. Converting arguments from ``W_Root`` to ``PyObject*``. - 4. converting the return value from ``PyObject*`` to ``W_Root`` + 4. Converting the return value from ``PyObject*`` to ``W_Root``. -So, we can see that calling C from RPython introduce some overhead: how much -is it? +So, we can see that calling C from RPython introduces some overhead. +Can we measure it? Assuming that the conversion between ``W_Root`` and ``PyObject*`` has a reasonable cost (as explained by the previous section), the overhead introduced by a single border-cross is still acceptable, especially if the callee is doing some non-negligible amount of work. -However this is not always the case; there are basically three problems that -make (or used to make) cpyext super slow: +However this is not always the case. There are basically three problems that +make (or used to make) ``cpyext`` super slow: - 1. paying the border-crossing cost for trivial operations which are called - very often, such as ``Py_INCREF`` + 1. Paying the border-crossing cost for trivial operations which are called + very often, such as ``Py_INCREF``. - 2. crossing the border back and forth many times, even if it's not strictly - needed + 2. Crossing the border back and forth many times, even if it's not strictly + needed. - 3. paying an excessive cost for argument and return value conversions + 3. Paying an excessive cost for argument and return value conversions. -The next sections are going to explain in more detail each of these problems.
+The next sections explain in more detail each of these problems. .. _`PyErr_SetString()`: https://docs.python.org/2/c-api/exceptions.html#c.PyErr_SetString .. _`many other functions`: https://docs.python.org/2/c-api/exceptions.html#exception-handling .. _OperationError: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/interpreter/error.py#lines-20 -.. _`how it is handled in cpyext`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-205 +.. _`how the GIL is handled in cpyext`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-205 .. _`generic_cpy_call`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-1757 Avoiding unnecessary roundtrips -------------------------------- -Prior to the `2017 Cape Town Sprint`_, cpyext was horribly slow, and we were +Prior to the `2017 Cape Town Sprint`_, ``cpyext`` was horribly slow, and we were well aware of it: the main reason was that we never really paid too much -attention to performances: as explained by this blog post, emulating all the +attention to performance. As explained in the blog post, emulating all the CPython quirks is basically a nightmare, so better to concentrate on correctness first. -However, we didn't really know **why** it was so slow: we had theories and +However, we didn't really know **why** it was so slow. We had theories and assumptions, usually pointing at the cost of conversions between ``W_Root`` and ``PyObject*``, but we never actually measured it. So, we decided to write a set of `cpyext microbenchmarks`_ to measure the -performance of various operation. The result was somewhat surprising: the +performance of various operations. 
The result was somewhat surprising: the theory suggests that when you do a cpyext C call, you should pay the border-crossing costs only once, but what the profiler told us was that we -were paying the cost of ``generic_cpy_call`` several times what we expected. +were paying the cost of ``generic_cpy_call`` several times more than what we expected. After a bit of investigation, we discovered this was ultimately caused by our -"correctness-first" approach. For simplicity of development and testing, when -we started cpyext we wrote everything in RPython: thus, every single API call +"correctness-first" approach. For simplicity of development and testing, when +we started ``cpyext`` we wrote everything in RPython: thus, every single API call made from C (like the omnipresent `PyArg_ParseTuple`_, `PyInt_AsLong`_, etc.) -had to cross back the C-to-RPython border: this was especially daunting for +had to cross back the C-to-RPython border. This was especially daunting for very simple and frequent operations like ``Py_INCREF`` and ``Py_DECREF``, which CPython implements as a single assembly instruction! -Another source of slowness was the implementation of ``PyTypeObject`` slots: -at the C level, these are function pointers which the interpreter calls to do +Another source of slow down was the implementation of ``PyTypeObject`` slots. +At the C level, these are function pointers which the interpreter calls to do certain operations, e.g. `tp_new`_ to allocate a new instance of that type. As usual, we have some magic to implement slots in RPython; in particular, -`_make_wrapper`_ does the opposite of ``generic_cpy_call``: it takes an +`_make_wrapper`_ does the opposite of ``generic_cpy_call``: it takes a RPython function and wraps it into a C function which can be safely called from C, handling the GIL, exceptions and argument conversions automatically. 
@@ -359,15 +361,15 @@ ``return result``, during the **C-to-RPython** step we convert it from ``PyObject*`` to ``W_IntObject(1234)``. -Phew! After we realized this, it was not so surprising that cpyext was very -slow :). And this was a simplified example, since we are not passing and -``PyObject*`` to the API call: if we did, we would need to convert it back and +Phew! After we realized this, it was not so surprising that ``cpyext`` was very +slow :). And this was a simplified example, since we are not passing a +``PyObject*`` to the API call. When we do, we need to convert it back and forth at every step. Actually, I am not even sure that what I described was the exact sequence of steps which used to happen, but you get the general idea. -The solution is simple: rewrite as much as we can in C instead of RPython, so -to avoid unnecessary roundtrips: this was the topic of most of the Cape Town +The solution is simple: rewrite as much as we can in C instead of RPython, +to avoid unnecessary roundtrips. This was the topic of most of the Cape Town sprint and resulted in the ``cpyext-avoid-roundtrip`` branch, which was eventually merged_. @@ -377,13 +379,14 @@ involves list strategies, so we cannot replicate it in C. However, we discovered that a large subset of the C API can benefit from this. -Moreover, the C API is **huge**: the biggest achievement of the branch was to -discover and invent this new way of writing cpyext code, but we still need to -convert many of the functions. Also, sometimes the rewrite is not automatic -or straighforward: cpyext is a delicate piece of software, so it happens often -that you end up debugging a segfault in gdb. +Moreover, the C API is **huge**. While we invented this new way of writing +``cpyext`` code, we still need to +convert many of the functions to the new paradigm. Sometimes the rewrite is +not automatic +or straightforward.
``cpyext`` is a delicate piece of software, so it happens often +that we make a mistake and end up staring at a segfault in gdb. -However, the most important remark is that the performance improvement we got +However, the most important takeaway is that the performance improvements we got from this optimization are impressive, as we will detail later. .. _`2017 Cape Town Sprint`: https://morepypy.blogspot.com/2017/10/cape-of-good-hope-for-pypy-hello-from.html @@ -396,13 +399,13 @@ Conversion costs ------------------ +---------------- The other potential big source of slowdown is the conversion of arguments between ``W_Root`` and ``PyObject*``. As explained earlier, the first time you pass a ``W_Root`` to C, you need to -allocate it's ``PyObject*`` counterpart. Suppose to have a ``foo`` function +allocate its ``PyObject*`` counterpart. Suppose you have a ``foo`` function defined in C, which takes a single int argument: .. sourcecode:: python @@ -416,12 +419,12 @@ CPython has the very same problem, which is solved by using a `free list`_ to `allocate ints`_. So, what we did was to simply `steal the code`_ from CPython -and do the exact same thing: this was also done in the +and do the exact same thing. This was also done in the ``cpyext-avoid-roundtrip`` branch, and the benchmarks show that it worked perfectly. Every type which is converted often to ``PyObject*`` must have a very fast -allocator: at the moment of writing, PyPy uses free lists only for ints and +allocator. At the moment of writing, PyPy uses free lists only for ints and tuples_: one of the next steps on our TODO list is certainly to use this technique with more types, like ``float``. @@ -439,24 +442,24 @@ At every iteration, we get an item out of the array: the return type is an instance of ``numpy.float64`` (a numpy scalar), i.e.
a ``PyObject*``: this is something which is implemented by numpy entirely in C, so completely -transparent to cpyext: we don't have any control on how it is allocated, +transparent to ``cpyext``. We don't have any control on how it is allocated, managed, etc., and we can assume that allocation costs are the same as on CPython. As soon as we return these ``PyObject*`` to Python, we need to allocate -theirs ``W_Root`` equivalent: if you do it in a small loop like in the example +their ``W_Root`` equivalent. If you do it in a small loop like in the example above, you end up allocating all these ``W_Root`` inside the nursery, which is a good thing since allocation is super fast (see the section above about the PyPy GC). -However, we also need to keep track of the ``W_Root`` to ``PyObject*`` link: -currently, we do this by putting all of them in a dictionary, but it is very +However, we also need to keep track of the ``W_Root`` to ``PyObject*`` link. +Currently, we do this by putting all of them in a dictionary, but it is very inefficient, especially because most of these objects die young and thus it is wasted work to do that for them. Currently, this is one of the biggest -unresolved problem in cpyext, and it is what casuses the two microbenchmarks +unresolved problems in ``cpyext``, and it is what causes the two microbenchmarks ``allocate_int`` and ``allocate_tuple`` to be very slow. -We are well aware of the problem, and we have a plan for how to fix it; the +We are well aware of the problem, and we have a plan for how to fix it. The explanation is too technical for the scope of this blog post as it requires a deep knowledge of the GC internals to be understood, but the details are here_. @@ -469,36 +472,36 @@ C API quirks --------------------- +------------ -Finally, there is another source of slowdown which is beyond our control: some +Finally, there is another source of slowdown which is beyond our control.
Some parts of the CPython C API are badly designed and expose some of the implementation details of CPython. -The major example is reference counting: the ``Py_INCREF`` / ``Py_DECREF`` API +The major example is reference counting. The ``Py_INCREF`` / ``Py_DECREF`` API is designed in such a way which forces other implementation to emulate refcounting even in presence of other GC management schemes, as explained above. -Another example is borrowed references: there are API functions which **do +Another example is borrowed references. There are API functions which **do not** incref an object before returning it, e.g. `PyList_GetItem`_. This is done for performance reasons because we can avoid a whole incref/decref pair, if the caller needs to handle the returned item only temporarily: the item is kept alive because it is in the list anyway. For PyPy this is a challenge: thanks to `list strategies`_, often lists are -represented in a compact way: e.g. a list containing only integers is stored +represented in a compact way. A list containing only integers is stored as a C array of ``long``. How to implement ``PyList_GetItem``? We cannot simply create a ``PyObject*`` on the fly, because the caller will never decref it and it will result in a memory leak. -The current solution is very inefficient: basically, the first time we do a +The current solution is very inefficient. The first time we do a ``PyList_GetItem``, we convert_ the **whole** list to a list of ``PyObject*``. This is bad in two ways: the first is that we potentially pay a lot of unneeded conversion cost in case we will never access the other items -of the list; the second is that by doing that we lose all the performance -benefit granted by the original list strategy, making it slower even for the -rest of pure-python code which will manipulate the list later. +of the list. 
The second is that by doing that we lose all the performance +benefit granted by the original list strategy, making it slower for the +rest of the pure-python code which will manipulate the list later. ``PyList_GetItem`` is an example of a bad API because it assumes that the list is implemented as an array of ``PyObject*``: after all, in order to return a @@ -506,10 +509,10 @@ Fortunately, (some) CPython developers are aware of these problems, and there is an ongoing project to `design a better C API`_ which aims to fix exactly -this kind of problems. +this kind of problem. Nonetheless, in the meantime we still need to implement the current -half-broken APIs: there is no easy solutions for that, and it is likely that +half-broken APIs. There is no easy solution for that, and it is likely that we will always need to pay some performance penalty in order to implement them correctly. @@ -527,22 +530,22 @@ Current performance --------------------- +------------------- -During the whole blog post we kept talking about the slowness of cpyext: how -much it is, exactly? +During the whole blog post we claimed ``cpyext`` is slow. How +slow is it, exactly? -We decided to concentrate on microbenchmarks_ for now: as it should be evident -by now there are simply too many issues which can slow down a cpyext +We decided to concentrate on microbenchmarks_ for now. It should be evident +by now that there are simply too many issues which can slow down a ``cpyext`` benchmark, and microbenchmarks help us to concentrate on one (or few) at a time. -The microbenchmarks measure very simple stuff, like calling function and +The microbenchmarks measure very simple things, like calling functions and methods with the various calling conventions (no arguments, one argument, -multiple arguments), passing various types as arguments (to measure conversion -costs), allocating objects from C, and so on.
+multiple arguments); passing various types as arguments (to measure conversion +costs); allocating objects from C, and so on. -This was the performance of PyPy 5.8 relative and normalized to CPython 2.7, +Here are the results from PyPy 5.8 relative and normalized to CPython 2.7, the lower the better: .. image:: pypy58.png @@ -555,7 +558,7 @@ of integers is huge. PyPy 5.8 was the last release before the famous Cape Town sprint, when we -started to look at cpyext performance seriously. These are the performance for +started to look at cpyext performance seriously. Here are the results for PyPy 6.0, the latest release at the time of writing: .. image:: pypy60.png @@ -575,9 +578,9 @@ Next steps ------------ +---------- -Despite the spectacular results we got so far, cpyext is still slow enough to +Despite the spectacular results we got so far, ``cpyext`` is still slow enough to kill performance in most real-world code which uses C extensions extensively (e.g., the omnipresent numpy). @@ -596,12 +599,13 @@ On one hand, this is a daunting task because the C API is huge and we need to tackle functions one by one. On the other hand, not all the functions are equally important, and it is enough to optimize a relatively small subset to -improve lots of different use cases. +improve many different use cases. -The biggest result is that now we have a clear picture of what are the -problems, and we developed some technical solutions to fix them. It is "only" -a matter of tackling them, one by one. Moreoever, keep in mind that most of -the work was done during two sprints, for a total 2-3 man-months. +Where a year ago we announced we have a working answer to run C extensions in PyPy, +we now have a clear picture of what are the +performance bottlenecks, and we have developed some technical solutions to fix them. It is "only" +a matter of tackling them, one by one. Most of +the work was done during two sprints, for a total of 2-3 person-months of work.
XXX: find a conclusion From pypy.commits at gmail.com Thu Sep 20 17:25:23 2018 From: pypy.commits at gmail.com (mattip) Date: Thu, 20 Sep 2018 14:25:23 -0700 (PDT) Subject: [pypy-commit] extradoc edit2: draft suggestion for conclusion Message-ID: <5ba41043.1c69fb81.b2b6c.7836@mx.google.com> Author: Matti Picus Branch: edit2 Changeset: r5907:2dfbd8333757 Date: 2018-09-21 00:24 +0300 http://bitbucket.org/pypy/extradoc/changeset/2dfbd8333757/ Log: draft suggestion for conclusion diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -603,8 +603,13 @@ a matter of tackling them, one by one. Moreover, keep in mind that most of the work was done during two sprints, for a total 2-3 man-months. -XXX: find a conclusion +We think this work is important for the Python ecosystem. PyPy has established +a baseline for performance in pure python code, providing an answer for the +"Python is slow" detractors. The techniques used to make ``cpyext`` performant +will let PyPy become an alternative for people who mix C extensions with python, +which, it turns out, is just about everyone. Today, many developers are forced +to seek performance by converting code from python to a lower-level language. We +feel there is no reason to do this, but in order to prove it we must be able +to run both their python and their c-extensions performantly; then we can begin +to educate them on how to write JIT-friendly code in the first place.
- - - From pypy.commits at gmail.com Thu Sep 20 18:53:51 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 20 Sep 2018 15:53:51 -0700 (PDT) Subject: [pypy-commit] extradoc edits1: merged branch Message-ID: <5ba424ff.1c69fb81.fd5f8.e4b3@mx.google.com> Author: Antonio Cuni Branch: edits1 Changeset: r5908:6aee2c34a437 Date: 2018-09-21 00:36 +0200 http://bitbucket.org/pypy/extradoc/changeset/6aee2c34a437/ Log: merged branch From pypy.commits at gmail.com Thu Sep 20 18:53:54 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 20 Sep 2018 15:53:54 -0700 (PDT) Subject: [pypy-commit] extradoc edit2: merged branch Message-ID: <5ba42502.1c69fb81.b0205.8af8@mx.google.com> Author: Antonio Cuni Branch: edit2 Changeset: r5909:d4df33d4c0ad Date: 2018-09-21 00:36 +0200 http://bitbucket.org/pypy/extradoc/changeset/d4df33d4c0ad/ Log: merged branch From pypy.commits at gmail.com Thu Sep 20 18:53:56 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 20 Sep 2018 15:53:56 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: merge matti's branch Message-ID: <5ba42504.1c69fb81.8545.7627@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5910:950b881dbe7f Date: 2018-09-21 00:37 +0200 http://bitbucket.org/pypy/extradoc/changeset/950b881dbe7f/ Log: merge matti's branch diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -1,28 +1,28 @@ -Inside cpyext: why emulating CPython C API is so hard -====================================================== +Inside cpyext: Why emulating CPython C API is so Hard +===================================================== -cpyext is PyPy's subsistem which is responsible to provide a compatibility -layer to compile and run CPython C extensions inside PyPy. 
Often people asks -why it this particular extension doesn't work or it is very slow on PyPy, but -usually it is hard to answer without going into technical details: the goal of +``cpyext`` is PyPy's subsystem which provides a compatibility +layer to compile and run CPython C extensions inside PyPy. Often people ask +why a particular C extension doesn't work or is very slow on PyPy. +Usually it is hard to answer without going into technical details. The goal of this blog post is to explain some of these technical details, so that we can simply link here instead of explaing again and again :). -From a 10.000 foot view, cpyext is PyPy's version of ``"Python.h"``: every time -you compile and extension which uses that header file, you are using cpyext: -this includes extension explicitly written in C (such as ``numpy``) and +From a 10.000 foot view, ``cpyext`` is PyPy's version of ``"Python.h"``. Every time +you compile an extension which uses that header file, you are using ``cpyext``. +This includes extension explicitly written in C (such as ``numpy``) and extensions which are generated from other compilers/preprocessors (e.g. ``Cython``). At the time of writing, the current status is that most C extensions "just -work": generally speaking, you can simply ``pip install`` all of them, +work". Generally speaking, you can simply ``pip install`` them, provided they use the public, `official C API`_ instead of poking at private -implementation details. However, the performance of cpyext are generally -poor, meaning that a Python program which makes heavy use of cpyext extensions +implementation details. However, the performance of cpyext is generally +poor. A Python program which makes heavy use of ``cpyext`` extensions is likely to be slower on PyPy than on CPython. 
Note: in this blog post we are talking about Python 2.7 because it is still -the default version of PyPy: however most of the implementation of cpyext is +the default version of PyPy: however most of the implementation of ``cpyext`` is shared with PyPy3, so everything applies to that as well. .. _`official C API`: https://docs.python.org/2/c-api/index.html @@ -31,29 +31,29 @@ C API Overview --------------- -In CPython, at the C level, Python objects are represented as ``PyObject*``, +In CPython, which is written in C, Python objects are represented as ``PyObject*``, i.e. (mostly) opaque pointers to some common "base struct". CPython uses a very simple memory management scheme: when you create an -object, you allocate a block of memory of the appropriate size on the heap; -depending on the details you might end up calling different allocators, but +object, you allocate a block of memory of the appropriate size on the heap. +Depending on the details, you might end up calling different allocators, but for the sake of simplicity, you can think that this ends up being a call to ``malloc()``. The resulting block of memory is initialized and casted to to ``PyObject*``: this address never changes during the object lifetime, and the C code can freely pass it around, store it inside containers, retrieve it later, etc. -Memory is managed using reference counting: when you create a new reference to +Memory is managed using reference counting. When you create a new reference to an object, or you discard a reference you own, you have to increment_ or -decrement_ reference counter accordingly. When the reference counter goes to -0, it means that the object is no longer used by anyone and can safely be +decrement_ the reference counter accordingly. When the reference counter goes to +0, it means that the object is no longer used and can safely be destroyed. 
Again, we can simplify and say that this results in a call to ``free()``, which finally releases the memory which was allocated by ``malloc()``. .. _increment: https://docs.python.org/2/c-api/refcounting.html#c.Py_INCREF .. _decrement: https://docs.python.org/2/c-api/refcounting.html#c.Py_DECREF -Generally speaking, the only way to operate on ``PyObject*`` is to call the +Generally speaking, the only way to operate on a ``PyObject*`` is to call the appropriate API functions. For example, to convert a given ``PyObject*`` to a C integer, you can use _`PyInt_AsLong()`; to add two objects together, you can call _`PyNumber_Add()`. @@ -61,7 +61,7 @@ .. _`PyInt_AsLong()`: https://docs.python.org/2/c-api/int.html?highlight=pyint_check#c.PyInt_AsLong .. _`PyNumber_Add()`: https://docs.python.org/2/c-api/number.html#c.PyNumber_Add -Internally, PyPy uses a similar approach: all Python objects are subclasses of +Internally, PyPy uses a similar approach. All Python objects are subclasses of the RPython ``W_Root`` class, and they are operated by calling methods on the ``space`` singleton, which represents the interpreter. @@ -86,27 +86,27 @@ The PyPy GC -------------- +----------- -To understand some of cpyext challenges, you need to have at least a rough +To understand some of ``cpyext`` challenges, you need to have at least a rough idea of how the PyPy GC works. Contrarily to the popular belief, the "Garbage Collector" is not only about -collecting garbage: instead, it is generally responsible of all memory +collecting garbage: instead, it is generally responsible for all memory management, including allocation and deallocation. Whereas CPython uses a combination of malloc/free/refcounting to manage memory, the PyPy GC uses a completely different approach. It is designed assuming that a dynamic language like Python behaves the following way: - - you create, either directly or indirectly, lots of objects; + - You create, either directly or indirectly, lots of objects. 
- - most of these objects are temporary and very short-lived: think e.g. of + - Most of these objects are temporary and very short-lived. Think e.g. of doing ``a + b + c``: you need to allocate an object to hold the temporary - result of ``a + b``, but it dies very quickly because you no longer need it - when you do the final ``+ c`` part; + result of ``a + b``, then it dies very quickly because you no longer need it + when you do the final ``+ c`` part. - - only small fraction of the objects survives and stay around for a while. + - Only a small fraction of the objects survive and stay around for a while. So, the strategy is: make allocation as fast as possible; make deallocation of short-lived objects as fast as possible; find a way to handle the remaining @@ -114,23 +114,24 @@ This is done using a **Generational GC**: the basic idea is the following: - 1. we have a nursery, where we allocate "young objects" very fast; + 1. We have a nursery, where we allocate "young objects" very quickly. - 2. when the nursery is full, we start what we call a "minor collection": we - do quick scan to determine the small set of objects which survived so - far; + 2. When the nursery is full, we start what we call a "minor collection". + + - We do a quick scan to determine the small set of objects which survived so + far. - 3. we **move** these objects out of the nursery, and we place them in the - area of memory which contains the "old objects"; since the address of the - objects just changed, we fix all the references to them accordingly; + - We **move** these objects out of the nursery, and we place them in the + area of memory which contains the "old objects". Since the address of the + objects changes, we fix all the references to them accordingly. - 4. now the nursery contains only objects which died young: we can simply - discard all of them very quickly, reset the nursery and use the same area + 3. Now the nursery contains only objects which "died young".
We can + discard all of them very quickly, reset the nursery, and use the same area of memory to allocate new objects from now. In practice, this scheme works very well and it is one of the reasons why PyPy is much faster than CPython. However, careful readers have surely noticed -that this is a problem for ``cpyext``: on one hand, we have PyPy objects which +that this is a problem for ``cpyext``. On one hand, we have PyPy objects which can potentially move and change their underlying memory address; on the other hand, we need a way to represent them as fixed-address ``PyObject*`` when we pass them to C extensions. We surely need a way to handle that. @@ -141,39 +142,40 @@ Another challenge is that sometimes, ``PyObject*`` structs are not completely opaque: there are parts of the public API which expose to the user specific -fields of some concrete C struct, for example the definition of PyTypeObject_: -since the low-level layout of PyPy ``W_Root`` objects is completely different +fields of some concrete C struct. For example the definition of PyTypeObject_ +which exposes many of the ``tp_*`` slots to Cython (OK - ???) +Since the low-level layout of PyPy ``W_Root`` objects is completely different than the one used by CPython, we cannot simply pass RPython objects to C; we need a way to handle the difference. .. _PyTypeObject: https://docs.python.org/2/c-api/typeobj.html -So, we have two issues so far: objects which can move, and incompatible +So, we have two issues so far: objects can move, and incompatible low-level layouts. ``cpyext`` solves both by decoupling the RPython and the C -representations: we have two "views" of the same entity, depending on whether +representations. We have two "views" of the same entity, depending on whether we are in the PyPy world (the movable ``W_Root`` subclass) or in the C world (the non-movable ``PyObject*``). 
-``PyObject*`` are created lazily, only when they are actually needed: the +``PyObject*`` are created lazily, only when they are actually needed. The vast majority of PyPy objects are never passed to any C extension, so we don't -pay any penalty in that case; however, the first time we pass a ``W_Root`` to +pay any penalty in that case. However, the first time we pass a ``W_Root`` to C, we allocate and initialize its ``PyObject*`` counterpart. The same idea applies also to objects which are created in C, e.g. by calling -_`PyObject_New`: at first, only the ``PyObject*`` exists and it is -exclusively managed by reference counting: as soon as we pass it to the PyPy +_`PyObject_New`. At first, only the ``PyObject*`` exists and it is +exclusively managed by reference counting. As soon as we pass it to the PyPy world (e.g. as a return value of a function call), we create its ``W_Root`` counterpart, which is managed by the GC as usual. .. _`PyObject_New`: https://docs.python.org/2/c-api/allocation.html#c.PyObject_New Here we start to see why calling cpyext modules is more costly in PyPy than in -CPython: we need to pay some penalty for all the conversions between +CPython. We need to pay some penalty for all the conversions between ``W_Root`` and ``PyObject*``. Moreover, the first time we pass a ``W_Root`` to C we also need to allocate the memory for the ``PyObject*`` using a slowish "CPython-style" memory -allocator: in practice, for all the objects which are passed to C we pay more +allocator. In practice, for all the objects which are passed to C we pay more or less the same costs as CPython, thus effectively "undoing" the speedup guaranteed by PyPy's Generational GC under normal circumstances. 
@@ -181,7 +183,7 @@ Maintaining the link between ``W_Root`` and ``PyObject*`` ----------------------------------------------------------- -So, we need a way to convert between ``W_Root`` and ``PyObject*`` and +We now need a way to convert between ``W_Root`` and ``PyObject*`` and vice-versa; also, we need to to ensure that the lifetime of the two entities are in sync. In particular: @@ -192,7 +194,7 @@ make sure that the GC does not collect the ``W_Root``. The ``PyObject*`` ==> ``W_Root`` link is maintained by the special field -_`ob_pypy_link` which is added to all ``PyObject*``: on a 64 bit machine this +_`ob_pypy_link` which is added to all ``PyObject*``. On a 64 bit machine this means that all ``PyObject*`` have 8 bytes of overhead, but then the conversion is very quick, just reading the field. @@ -205,7 +207,7 @@ However, for a _`few selected` ``W_Root`` subclasses we **do** maintain a direct link using the special ``_cpy_ref`` field to improve performance. In particular, we use it for ``W_TypeObject`` (which is big anyway, so a 8 bytes -overhead is negligible) and ``W_NoneObject``: ``None`` is passed around very +overhead is negligible) and ``W_NoneObject``. ``None`` is passed around very often, so we want to ensure that the conversion to ``PyObject*`` is very fast. Moreover it's a singleton, so the 8 bytes overhead is negligible as well. @@ -213,7 +215,7 @@ This means that in theory, passing an arbitrary Python object to C is potentially costly, because it involves doing a dictionary lookup. We assume that this cost will eventually show up in the profiler: however, at the time -of writing there are other parts of cpyext which are even more costly (as we +of writing there are other parts of ``cpyext`` which are even more costly (as we will show later), so the cost of the dict lookup is never evident in the profiler. 
@@ -224,25 +226,25 @@ Crossing the border between RPython and C ------------------------------------------- +----------------------------------------- There are two other things we need to care about whenever we cross the border between RPython and C, and vice-versa: exception handling and the GIL. In the C API, exceptions are raised by calling `PyErr_SetString()`_ (or one of `many other functions`_ which have a similar effect), which basically works by -creating an exception value and storing it in some global variable; then, the -function signals that an exception has occurred by returning an error value, +creating an exception value and storing it in some global variable. The +function then signals that an exception has occurred by returning an error value, usually ``NULL``. -On the other hand, in the PyPy interpreter they are propagated by raising the +On the other hand, in the PyPy interpreter, exceptions are propagated by raising the RPython-level OperationError_ exception, which wraps the actual app-level -exception values: to harmonize the two worlds, whenever we return from C to -RPython, we need to check whether a C API exception was raised and turn it -into an ``OperationError`` if needed. +exception values: to harmonize the two worlds. Whenever we return from C to +RPython, we need to check whether a C API exception was raised and if so turn it +into an ``OperationError``. -About the GIL, we won't dig into details of `how it is handled in cpyext`_: -for the purpose of this post, it is enough to know that whenever we enter the +We won't dig into details of `how the GIL is handled in cpyext`_. +For the purpose of this post, it is enough to know that whenever we enter C land, we store the current thread id into a global variable which is accessible also from C; conversely, whenever we go back from RPython to C, we restore this value to 0. @@ -251,80 +253,80 @@ border between C and RPython, e.g. by calling a Python callback from C code. 
All this complexity is automatically handled by the RPython function -`generic_cpy_call`_: if you look at the code you see that it takes care of 4 +`generic_cpy_call`_. If you look at the code you see that it takes care of 4 things: - 1. handling the GIL as explained above + 1. Handling the GIL as explained above. - 2. handling exceptions, if they are raised + 2. Handling exceptions, if they are raised. - 3. converting arguments from ``W_Root`` to ``PyObject*`` + 3. Converting arguments from ``W_Root`` to ``PyObject*``. - 4. converting the return value from ``PyObject*`` to ``W_Root`` + 4. Converting the return value from ``PyObject*`` to ``W_Root``. -So, we can see that calling C from RPython introduce some overhead: how much -is it? +So, we can see that calling C from RPython introduce some overhead. +Can we measure it? Assuming that the conversion between ``W_Root`` and ``PyObject*`` has a reasonable cost (as explained by the previous section), the overhead introduced by a single border-cross is still accettable, especially if the callee is doing some non-negligible amount of work. -However this is not always the case; there are basically three problems that -make (or used to make) cpyext super slow: +However this is not always the case. There are basically three problems that +make (or used to make) ``cpyext`` super slow: - 1. paying the border-crossing cost for trivial operations which are called - very often, such as ``Py_INCREF`` + 1. Paying the border-crossing cost for trivial operations which are called + very often, such as ``Py_INCREF``. - 2. crossing the border back and forth many times, even if it's not strictly - needed + 2. Crossing the border back and forth many times, even if it's not strictly + needed. - 3. paying an excessive cost for argument and return value conversions + 3. Paying an excessive cost for argument and return value conversions. -The next sections are going to explain in more detail each of these problems. 
+The next sections explain in more detail each of these problems. .. _`PyErr_SetString()`: https://docs.python.org/2/c-api/exceptions.html#c.PyErr_SetString .. _`many other functions`: https://docs.python.org/2/c-api/exceptions.html#exception-handling .. _OperationError: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/interpreter/error.py#lines-20 -.. _`how it is handled in cpyext`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-205 +.. _`how the GIL is handled in cpyext`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-205 .. _`generic_cpy_call`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-1757 Avoiding unnecessary roundtrips -------------------------------- -Prior to the `2017 Cape Town Sprint`_, cpyext was horribly slow, and we were +Prior to the `2017 Cape Town Sprint`_, ``cpyext`` was horribly slow, and we were well aware of it: the main reason was that we never really paid too much -attention to performances: as explained by this blog post, emulating all the +attention to performance. As explained in the blog post, emulating all the CPython quirks is basically a nightmare, so better to concentrate on correctness first. -However, we didn't really know **why** it was so slow: we had theories and +However, we didn't really know **why** it was so slow. We had theories and assumptions, usually pointing at the cost of conversions between ``W_Root`` and ``PyObject*``, but we never actually measured it. So, we decided to write a set of `cpyext microbenchmarks`_ to measure the -performance of various operation. The result was somewhat surprising: the +performance of various operations. 
The result was somewhat surprising: the theory suggests that when you do a cpyext C call, you should pay the border-crossing costs only once, but what the profiler told us was that we -were paying the cost of ``generic_cpy_call`` several times what we expected. +were paying the cost of ``generic_cpy_call`` several times more than what we expected. After a bit of investigation, we discovered this was ultimately caused by our -"correctness-first" approach. For simplicity of development and testing, when -we started cpyext we wrote everything in RPython: thus, every single API call +"correctness-first" approach. For simplicity of development and testing, when +we started ``cpyext`` we wrote everything in RPython: thus, every single API call made from C (like the omnipresent `PyArg_ParseTuple`_, `PyInt_AsLong`_, etc.) -had to cross back the C-to-RPython border: this was especially daunting for +had to cross back the C-to-RPython border. This was especially daunting for very simple and frequent operations like ``Py_INCREF`` and ``Py_DECREF``, which CPython implements as a single assembly instruction! -Another source of slowness was the implementation of ``PyTypeObject`` slots: -at the C level, these are function pointers which the interpreter calls to do +Another source of slow down was the implementation of ``PyTypeObject`` slots. +At the C level, these are function pointers which the interpreter calls to do certain operations, e.g. `tp_new`_ to allocate a new instance of that type. As usual, we have some magic to implement slots in RPython; in particular, -`_make_wrapper`_ does the opposite of ``generic_cpy_call``: it takes an +`_make_wrapper`_ does the opposite of ``generic_cpy_call``: it takes a RPython function and wraps it into a C function which can be safely called from C, handling the GIL, exceptions and argument conversions automatically. 
@@ -359,15 +361,15 @@ ``return result``, during the **C-to-RPython** step we convert it from ``PyObject*`` to ``W_IntObject(1234)``. -Phew! After we realized this, it was not so surprising that cpyext was very -slow :). And this was a simplified example, since we are not passing and -``PyObject*`` to the API call: if we did, we would need to convert it back and +Phew! After we realized this, it was not so surprising that ``cpyext`` was very +slow :). And this was a simplified example, since we are not passing a +``PyObject*`` to the API call. When we do, we need to convert it back and forth at every step. Actually, I am not even sure that what I described was the exact sequence of steps which used to happen, but you get the general idea. -The solution is simple: rewrite as much as we can in C instead of RPython, so -to avoid unnecessary roundtrips: this was the topic of most of the Cape Town +The solution is simple: rewrite as much as we can in C instead of RPython, +to avoid unnecessary roundtrips. This was the topic of most of the Cape Town sprint and resulted in the ``cpyext-avoid-roundtrip`` branch, which was eventually merged_. @@ -377,13 +379,14 @@ involves list strategies, so we cannot replicate it in C. However, we discovered that a large subset of the C API can benefit from this. -Moreover, the C API is **huge**: the biggest achievement of the branch was to -discover and invent this new way of writing cpyext code, but we still need to -convert many of the functions. Also, sometimes the rewrite is not automatic -or straighforward: cpyext is a delicate piece of software, so it happens often -that you end up debugging a segfault in gdb. +Moreover, the C API is **huge**. While we invented this new way of writing +``cpyext`` code, we still need to +convert many of the functions to the new paradigm. Sometimes the rewrite is +not automatic +or straighforward. 
``cpyext`` is a delicate piece of software, so it happens often +that we make a mistake and end up staring at a segfault in gdb. -However, the most important remark is that the performance improvement we got +However, the most important takeaway is that the performance improvements we got from this optimization are impressive, as we will detail later. .. _`2017 Cape Town Sprint`: https://morepypy.blogspot.com/2017/10/cape-of-good-hope-for-pypy-hello-from.html @@ -396,13 +399,13 @@ Conversion costs ------------------ +---------------- The other potential big source of slowdown is the conversion of arguments between ``W_Root`` and ``PyObject*``. As explained earlier, the first time you pass a ``W_Root`` to C, you need to -allocate it's ``PyObject*`` counterpart. Suppose to have a ``foo`` function +allocate its ``PyObject*`` counterpart. Suppose you have a ``foo`` function defined in C, which takes a single int argument: .. sourcecode:: python @@ -416,12 +419,12 @@ CPython has the very same problem, which is solved by using a `free list`_ to `allocate ints`_. So, what we did was to simply `steal the code`_ from CPython -and do the exact same thing: this was also done in the +and do the exact same thing. This was also done in the ``cpyext-avoid-roundtrip`` branch, and the benchmarks show that it worked perfectly. Every type which is converted often to ``PyObject*`` must have a very fast -allocator: at the moment of writing, PyPy uses free lists only for ints and +allocator. At the moment of writing, PyPy uses free lists only for ints and tuples_: one of the next steps on our TODO list is certainly to use this technique with more types, like ``float``. @@ -439,24 +442,24 @@ At every iteration, we get an item out of the array: the return type is a an instance of ``numpy.float64`` (a numpy scalar), i.e. 
a ``PyObject'*``: this is something which is implemented by numpy entirely in C, so completely -transparent to cpyext: we don't have any control on how it is allocated, +transparent to ``cpyext``. We don't have any control on how it is allocated, managed, etc., and we can assume that allocation costs are the same than on CPython. As soon as we return these ``PyObject*`` to Python, we need to allocate -theirs ``W_Root`` equivalent: if you do it in a small loop like in the example +their ``W_Root`` equivalent. If you do it in a small loop like in the example above, you end up allocating all these ``W_Root`` inside the nursery, which is a good thing since allocation is super fast (see the section above about the PyPy GC). -However, we also need to keep track of the ``W_Root`` to ``PyObject*`` link: -currently, we do this by putting all of them in a dictionary, but it is very +However, we also need to keep track of the ``W_Root`` to ``PyObject*`` link. +Currently, we do this by putting all of them in a dictionary, but it is very inefficient, especially because most of these objects dies young and thus it is wasted work to do that for them. Currently, this is one of the biggest -unresolved problem in cpyext, and it is what casuses the two microbenchmarks +unresolved problem in ``cpyext``, and it is what casuses the two microbenchmarks ``allocate_int`` and ``allocate_tuple`` to be very slow. -We are well aware of the problem, and we have a plan for how to fix it; the +We are well aware of the problem, and we have a plan for how to fix it. The explanation is too technical for the scope of this blog post as it requires a deep knowledge of the GC internals to be understood, but the details are here_. @@ -469,36 +472,36 @@ C API quirks --------------------- +------------ -Finally, there is another source of slowdown which is beyond our control: some +Finally, there is another source of slowdown which is beyond our control. 
Some parts of the CPython C API are badly designed and expose some of the implementation details of CPython. -The major example is reference counting: the ``Py_INCREF`` / ``Py_DECREF`` API +The major example is reference counting. The ``Py_INCREF`` / ``Py_DECREF`` API is designed in such a way which forces other implementation to emulate refcounting even in presence of other GC management schemes, as explained above. -Another example is borrowed references: there are API functions which **do +Another example is borrowed references. There are API functions which **do not** incref an object before returning it, e.g. `PyList_GetItem`_. This is done for performance reasons because we can avoid a whole incref/decref pair, if the caller needs to handle the returned item only temporarily: the item is kept alive because it is in the list anyway. For PyPy this is a challenge: thanks to `list strategies`_, often lists are -represented in a compact way: e.g. a list containing only integers is stored +represented in a compact way. A list containing only integers is stored as a C array of ``long``. How to implement ``PyList_GetItem``? We cannot simply create a ``PyObject*`` on the fly, because the caller will never decref it and it will result in a memory leak. -The current solution is very inefficient: basically, the first time we do a +The current solution is very inefficient. The first time we do a ``PyList_GetItem``, we convert_ the **whole** list to a list of ``PyObject*``. This is bad in two ways: the first is that we potentially pay a lot of unneeded conversion cost in case we will never access the other items -of the list; the second is that by doing that we lose all the performance -benefit granted by the original list strategy, making it slower even for the -rest of pure-python code which will manipulate the list later. +of the list. 
The second is that by doing that we lose all the performance +benefit granted by the original list strategy, making it slower for the +rest of the pure-python code which will manipulate the list later. ``PyList_GetItem`` is an example of a bad API because it assumes that the list is implemented as an array of ``PyObject*``: after all, in order to return a @@ -506,10 +509,10 @@ Fortunately, (some) CPython developers are aware of these problems, and there is an ongoing project to `design a better C API`_ which aims to fix exactly -this kind of problems. +this kind of problem. Nonetheless, in the meantime we still need to implement the current -half-broken APIs: there is no easy solutions for that, and it is likely that +half-broken APIs. There is no easy solution for that, and it is likely that we will always need to pay some performance penalty in order to implement them correctly. @@ -527,22 +530,22 @@ Current performance --------------------- +------------------- -During the whole blog post we kept talking about the slowness of cpyext: how -much it is, exactly? +During the whole blog post we claimed ``cpyext`` is slow. How +slow it is, exactly? -We decided to concentrate on microbenchmarks_ for now: as it should be evident -by now there are simply too many issues which can slow down a cpyext +We decided to concentrate on microbenchmarks_ for now. It should be evident +by now there are simply too many issues which can slow down a ``cpyext`` benchmark, and microbenchmarks help us to concentrate on one (or few) at a time. -The microbenchmarks measure very simple stuff, like calling function and +The microbenchmarks measure very simple thins, like calling functions and methods with the various calling conventions (no arguments, one arguments, -multiple arguments), passing various types as arguments (to measure conversion -costs), allocating objects from C, and so on. 
+multiple arguments); passing various types as arguments (to measure conversion +costs); allocating objects from C, and so on. -This was the performance of PyPy 5.8 relative and normalized to CPython 2.7, +Here are the results from PyPy 5.8 relative and normalized to CPython 2.7, the lower the better: .. image:: pypy58.png @@ -555,7 +558,7 @@ of integers is huge. PyPy 5.8 was the last release before we famouse Cape Town sprint, when we -started to look at cpyext performance seriously. These are the performance for +started to look at cpyext performance seriously. Here are the performance for PyPy 6.0, the latest release at the time of writing: .. image:: pypy60.png @@ -575,9 +578,9 @@ Next steps ------------ +---------- -Despite the spectacular results we got so far, cpyext is still slow enough to +Despite the spectacular results we got so far, ``cpyext`` is still slow enough to kill performance in most real-world code which uses C extensions extensively (e.g., the omnipresent numpy). @@ -596,12 +599,13 @@ On one hand, this is a daunting task because the C API is huge and we need to tackle functions one by one. On the other hand, not all the functions are equally important, and is is enough to optimize a relatively small subset to -improve lots of different use cases. +improve many different use cases. -The biggest result is that now we have a clear picture of what are the -problems, and we developed some technical solutions to fix them. It is "only" -a matter of tackling them, one by one. Moreoever, keep in mind that most of -the work was done during two sprints, for a total 2-3 man-months. +Where a year ago we announced we have a working answer to run c-extension in PyPy, +we now have a clear picture of what are the +performance bottlenecks, and we have developed some technical solutions to fix them. It is "only" +a matter of tackling them, one by one. Most of +the work was done during two sprints, for a total 2-3 person-months of work. 
XXX: find a conclusion From pypy.commits at gmail.com Thu Sep 20 18:53:59 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 20 Sep 2018 15:53:59 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: edit matti's edits Message-ID: <5ba42507.1c69fb81.5c6ea.a624@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5911:ca713867e4bc Date: 2018-09-21 00:47 +0200 http://bitbucket.org/pypy/extradoc/changeset/ca713867e4bc/ Log: edit matti's edits diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -143,7 +143,7 @@ Another challenge is that sometimes, ``PyObject*`` structs are not completely opaque: there are parts of the public API which expose to the user specific fields of some concrete C struct. For example the definition of PyTypeObject_ -which exposes many of the ``tp_*`` slots to Cython (OK - ???) +which exposes many of the ``tp_*`` slots to the user. Since the low-level layout of PyPy ``W_Root`` objects is completely different than the one used by CPython, we cannot simply pass RPython objects to C; we need a way to handle the difference. @@ -239,7 +239,7 @@ On the other hand, in the PyPy interpreter, exceptions are propagated by raising the RPython-level OperationError_ exception, which wraps the actual app-level -exception values: to harmonize the two worlds. Whenever we return from C to +exception values. To harmonize the two worlds, whenever we return from C to RPython, we need to check whether a C API exception was raised and if so turn it into an ``OperationError``. @@ -490,10 +490,10 @@ kept alive because it is in the list anyway. For PyPy this is a challenge: thanks to `list strategies`_, often lists are -represented in a compact way. A list containing only integers is stored -as a C array of ``long``. How to implement ``PyList_GetItem``? 
We cannot -simply create a ``PyObject*`` on the fly, because the caller will never decref -it and it will result in a memory leak. +represented in a compact way. For example, a list containing only integers is +stored as a C array of ``long``. How to implement ``PyList_GetItem``? We +cannot simply create a ``PyObject*`` on the fly, because the caller will never +decref it and it will result in a memory leak. The current solution is very inefficient. The first time we do a ``PyList_GetItem``, we convert_ the **whole** list to a list of @@ -537,7 +537,7 @@ We decided to concentrate on microbenchmarks_ for now. It should be evident by now there are simply too many issues which can slow down a ``cpyext`` -benchmark, and microbenchmarks help us to concentrate on one (or few) at a +program, and microbenchmarks help us to concentrate on one (or few) at a time. The microbenchmarks measure very simple thins, like calling functions and @@ -545,8 +545,8 @@ multiple arguments); passing various types as arguments (to measure conversion costs); allocating objects from C, and so on. -Here are the results from PyPy 5.8 relative and normalized to CPython 2.7, -the lower the better: +Here are the results from the old PyPy 5.8 relative and normalized to CPython +2.7, the lower the better: .. image:: pypy58.png @@ -601,11 +601,12 @@ equally important, and is is enough to optimize a relatively small subset to improve many different use cases. -Where a year ago we announced we have a working answer to run c-extension in PyPy, -we now have a clear picture of what are the -performance bottlenecks, and we have developed some technical solutions to fix them. It is "only" -a matter of tackling them, one by one. Most of -the work was done during two sprints, for a total 2-3 person-months of work. 
+Where a year ago we announced we have a working answer to run c-extension in +PyPy, we now have a clear picture of what are the performance bottlenecks, and +we have developed some technical solutions to fix them. It is "only" a matter +of tackling them, one by one. It is worth noting that most of the work was +done during two sprints, for a total 2-3 person-months of work. + XXX: find a conclusion From pypy.commits at gmail.com Thu Sep 20 18:54:02 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 20 Sep 2018 15:54:02 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: merge matti's conclusion Message-ID: <5ba4250a.1c69fb81.f4714.3566@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5912:01209d5a3343 Date: 2018-09-21 00:48 +0200 http://bitbucket.org/pypy/extradoc/changeset/01209d5a3343/ Log: merge matti's conclusion diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -608,8 +608,13 @@ done during two sprints, for a total 2-3 person-months of work. -XXX: find a conclusion +We think this work is important for the Python ecosystem. PyPy has established +a baseline for performance in pure python code, providing an answer for the +"Python is slow" detractors. The techniques used to make ``cpyext`` performant +will let PyPy become an alternative for people who mix C extensions with python, +which, it turns out, is just about everyone. Today, many developers are forced +to seek performance by converting code from python to a lower language. We +feel there is no reason to do this, but in order to prove it we must be able +to run both their python and their c-extensions performantly, then we can begin +to educate them how to write JIT-freindly code in the first place. 
- - - From pypy.commits at gmail.com Thu Sep 20 18:54:04 2018 From: pypy.commits at gmail.com (antocuni) Date: Thu, 20 Sep 2018 15:54:04 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: edit the conclusion Message-ID: <5ba4250c.1c69fb81.9e19d.2b9f@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5913:e338cba36ea1 Date: 2018-09-21 00:53 +0200 http://bitbucket.org/pypy/extradoc/changeset/e338cba36ea1/ Log: edit the conclusion diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -611,10 +611,14 @@ We think this work is important for the Python ecosystem. PyPy has established a baseline for performance in pure python code, providing an answer for the "Python is slow" detractors. The techniques used to make ``cpyext`` performant -will let PyPy become an alternative for people who mix C extensions with python, -which, it turns out, is just about everyone. Today, many developers are forced -to seek performance by converting code from python to a lower language. We -feel there is no reason to do this, but in order to prove it we must be able -to run both their python and their c-extensions performantly, then we can begin -to educate them how to write JIT-freindly code in the first place. +will let PyPy become an alternative for people who mix C extensions with +python, which, it turns out, is just about everyone, in particular those using +the various scientific libraries. Today, many developers are forced to seek +performance by converting code from python to a lower language. We feel there +is no reason to do this, but in order to prove it we must be able to run both +their python and their C extensions performantly, then we can begin to educate +them how to write JIT-friendly code in the first place. 
+We envision a future in which you can run arbitrary Python programs on PyPy, +with the JIT speeding up the pure Python parts and the C parts running as fast +as today: the best of both worlds! From pypy.commits at gmail.com Fri Sep 21 08:16:58 2018 From: pypy.commits at gmail.com (rlamy) Date: Fri, 21 Sep 2018 05:16:58 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: fix typos Message-ID: <5ba4e13a.1c69fb81.c018.f91c@mx.google.com> Author: Ronan Lamy Branch: extradoc Changeset: r5914:e3e572f70743 Date: 2018-09-21 12:16 +0000 http://bitbucket.org/pypy/extradoc/changeset/e3e572f70743/ Log: fix typos diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -6,7 +6,7 @@ why a particular C extension doesn't work or is very slow on PyPy. Usually it is hard to answer without going into technical details. The goal of this blog post is to explain some of these technical details, so that we can -simply link here instead of explaing again and again :). +simply link here instead of explaining again and again :). From a 10.000 foot view, ``cpyext`` is PyPy's version of ``"Python.h"``. Every time you compile an extension which uses that header file, you are using ``cpyext``. @@ -81,7 +81,7 @@ Actually, the code above is not too far from the actual implementation. However, there are tons of gory details which make it much -harder than what it looks, and much slower unless you pay a lot of attention +harder than it looks, and much slower unless you pay a lot of attention to performance. @@ -270,7 +270,7 @@ Assuming that the conversion between ``W_Root`` and ``PyObject*`` has a reasonable cost (as explained by the previous section), the overhead -introduced by a single border-cross is still accettable, especially if the +introduced by a single border-cross is still acceptable, especially if the callee is doing some non-negligible amount of work. 
However this is not always the case. There are basically three problems that @@ -442,7 +442,7 @@ At every iteration, we get an item out of the array: the return type is a an instance of ``numpy.float64`` (a numpy scalar), i.e. a ``PyObject'*``: this is something which is implemented by numpy entirely in C, so completely -transparent to ``cpyext``. We don't have any control on how it is allocated, +opaque to ``cpyext``. We don't have any control on how it is allocated, managed, etc., and we can assume that allocation costs are the same than on CPython. @@ -454,9 +454,9 @@ However, we also need to keep track of the ``W_Root`` to ``PyObject*`` link. Currently, we do this by putting all of them in a dictionary, but it is very -inefficient, especially because most of these objects dies young and thus it +inefficient, especially because most of these objects die young and thus it is wasted work to do that for them. Currently, this is one of the biggest -unresolved problem in ``cpyext``, and it is what casuses the two microbenchmarks +unresolved problem in ``cpyext``, and it is what causes the two microbenchmarks ``allocate_int`` and ``allocate_tuple`` to be very slow. We are well aware of the problem, and we have a plan for how to fix it. The @@ -489,7 +489,7 @@ if the caller needs to handle the returned item only temporarily: the item is kept alive because it is in the list anyway. -For PyPy this is a challenge: thanks to `list strategies`_, often lists are +For PyPy, this is a challenge: thanks to `list strategies`_, lists are often represented in a compact way. For example, a list containing only integers is stored as a C array of ``long``. How to implement ``PyList_GetItem``? We cannot simply create a ``PyObject*`` on the fly, because the caller will never @@ -540,7 +540,7 @@ program, and microbenchmarks help us to concentrate on one (or few) at a time. 
-The microbenchmarks measure very simple thins, like calling functions and +The microbenchmarks measure very simple things, like calling functions and methods with the various calling conventions (no arguments, one arguments, multiple arguments); passing various types as arguments (to measure conversion costs); allocating objects from C, and so on. @@ -551,14 +551,14 @@ .. image:: pypy58.png PyPy was horribly slow everywhere, ranging from 2.5x to 10x slower. It is -particularly interesting to compare ``simple.noargs``, which measure the cost +particularly interesting to compare ``simple.noargs``, which measures the cost of calling an empty function with no arguments, and ``simple.onearg(i)``, -which measure the cost calling an empty function passing an integer argument: +which measures the cost calling an empty function passing an integer argument: the latter is ~2x slower than the former, indicating that the conversion cost of integers is huge. -PyPy 5.8 was the last release before we famouse Cape Town sprint, when we -started to look at cpyext performance seriously. Here are the performance for +PyPy 5.8 was the last release before the famous Cape Town sprint, when we +started to look at cpyext performance seriously. Here are the performance data for PyPy 6.0, the latest release at the time of writing: .. image:: pypy60.png @@ -567,7 +567,7 @@ most benchmarks it is even faster than CPython: yes, you read it correctly: PyPy is faster than CPython at doing CPython's job, even considering all the extra work it has to do to emulate the C API. This happens thanks to the JIT, -which produce speedups high enough to counterbalance the slowdown caused by +which produces speedups high enough to counterbalance the slowdown caused by cpyext. There are two microbenchmarks which are still slower though: ``allocate_int`` @@ -612,9 +612,9 @@ a baseline for performance in pure python code, providing an answer for the "Python is slow" detractors. 
The techniques used to make ``cpyext`` performant will let PyPy become an alternative for people who mix C extensions with -python, which, it turns out, is just about everyone, in particular those using +Python, which, it turns out, is just about everyone, in particular those using the various scientific libraries. Today, many developers are forced to seek -performance by converting code from python to a lower language. We feel there +performance by converting code from Python to a lower language. We feel there is no reason to do this, but in order to prove it we must be able to run both their python and their C extensions performantly, then we can begin to educate them how to write JIT-friendly code in the first place. From pypy.commits at gmail.com Fri Sep 21 09:01:38 2018 From: pypy.commits at gmail.com (antocuni) Date: Fri, 21 Sep 2018 06:01:38 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: specify better Message-ID: <5ba4ebb2.1c69fb81.58212.9b31@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5915:0eb646623fc4 Date: 2018-09-21 15:01 +0200 http://bitbucket.org/pypy/extradoc/changeset/0eb646623fc4/ Log: specify better diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -588,7 +588,7 @@ 1. run a real-world small benchmark which exercises cpyext - 2. measure and find the bottleneck + 2. measure and find the major bottleneck 3. 
write a corresponding microbenchmark From pypy.commits at gmail.com Fri Sep 21 11:36:22 2018 From: pypy.commits at gmail.com (antocuni) Date: Fri, 21 Sep 2018 08:36:22 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: fix links Message-ID: <5ba50ff6.1c69fb81.9d39f.eab0@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5916:dd7643873e67 Date: 2018-09-21 17:20 +0200 http://bitbucket.org/pypy/extradoc/changeset/dd7643873e67/ Log: fix links diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -55,10 +55,10 @@ Generally speaking, the only way to operate on a ``PyObject*`` is to call the appropriate API functions. For example, to convert a given ``PyObject*`` to a C -integer, you can use _`PyInt_AsLong()`; to add two objects together, you can -call _`PyNumber_Add()`. +integer, you can use `PyInt_AsLong()`_; to add two objects together, you can +call `PyNumber_Add()`_. -.. _`PyInt_AsLong()`: https://docs.python.org/2/c-api/int.html?highlight=pyint_check#c.PyInt_AsLong +.. _`PyInt_AsLong()`: https://docs.python.org/2/c-api/int.html#c.PyInt_AsLong .. _`PyNumber_Add()`: https://docs.python.org/2/c-api/number.html#c.PyNumber_Add Internally, PyPy uses a similar approach. All Python objects are subclasses of @@ -162,12 +162,12 @@ C, we allocate and initialize its ``PyObject*`` counterpart. The same idea applies also to objects which are created in C, e.g. by calling -_`PyObject_New`. At first, only the ``PyObject*`` exists and it is +`PyObject_New()`_. At first, only the ``PyObject*`` exists and it is exclusively managed by reference counting. As soon as we pass it to the PyPy world (e.g. as a return value of a function call), we create its ``W_Root`` counterpart, which is managed by the GC as usual. -.. _`PyObject_New`: https://docs.python.org/2/c-api/allocation.html#c.PyObject_New +.. 
_`PyObject_New()`: https://docs.python.org/2/c-api/allocation.html#c.PyObject_New Here we start to see why calling cpyext modules is more costly in PyPy than in CPython. We need to pay some penalty for all the conversions between @@ -194,7 +194,7 @@ make sure that the GC does not collect the ``W_Root``. The ``PyObject*`` ==> ``W_Root`` link is maintained by the special field -_`ob_pypy_link` which is added to all ``PyObject*``. On a 64 bit machine this +`ob_pypy_link`_ which is added to all ``PyObject*``. On a 64 bit machine this means that all ``PyObject*`` have 8 bytes of overhead, but then the conversion is very quick, just reading the field. @@ -204,7 +204,7 @@ waste. Instead, in the general case the link is maintained by using a dictionary, where ``W_Root`` are the keys and ``PyObject*`` the values. -However, for a _`few selected` ``W_Root`` subclasses we **do** maintain a +However, for a `few selected`_ ``W_Root`` subclasses we **do** maintain a direct link using the special ``_cpy_ref`` field to improve performance. In particular, we use it for ``W_TypeObject`` (which is big anyway, so a 8 bytes overhead is negligible) and ``W_NoneObject``. ``None`` is passed around very @@ -316,7 +316,7 @@ After a bit of investigation, we discovered this was ultimately caused by our "correctness-first" approach. For simplicity of development and testing, when we started ``cpyext`` we wrote everything in RPython: thus, every single API call -made from C (like the omnipresent `PyArg_ParseTuple`_, `PyInt_AsLong`_, etc.) +made from C (like the omnipresent `PyArg_ParseTuple()`_, `PyInt_AsLong()`_, etc.) had to cross back the C-to-RPython border. This was especially daunting for very simple and frequent operations like ``Py_INCREF`` and ``Py_DECREF``, which CPython implements as a single assembly instruction! @@ -391,10 +391,9 @@ .. _`2017 Cape Town Sprint`: https://morepypy.blogspot.com/2017/10/cape-of-good-hope-for-pypy-hello-from.html .. 
_`cpyext microbenchmarks`: https://github.com/antocuni/cpyext-benchmarks -.. _`PyArg_ParseTuple`: https://docs.python.org/2/c-api/arg.html#c.PyArg_ParseTuple -.. _`PyInt_AsLong`: https://docs.python.org/2/c-api/int.html#c.PyInt_AsLong +.. _`PyArg_ParseTuple()`: https://docs.python.org/2/c-api/arg.html#c.PyArg_ParseTuple .. _`tp_new`: https://docs.python.org/2/c-api/typeobj.html#c.PyTypeObject.tp_new -.. `_make_wrapper`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-362 +.. _`_make_wrapper`: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/api.py#lines-362 .. _merged: https://bitbucket.org/pypy/pypy/commits/7b550e9b3cee @@ -484,7 +483,7 @@ above. Another example is borrowed references. There are API functions which **do -not** incref an object before returning it, e.g. `PyList_GetItem`_. This is +not** incref an object before returning it, e.g. `PyList_GetItem()`_. This is done for performance reasons because we can avoid a whole incref/decref pair, if the caller needs to handle the returned item only temporarily: the item is kept alive because it is in the list anyway. @@ -523,7 +522,7 @@ ``#ifdef``) if they want to be fast on PyPy. -.. _`PyList_GetItem`: https://docs.python.org/2/c-api/list.html#c.PyList_GetItem +.. _`PyList_GetItem()`: https://docs.python.org/2/c-api/list.html#c.PyList_GetItem .. _`list strategies`: https://morepypy.blogspot.com/2011/10/more-compact-lists-with-list-strategies.html .. _convert: https://bitbucket.org/pypy/pypy/src/b9bbd6c0933349cbdbfe2b884a68a16ad16c3a8a/pypy/module/cpyext/listobject.py#lines-28 .. 
_`design a better C API`: https://pythoncapi.readthedocs.io/ From pypy.commits at gmail.com Fri Sep 21 11:37:03 2018 From: pypy.commits at gmail.com (antocuni) Date: Fri, 21 Sep 2018 08:37:03 -0700 (PDT) Subject: [pypy-commit] extradoc extradoc: fix rst Message-ID: <5ba5101f.1c69fb81.56e74.923e@mx.google.com> Author: Antonio Cuni Branch: extradoc Changeset: r5917:d25482c91a0f Date: 2018-09-21 17:34 +0200 http://bitbucket.org/pypy/extradoc/changeset/d25482c91a0f/ Log: fix rst diff --git a/blog/draft/2018-09-cpyext/cpyext.rst b/blog/draft/2018-09-cpyext/cpyext.rst --- a/blog/draft/2018-09-cpyext/cpyext.rst +++ b/blog/draft/2018-09-cpyext/cpyext.rst @@ -193,7 +193,7 @@ 2. as long as the ``PyObject*`` has a refcount greater than 0, we want to make sure that the GC does not collect the ``W_Root``. -The ``PyObject*`` ==> ``W_Root`` link is maintained by the special field +The ``PyObject*`` ⇨ ``W_Root`` link is maintained by the special field `ob_pypy_link`_ which is added to all ``PyObject*``. On a 64 bit machine this means that all ``PyObject*`` have 8 bytes of overhead, but then the conversion is very quick, just reading the field. @@ -342,24 +342,24 @@ return result; } - 1. you are in RPython and do a cpyext call to ``foo``: **RPython-to-C**; +1. you are in RPython and do a cpyext call to ``foo``: **RPython-to-C**; - 2. ``foo`` calls ``PyInt_FromLong(1234)``, which is implemented in RPython: - **C-to-RPython**; +2. ``foo`` calls ``PyInt_FromLong(1234)``, which is implemented in RPython: + **C-to-RPython**; - 3. the implementation of ``PyInt_FromLong`` indirectly calls - ``PyIntType.tp_new``, which is a C function pointer: **RPython-to-C**; +3. the implementation of ``PyInt_FromLong`` indirectly calls + ``PyIntType.tp_new``, which is a C function pointer: **RPython-to-C**; - 4. however, ``tp_new`` is just a wrapper around an RPython function, created - by ``_make_wrapper``: **C-to-RPython**; +4. 
however, ``tp_new`` is just a wrapper around an RPython function, created + by ``_make_wrapper``: **C-to-RPython**; - 5. finally, we create our RPython ``W_IntObject(1234)``; at some point - during the **RPython-to-C** crossing, its ``PyObject*`` equivalent is - created; +5. finally, we create our RPython ``W_IntObject(1234)``; at some point + during the **RPython-to-C** crossing, its ``PyObject*`` equivalent is + created; - 6. after many layers of wrappers, we are again in ``foo``: after we do - ``return result``, during the **C-to-RPython** step we convert it from - ``PyObject*`` to ``W_IntObject(1234)``. +6. after many layers of wrappers, we are again in ``foo``: after we do + ``return result``, during the **C-to-RPython** step we convert it from + ``PyObject*`` to ``W_IntObject(1234)``. Phew! After we realized this, it was not so surprising that ``cpyext`` was very slow :). And this was a simplified example, since we are not passing a From pypy.commits at gmail.com Sat Sep 22 17:14:52 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 22 Sep 2018 14:14:52 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: use index instead of pos for errorhandler Message-ID: <5ba6b0cc.1c69fb81.768b4.309a@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95152:b972c7b3bc19 Date: 2018-09-22 19:39 +0300 http://bitbucket.org/pypy/pypy/changeset/b972c7b3bc19/ Log: use index instead of pos for errorhandler diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1401,7 +1401,7 @@ if errorhandler: res_8, newindex = errorhandler( errors, public_encoding_name, 'malformed unicode', - s, pos - 1, pos) + s, index, index+1) if res_8: for cp in rutf8.Utf8StringIterator(res_8): if cp < 0xD800: @@ -1409,7 +1409,7 @@ else: errorhandler('strict', public_encoding_name, 'malformed unicode', - s, pos-1, pos) + s, index, index+1) else: _STORECHAR32(result, 
ch, byteorder) else: @@ -1419,14 +1419,14 @@ if not allow_surrogates and 0xD800 <= ch < 0xE000: res_8, newindex = errorhandler( errors, public_encoding_name, 'surrogates not allowed', - s, pos - 1, pos) + s, index, index+1) for ch in rutf8.Utf8StringIterator(res_8): if ch < 0xD800: _STORECHAR32(result, ch, byteorder) else: errorhandler( 'strict', public_encoding_name, 'surrogates not allowed', - s, pos - 1, pos) + s, index, index+1) if index != newindex: # Should be uncommon index = newindex pos = rutf8._pos_at_index(s, newindex) From pypy.commits at gmail.com Sat Sep 22 17:14:54 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 22 Sep 2018 14:14:54 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: handle bad surrogate pairs Message-ID: <5ba6b0ce.1c69fb81.3f351.1ed4@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95153:6db0f3da1041 Date: 2018-09-22 19:40 +0300 http://bitbucket.org/pypy/pypy/changeset/6db0f3da1041/ Log: handle bad surrogate pairs diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -759,6 +759,8 @@ if ordch1 <= 0x7F: self._pos = pos + 1 return ordch1 + if pos + 1 >= len(code): + return ordch1 ordch2 = ord(code[pos+1]) if ordch1 <= 0xDF: From pypy.commits at gmail.com Sat Sep 22 17:14:56 2018 From: pypy.commits at gmail.com (mattip) Date: Sat, 22 Sep 2018 14:14:56 -0700 (PDT) Subject: [pypy-commit] pypy default: allow self.cc to have spaces Message-ID: <5ba6b0d0.1c69fb81.f207.96bc@mx.google.com> Author: Matti Picus Branch: Changeset: r95154:ca3f54735832 Date: 2018-09-23 00:08 +0300 http://bitbucket.org/pypy/pypy/changeset/ca3f54735832/ Log: allow self.cc to have spaces diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -177,7 +177,7 @@ self.cc = cc # detect version of current compiler - returncode, stdout, stderr = 
_run_subprocess(self.cc, '', + returncode, stdout, stderr = _run_subprocess(self.cc, [], env=self.c_environ) r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) if r is not None: @@ -187,7 +187,7 @@ self.version = 0 # Try to find a masm assembler - returncode, stdout, stderr = _run_subprocess('ml.exe', '', + returncode, stdout, stderr = _run_subprocess('ml.exe', [], env=self.c_environ) r = re.search('Macro Assembler', stderr) if r is None and os.path.exists('c:/masm32/bin/ml.exe'): From pypy.commits at gmail.com Sun Sep 23 05:08:16 2018 From: pypy.commits at gmail.com (arigo) Date: Sun, 23 Sep 2018 02:08:16 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5ba75800.1c69fb81.8df91.db97@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r95155:700122e469d9 Date: 2018-09-23 11:07 +0200 http://bitbucket.org/pypy/pypy/changeset/700122e469d9/ Log: hg merge default diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -61,6 +61,9 @@ git = mod.StupidGit() +class ExampleClassWithSlot(object): + __slots__ = 'myslot' + class IsTestBase(unittest.TestCase): predicates = set([inspect.isbuiltin, inspect.isclass, inspect.iscode, inspect.isframe, inspect.isfunction, inspect.ismethod, @@ -131,8 +134,11 @@ self.istest(inspect.iscoroutinefunction, 'coroutine_function_example') if hasattr(types, 'MemberDescriptorType'): - self.istest(inspect.ismemberdescriptor, - 'type(lambda: None).__globals__') + # App-level slots are member descriptors on both PyPy and + # CPython, but the various built-in attributes are all + # getsetdescriptors on PyPy. So check ismemberdescriptor() + # with an app-level slot. 
+ self.istest(inspect.ismemberdescriptor, 'ExampleClassWithSlot.myslot') else: self.assertFalse(inspect.ismemberdescriptor(datetime.timedelta.days)) diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -486,6 +486,8 @@ return cobj, cobj._to_ffi_param(), type(cobj) def _convert_args_for_callback(self, argtypes, args): + from _ctypes.structure import StructOrUnion + # assert len(argtypes) == len(args) newargs = [] for argtype, arg in zip(argtypes, args): @@ -495,6 +497,10 @@ param = param._get_buffer_value() elif self._is_primitive(argtype): param = param.value + elif isinstance(param, StructOrUnion): # not a *pointer* to struct + newparam = StructOrUnion.__new__(type(param)) + param._copy_to(newparam._buffer.buffer) + param = newparam newargs.append(param) return newargs diff --git a/lib_pypy/cffi/setuptools_ext.py b/lib_pypy/cffi/setuptools_ext.py --- a/lib_pypy/cffi/setuptools_ext.py +++ b/lib_pypy/cffi/setuptools_ext.py @@ -162,6 +162,17 @@ module_path = module_name.split('.') module_path[-1] += '.py' generate_mod(os.path.join(self.build_lib, *module_path)) + def get_source_files(self): + # This is called from 'setup.py sdist' only. Exclude + # the generate .py module in this case. + saved_py_modules = self.py_modules + try: + if saved_py_modules: + self.py_modules = [m for m in saved_py_modules + if m != module_name] + return base_class.get_source_files(self) + finally: + self.py_modules = saved_py_modules dist.cmdclass['build_py'] = build_py_make_mod # distutils and setuptools have no notion I could find of a @@ -171,6 +182,7 @@ # the module. So we add it here, which gives a few apparently # harmless warnings about not finding the file outside the # build directory. + # Then we need to hack more in get_source_files(); see above. 
if dist.py_modules is None: dist.py_modules = [] dist.py_modules.append(module_name) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -36,3 +36,6 @@ .. branch: pyparser-improvements-3 Small refactorings in the Python parser. + +.. branch: fix-readme-typo + diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py --- a/pypy/module/__builtin__/functional.py +++ b/pypy/module/__builtin__/functional.py @@ -108,101 +108,98 @@ max_jitdriver = jit.JitDriver(name='max', greens=['has_key', 'has_item', 'w_type'], reds='auto') -def make_min_max(unroll): - @specialize.arg(2) - def min_max_impl(space, args, implementation_of): - if implementation_of == "max": - compare = space.gt - jitdriver = max_jitdriver +@specialize.arg(4) +def min_max_sequence(space, w_sequence, w_key, w_default, implementation_of): + if implementation_of == "max": + compare = space.gt + jitdriver = max_jitdriver + else: + compare = space.lt + jitdriver = min_jitdriver + w_iter = space.iter(w_sequence) + w_type = space.type(w_iter) + has_key = w_key is not None + has_item = False + w_max_item = w_default + w_max_val = None + while True: + jitdriver.jit_merge_point(has_key=has_key, has_item=has_item, + w_type=w_type) + try: + w_item = space.next(w_iter) + except OperationError as e: + if not e.match(space, space.w_StopIteration): + raise + break + if has_key: + w_compare_with = space.call_function(w_key, w_item) else: - compare = space.lt - jitdriver = min_jitdriver - any_kwds = bool(args.keywords) - args_w = args.arguments_w - if len(args_w) > 1: - if unroll and len(args_w) == 2 and not any_kwds: - # a fast path for the common case, useful for interpreted - # mode and to reduce the length of the jit trace - w0, w1 = args_w - if space.is_true(compare(w1, w0)): - return w1 - else: - return w0 - w_sequence = space.newtuple(args_w) - elif len(args_w): - w_sequence = args_w[0] +
w_compare_with = w_item + if (not has_item or + space.is_true(compare(w_compare_with, w_max_val))): + has_item = True + w_max_item = w_item + w_max_val = w_compare_with + if w_max_item is None: + raise oefmt(space.w_ValueError, "arg is an empty sequence") + return w_max_item + +@specialize.arg(3) +@jit.look_inside_iff(lambda space, args_w, w_key, implementation_of: + jit.loop_unrolling_heuristic(args_w, len(args_w), 3)) +def min_max_multiple_args(space, args_w, w_key, implementation_of): + # case of multiple arguments (at least two). We unroll it if there + # are 2 or 3 arguments. + if implementation_of == "max": + compare = space.gt + else: + compare = space.lt + w_max_item = args_w[0] + if w_key is not None: + w_max_val = space.call_function(w_key, w_max_item) + else: + w_max_val = w_max_item + for i in range(1, len(args_w)): + w_item = args_w[i] + if w_key is not None: + w_compare_with = space.call_function(w_key, w_item) else: - raise oefmt(space.w_TypeError, - "%s() expects at least one argument", - implementation_of) - w_key = None - w_default = None - if any_kwds: - kwds = args.keywords - for n in range(len(kwds)): - if kwds[n] == "key": - w_key = args.keywords_w[n] - elif kwds[n] == "default": - w_default = args.keywords_w[n] - else: - raise oefmt(space.w_TypeError, - "%s() got unexpected keyword argument", - implementation_of) - - if w_default is not None and len(args_w) > 1: - raise oefmt(space.w_TypeError, - "Cannot specify a default for %s() with multiple positional arguments", - implementation_of) - - w_iter = space.iter(w_sequence) - w_type = space.type(w_iter) - has_key = w_key is not None - has_item = False - w_max_item = None - w_max_val = None - while True: - if not unroll: - jitdriver.jit_merge_point(has_key=has_key, has_item=has_item, w_type=w_type) - try: - w_item = space.next(w_iter) - except OperationError as e: - if not e.match(space, space.w_StopIteration): - raise - break - if has_key: - w_compare_with = space.call_function(w_key,
w_item) - else: - w_compare_with = w_item - if not has_item or \ - space.is_true(compare(w_compare_with, w_max_val)): - has_item = True - w_max_item = w_item - w_max_val = w_compare_with - if w_max_item is None: - if w_default is not None: - w_max_item = w_default - else: - raise oefmt(space.w_ValueError, "arg is an empty sequence") - return w_max_item - if unroll: - min_max_impl = jit.unroll_safe(min_max_impl) - return min_max_impl - -min_max_unroll = make_min_max(True) -min_max_normal = make_min_max(False) + w_compare_with = w_item + if space.is_true(compare(w_compare_with, w_max_val)): + w_max_item = w_item + w_max_val = w_compare_with + return w_max_item @specialize.arg(2) def min_max(space, args, implementation_of): - # the 'normal' version includes a JIT merge point, which will make a - # new loop (from the interpreter or from another JIT loop). If we - # give exactly two arguments to the call to max(), or a JIT virtual - # list of arguments, then we pick the 'unroll' version with no JIT - # merge point. 
- if jit.isvirtual(args.arguments_w) or len(args.arguments_w) == 2: - return min_max_unroll(space, args, implementation_of) + w_key = None + w_default = None + if bool(args.keywords): + kwds = args.keywords + for n in range(len(kwds)): + if kwds[n] == "key": + w_key = args.keywords_w[n] + elif kwds[n] == "default": + w_default = args.keywords_w[n] + else: + raise oefmt(space.w_TypeError, + "%s() got unexpected keyword argument", + implementation_of) + # + args_w = args.arguments_w + if len(args_w) > 1: + if w_default is not None: + raise oefmt(space.w_TypeError, + "Cannot specify a default for %s() with multiple " + "positional arguments", implementation_of) + return min_max_multiple_args(space, args_w, w_key, implementation_of) + elif len(args_w): + return min_max_sequence(space, args_w[0], w_key, w_default, + implementation_of) else: - return min_max_normal(space, args, implementation_of) -min_max._always_inline = True + raise oefmt(space.w_TypeError, + "%s() expects at least one argument", + implementation_of) def max(space, __args__): """max(iterable[, key=func]) -> value diff --git a/pypy/module/__builtin__/test/test_functional.py b/pypy/module/__builtin__/test/test_functional.py --- a/pypy/module/__builtin__/test/test_functional.py +++ b/pypy/module/__builtin__/test/test_functional.py @@ -654,6 +654,10 @@ raises(TypeError, max, 1, default=0) raises(TypeError, max, default=1) + def test_max_list_and_key(self): + assert max(["100", "50", "30", "-200"], key=int) == "100" + assert max("100", "50", "30", "-200", key=int) == "100" + try: from hypothesis import given, strategies, example diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -3957,6 +3957,7 @@ z3 = cast(BVoidP, 0) z4 = cast(BUCharP, 0) with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") 
newp(new_pointer_type(BIntP), z1) # warn assert len(w) == 1 newp(new_pointer_type(BVoidP), z1) # fine diff --git a/pypy/module/pypyjit/test_pypy_c/test_min_max.py b/pypy/module/pypyjit/test_pypy_c/test_min_max.py --- a/pypy/module/pypyjit/test_pypy_c/test_min_max.py +++ b/pypy/module/pypyjit/test_pypy_c/test_min_max.py @@ -30,37 +30,42 @@ sa = 0 while i < 30000: lst = range(i % 1000 + 2) - sa += max(*lst) # ID: max + sa += max(*lst) # ID: callmax i += 1 return sa log = self.run(main, []) assert log.result == main() loop, = log.loops_by_filename(self.filepath) - assert loop.match(""" + assert loop.match_by_id('callmax', """ ... - p76 = call_assembler_r(..., descr=...) + p76 = call_may_force_r(_, _, _, _, descr=...) ... """) - loop2 = log.loops[0] - loop2.match(''' - ... - label(..., descr=...) - ... - label(..., descr=...) - guard_not_invalidated? - i17 = int_ge(i11, i7) - guard_false(i17, descr=...) - p18 = getarrayitem_gc_r(p5, i11, descr=...) - i19 = int_add(i11, 1) - setfield_gc(p2, i19, descr=...) - guard_nonnull_class(p18, ConstClass(W_IntObject), descr=...) - i20 = getfield_gc_i(p18, descr=...) - i21 = int_gt(i20, i14) - guard_true(i21, descr=...) - jump(..., descr=...) - ''') - # XXX could be "guard_class(p18)" instead; we lost somewhere - # the information that it cannot be null. + + #----- the following logic used to check the content of the assembly + #----- generated for the loop in max(), but now we no longer produce + #----- any custom assembly in this case. It used to say + #----- 'call_assembler_r' above, and now it says 'call_may_force_r'. + #loop2 = log.loops[0] + #loop2.match(''' + #... + #label(..., descr=...) + #... + #label(..., descr=...) + #guard_not_invalidated? + #i17 = int_ge(i11, i7) + #guard_false(i17, descr=...) + #p18 = getarrayitem_gc_r(p5, i11, descr=...) + #i19 = int_add(i11, 1) + #setfield_gc(p2, i19, descr=...) + #guard_nonnull_class(p18, ConstClass(W_IntObject), descr=...) + #i20 = getfield_gc_i(p18, descr=...) 
+ #i21 = int_gt(i20, i14) + #guard_true(i21, descr=...) + #jump(..., descr=...) + #''') + ## XXX could be "guard_class(p18)" instead; we lost somewhere + ## the information that it cannot be null. def test_iter_max(self): def main(): diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py @@ -1387,6 +1387,7 @@ ffi = FFI(backend=self.Backend()) ffi.cdef("enum foo;") with warnings.catch_warnings(record=True) as log: + warnings.simplefilter("always") n = ffi.cast("enum foo", -1) assert int(n) == 0xffffffff assert str(log[0].message) == ( diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py @@ -46,14 +46,14 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def test_sin_no_return_value(self): + def test_lround_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void sin(double x); + void lround(double x); """) m = ffi.dlopen(lib_m) - x = m.sin(1.23) + x = m.lround(1.23) assert x is None def test_dlopen_filename(self): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py @@ -160,15 +160,17 @@ proto = CFUNCTYPE(c_int, RECT) def callback(point): + point.left *= -1 return point.left+point.top+point.right+point.bottom cbp = proto(callback) - rect = RECT(1000,100,10,1) + rect = RECT(-1000,100,10,1) res = cbp(rect) assert res == 1111 + assert rect.left == 
-1000 # must not have been changed! def test_callback_from_c_with_struct_argument(self): import conftest diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -186,7 +186,7 @@ self.cc = cc # detect version of current compiler - returncode, stdout, stderr = _run_subprocess(self.cc, '', + returncode, stdout, stderr = _run_subprocess(self.cc, [], env=self.c_environ) r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) if r is not None: @@ -196,7 +196,7 @@ self.version = 0 # Try to find a masm assembler - returncode, stdout, stderr = _run_subprocess('ml.exe', '', + returncode, stdout, stderr = _run_subprocess('ml.exe', [], env=self.c_environ) r = re.search('Macro Assembler', stderr) if r is None and os.path.exists('c:/masm32/bin/ml.exe'): From pypy.commits at gmail.com Sun Sep 23 07:08:24 2018 From: pypy.commits at gmail.com (arigo) Date: Sun, 23 Sep 2018 04:08:24 -0700 (PDT) Subject: [pypy-commit] pypy default: fix error message Message-ID: <5ba77428.1c69fb81.45493.d19d@mx.google.com> Author: Armin Rigo Branch: Changeset: r95156:3c6307e2bd64 Date: 2018-09-23 13:07 +0200 http://bitbucket.org/pypy/pypy/changeset/3c6307e2bd64/ Log: fix error message diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py --- a/rpython/rlib/jit.py +++ b/rpython/rlib/jit.py @@ -151,7 +151,7 @@ if getattr(func, '_elidable_function_', False): raise TypeError("it does not make sense for %s to be both elidable and unroll_safe" % func) if not getattr(func, '_jit_look_inside_', True): - raise TypeError("it does not make sense for %s to be both elidable and dont_look_inside" % func) + raise TypeError("it does not make sense for %s to be both unroll_safe and dont_look_inside" % func) func._jit_unroll_safe_ = True return func From pypy.commits at gmail.com Sun Sep 23 07:08:38 2018 From: pypy.commits at gmail.com (arigo) Date: Sun, 23 Sep 2018 04:08:38 
-0700 (PDT) Subject: [pypy-commit] pypy py3.5: oops, unroll this dummy loop Message-ID: <5ba77436.1c69fb81.ae453.236f@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r95157:b143f3f8da5d Date: 2018-09-23 13:08 +0200 http://bitbucket.org/pypy/pypy/changeset/b143f3f8da5d/ Log: oops, unroll this dummy loop diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py --- a/pypy/module/__builtin__/functional.py +++ b/pypy/module/__builtin__/functional.py @@ -170,6 +170,7 @@ w_max_val = w_compare_with return w_max_item +@jit.unroll_safe # the loop over kwds @specialize.arg(2) def min_max(space, args, implementation_of): w_key = None From pypy.commits at gmail.com Sun Sep 23 09:20:43 2018 From: pypy.commits at gmail.com (mattip) Date: Sun, 23 Sep 2018 06:20:43 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8-py3: merge py3.5 into branch Message-ID: <5ba7932b.1c69fb81.4f325.6b5d@mx.google.com> Author: Matti Picus Branch: unicode-utf8-py3 Changeset: r95158:5a2d488be5e6 Date: 2018-09-23 16:08 +0300 http://bitbucket.org/pypy/pypy/changeset/5a2d488be5e6/ Log: merge py3.5 into branch diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -61,6 +61,9 @@ git = mod.StupidGit() +class ExampleClassWithSlot(object): + __slots__ = 'myslot' + class IsTestBase(unittest.TestCase): predicates = set([inspect.isbuiltin, inspect.isclass, inspect.iscode, inspect.isframe, inspect.isfunction, inspect.ismethod, @@ -131,8 +134,11 @@ self.istest(inspect.iscoroutinefunction, 'coroutine_function_example') if hasattr(types, 'MemberDescriptorType'): - self.istest(inspect.ismemberdescriptor, - 'type(lambda: None).__globals__') + # App-level slots are member descriptors on both PyPy and + # CPython, but the various built-in attributes are all + # getsetdescriptors on PyPy. So check ismemberdescriptor() + # with an app-level slot.
+ self.istest(inspect.ismemberdescriptor, 'ExampleClassWithSlot.myslot') else: self.assertFalse(inspect.ismemberdescriptor(datetime.timedelta.days)) diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -486,6 +486,8 @@ return cobj, cobj._to_ffi_param(), type(cobj) def _convert_args_for_callback(self, argtypes, args): + from _ctypes.structure import StructOrUnion + # assert len(argtypes) == len(args) newargs = [] for argtype, arg in zip(argtypes, args): @@ -495,6 +497,10 @@ param = param._get_buffer_value() elif self._is_primitive(argtype): param = param.value + elif isinstance(param, StructOrUnion): # not a *pointer* to struct + newparam = StructOrUnion.__new__(type(param)) + param._copy_to(newparam._buffer.buffer) + param = newparam newargs.append(param) return newargs diff --git a/lib_pypy/cffi/setuptools_ext.py b/lib_pypy/cffi/setuptools_ext.py --- a/lib_pypy/cffi/setuptools_ext.py +++ b/lib_pypy/cffi/setuptools_ext.py @@ -162,6 +162,17 @@ module_path = module_name.split('.') module_path[-1] += '.py' generate_mod(os.path.join(self.build_lib, *module_path)) + def get_source_files(self): + # This is called from 'setup.py sdist' only. Exclude + # the generate .py module in this case. + saved_py_modules = self.py_modules + try: + if saved_py_modules: + self.py_modules = [m for m in saved_py_modules + if m != module_name] + return base_class.get_source_files(self) + finally: + self.py_modules = saved_py_modules dist.cmdclass['build_py'] = build_py_make_mod # distutils and setuptools have no notion I could find of a @@ -171,6 +182,7 @@ # the module. So we add it here, which gives a few apparently # harmless warnings about not finding the file outside the # build directory. + # Then we need to hack more in get_source_files(); see above. 
if dist.py_modules is None: dist.py_modules = [] dist.py_modules.append(module_name) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -36,7 +36,10 @@ Small refactorings in the Python parser. +.. branch: fix-readme-typo + .. branch: unicode-utf8-re + .. branch: utf8-io Utf8 handling for unicode diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py --- a/pypy/module/__builtin__/functional.py +++ b/pypy/module/__builtin__/functional.py @@ -108,101 +108,99 @@ max_jitdriver = jit.JitDriver(name='max', greens=['has_key', 'has_item', 'w_type'], reds='auto') -def make_min_max(unroll): - @specialize.arg(2) - def min_max_impl(space, args, implementation_of): - if implementation_of == "max": - compare = space.gt - jitdriver = max_jitdriver + at specialize.arg(4) +def min_max_sequence(space, w_sequence, w_key, w_default, implementation_of): + if implementation_of == "max": + compare = space.gt + jitdriver = max_jitdriver + else: + compare = space.lt + jitdriver = min_jitdriver + w_iter = space.iter(w_sequence) + w_type = space.type(w_iter) + has_key = w_key is not None + has_item = False + w_max_item = w_default + w_max_val = None + while True: + jitdriver.jit_merge_point(has_key=has_key, has_item=has_item, + w_type=w_type) + try: + w_item = space.next(w_iter) + except OperationError as e: + if not e.match(space, space.w_StopIteration): + raise + break + if has_key: + w_compare_with = space.call_function(w_key, w_item) else: - compare = space.lt - jitdriver = min_jitdriver - any_kwds = bool(args.keywords) - args_w = args.arguments_w - if len(args_w) > 1: - if unroll and len(args_w) == 2 and not any_kwds: - # a fast path for the common case, useful for interpreted - # mode and to reduce the length of the jit trace - w0, w1 = args_w - if space.is_true(compare(w1, w0)): - return w1 - else: - return w0 - w_sequence = space.newtuple(args_w) - elif 
len(args_w): - w_sequence = args_w[0] + w_compare_with = w_item + if (not has_item or + space.is_true(compare(w_compare_with, w_max_val))): + has_item = True + w_max_item = w_item + w_max_val = w_compare_with + if w_max_item is None: + raise oefmt(space.w_ValueError, "arg is an empty sequence") + return w_max_item + + at specialize.arg(3) + at jit.look_inside_iff(lambda space, args_w, w_key, implementation_of: + jit.loop_unrolling_heuristic(args_w, len(args_w), 3)) +def min_max_multiple_args(space, args_w, w_key, implementation_of): + # case of multiple arguments (at least two). We unroll it if there + # are 2 or 3 arguments. + if implementation_of == "max": + compare = space.gt + else: + compare = space.lt + w_max_item = args_w[0] + if w_key is not None: + w_max_val = space.call_function(w_key, w_max_item) + else: + w_max_val = w_max_item + for i in range(1, len(args_w)): + w_item = args_w[i] + if w_key is not None: + w_compare_with = space.call_function(w_key, w_item) else: - raise oefmt(space.w_TypeError, - "%s() expects at least one argument", - implementation_of) - w_key = None - w_default = None - if any_kwds: - kwds = args.keywords - for n in range(len(kwds)): - if kwds[n] == "key": - w_key = args.keywords_w[n] - elif kwds[n] == "default": - w_default = args.keywords_w[n] - else: - raise oefmt(space.w_TypeError, - "%s() got unexpected keyword argument", - implementation_of) + w_compare_with = w_item + if space.is_true(compare(w_compare_with, w_max_val)): + w_max_item = w_item + w_max_val = w_compare_with + return w_max_item - if w_default is not None and len(args_w) > 1: - raise oefmt(space.w_TypeError, - "Cannot specify a default for %s() with multiple positional arguments", - implementation_of) - - w_iter = space.iter(w_sequence) - w_type = space.type(w_iter) - has_key = w_key is not None - has_item = False - w_max_item = None - w_max_val = None - while True: - if not unroll: - jitdriver.jit_merge_point(has_key=has_key, has_item=has_item, w_type=w_type) - 
try: - w_item = space.next(w_iter) - except OperationError as e: - if not e.match(space, space.w_StopIteration): - raise - break - if has_key: - w_compare_with = space.call_function(w_key, w_item) - else: - w_compare_with = w_item - if not has_item or \ - space.is_true(compare(w_compare_with, w_max_val)): - has_item = True - w_max_item = w_item - w_max_val = w_compare_with - if w_max_item is None: - if w_default is not None: - w_max_item = w_default - else: - raise oefmt(space.w_ValueError, "arg is an empty sequence") - return w_max_item - if unroll: - min_max_impl = jit.unroll_safe(min_max_impl) - return min_max_impl - -min_max_unroll = make_min_max(True) -min_max_normal = make_min_max(False) - + at jit.unroll_safe # the loop over kwds @specialize.arg(2) def min_max(space, args, implementation_of): - # the 'normal' version includes a JIT merge point, which will make a - # new loop (from the interpreter or from another JIT loop). If we - # give exactly two arguments to the call to max(), or a JIT virtual - # list of arguments, then we pick the 'unroll' version with no JIT - # merge point. 
- if jit.isvirtual(args.arguments_w) or len(args.arguments_w) == 2: - return min_max_unroll(space, args, implementation_of) + w_key = None + w_default = None + if bool(args.keywords): + kwds = args.keywords + for n in range(len(kwds)): + if kwds[n] == "key": + w_key = args.keywords_w[n] + elif kwds[n] == "default": + w_default = args.keywords_w[n] + else: + raise oefmt(space.w_TypeError, + "%s() got unexpected keyword argument", + implementation_of) + # + args_w = args.arguments_w + if len(args_w) > 1: + if w_default is not None: + raise oefmt(space.w_TypeError, + "Cannot specify a default for %s() with multiple " + "positional arguments", implementation_of) + return min_max_multiple_args(space, args_w, w_key, implementation_of) + elif len(args_w): + return min_max_sequence(space, args_w[0], w_key, w_default, + implementation_of) else: - return min_max_normal(space, args, implementation_of) -min_max._always_inline = True + raise oefmt(space.w_TypeError, + "%s() expects at least one argument", + implementation_of) def max(space, __args__): """max(iterable[, key=func]) -> value diff --git a/pypy/module/__builtin__/test/test_functional.py b/pypy/module/__builtin__/test/test_functional.py --- a/pypy/module/__builtin__/test/test_functional.py +++ b/pypy/module/__builtin__/test/test_functional.py @@ -654,6 +654,10 @@ raises(TypeError, max, 1, default=0) raises(TypeError, max, default=1) + def test_max_list_and_key(self): + assert max(["100", "50", "30", "-200"], key=int) == "100" + assert max("100", "50", "30", "-200", key=int) == "100" + try: from hypothesis import given, strategies, example diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -3957,6 +3957,7 @@ z3 = cast(BVoidP, 0) z4 = cast(BUCharP, 0) with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") 
newp(new_pointer_type(BIntP), z1) # warn assert len(w) == 1 newp(new_pointer_type(BVoidP), z1) # fine diff --git a/pypy/module/pypyjit/test_pypy_c/test_min_max.py b/pypy/module/pypyjit/test_pypy_c/test_min_max.py --- a/pypy/module/pypyjit/test_pypy_c/test_min_max.py +++ b/pypy/module/pypyjit/test_pypy_c/test_min_max.py @@ -30,37 +30,42 @@ sa = 0 while i < 30000: lst = range(i % 1000 + 2) - sa += max(*lst) # ID: max + sa += max(*lst) # ID: callmax i += 1 return sa log = self.run(main, []) assert log.result == main() loop, = log.loops_by_filename(self.filepath) - assert loop.match(""" + assert loop.match_by_id('callmax', """ ... - p76 = call_assembler_r(..., descr=...) + p76 = call_may_force_r(_, _, _, _, descr=...) ... """) - loop2 = log.loops[0] - loop2.match(''' - ... - label(..., descr=...) - ... - label(..., descr=...) - guard_not_invalidated? - i17 = int_ge(i11, i7) - guard_false(i17, descr=...) - p18 = getarrayitem_gc_r(p5, i11, descr=...) - i19 = int_add(i11, 1) - setfield_gc(p2, i19, descr=...) - guard_nonnull_class(p18, ConstClass(W_IntObject), descr=...) - i20 = getfield_gc_i(p18, descr=...) - i21 = int_gt(i20, i14) - guard_true(i21, descr=...) - jump(..., descr=...) - ''') - # XXX could be "guard_class(p18)" instead; we lost somewhere - # the information that it cannot be null. + + #----- the following logic used to check the content of the assembly + #----- generated for the loop in max(), but now we no longer produce + #----- any custom assembly in this case. It used to say + #----- 'call_assembler_r' above, and now it says 'call_may_force_r'. + #loop2 = log.loops[0] + #loop2.match(''' + #... + #label(..., descr=...) + #... + #label(..., descr=...) + #guard_not_invalidated? + #i17 = int_ge(i11, i7) + #guard_false(i17, descr=...) + #p18 = getarrayitem_gc_r(p5, i11, descr=...) + #i19 = int_add(i11, 1) + #setfield_gc(p2, i19, descr=...) + #guard_nonnull_class(p18, ConstClass(W_IntObject), descr=...) + #i20 = getfield_gc_i(p18, descr=...) 
+ #i21 = int_gt(i20, i14) + #guard_true(i21, descr=...) + #jump(..., descr=...) + #''') + ## XXX could be "guard_class(p18)" instead; we lost somewhere + ## the information that it cannot be null. def test_iter_max(self): def main(): diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py @@ -1387,6 +1387,7 @@ ffi = FFI(backend=self.Backend()) ffi.cdef("enum foo;") with warnings.catch_warnings(record=True) as log: + warnings.simplefilter("always") n = ffi.cast("enum foo", -1) assert int(n) == 0xffffffff assert str(log[0].message) == ( diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py @@ -46,14 +46,14 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def test_sin_no_return_value(self): + def test_lround_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void sin(double x); + void lround(double x); """) m = ffi.dlopen(lib_m) - x = m.sin(1.23) + x = m.lround(1.23) assert x is None def test_dlopen_filename(self): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py @@ -160,15 +160,17 @@ proto = CFUNCTYPE(c_int, RECT) def callback(point): + point.left *= -1 return point.left+point.top+point.right+point.bottom cbp = proto(callback) - rect = RECT(1000,100,10,1) + rect = RECT(-1000,100,10,1) res = cbp(rect) assert res == 1111 + assert rect.left == 
-1000 # must not have been changed! def test_callback_from_c_with_struct_argument(self): import conftest diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -186,7 +186,7 @@ self.cc = cc # detect version of current compiler - returncode, stdout, stderr = _run_subprocess(self.cc, '', + returncode, stdout, stderr = _run_subprocess(self.cc, [], env=self.c_environ) r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) if r is not None: @@ -196,7 +196,7 @@ self.version = 0 # Try to find a masm assembler - returncode, stdout, stderr = _run_subprocess('ml.exe', '', + returncode, stdout, stderr = _run_subprocess('ml.exe', [], env=self.c_environ) r = re.search('Macro Assembler', stderr) if r is None and os.path.exists('c:/masm32/bin/ml.exe'): From pypy.commits at gmail.com Mon Sep 24 06:22:56 2018 From: pypy.commits at gmail.com (arigo) Date: Mon, 24 Sep 2018 03:22:56 -0700 (PDT) Subject: [pypy-commit] pypy default: Merged in hberaud/pypy/avoid_shell_injection_in_shutil (pull request #628) Message-ID: <5ba8bb00.1c69fb81.1f02.ed30@mx.google.com> Author: Armin Rigo Branch: Changeset: r95160:496edc03bd13 Date: 2018-09-24 10:22 +0000 http://bitbucket.org/pypy/pypy/changeset/496edc03bd13/ Log: Merged in hberaud/pypy/avoid_shell_injection_in_shutil (pull request #628) Use subprocess to Avoid shell injection in shutil module diff --git a/lib-python/2.7/shutil.py b/lib-python/2.7/shutil.py --- a/lib-python/2.7/shutil.py +++ b/lib-python/2.7/shutil.py @@ -396,17 +396,21 @@ return archive_name -def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False): +def _call_external_zip(base_dir, zip_filename, verbose, dry_run, logger): # XXX see if we want to keep an external call here if verbose: zipoptions = "-r" else: zipoptions = "-rq" - from distutils.errors import DistutilsExecError - from distutils.spawn import spawn + 
cmd = ["zip", zipoptions, zip_filename, base_dir] + if logger is not None: + logger.info(' '.join(cmd)) + if dry_run: + return + import subprocess try: - spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run) - except DistutilsExecError: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: # XXX really should distinguish between "couldn't find # external 'zip' command" and "zip failed". raise ExecError, \ @@ -440,7 +444,7 @@ zipfile = None if zipfile is None: - _call_external_zip(base_dir, zip_filename, verbose, dry_run) + _call_external_zip(base_dir, zip_filename, verbose, dry_run, logger) else: if logger is not None: logger.info("creating '%s' and adding '%s' to it", From pypy.commits at gmail.com Mon Sep 24 06:23:04 2018 From: pypy.commits at gmail.com (hberaud) Date: Mon, 24 Sep 2018 03:23:04 -0700 (PDT) Subject: [pypy-commit] pypy avoid_shell_injection_in_shutil: Use subprocess to Avoid shell injection in shutil module Message-ID: <5ba8bb08.1c69fb81.f8e17.5eac@mx.google.com> Author: Hervé Beraud Branch: avoid_shell_injection_in_shutil Changeset: r95159:4f1f9091670a Date: 2018-09-24 10:33 +0200 http://bitbucket.org/pypy/pypy/changeset/4f1f9091670a/ Log: Use subprocess to Avoid shell injection in shutil module Convert shutil._call_external_zip to use subprocess rather than distutlils.spawn Subject: When shutil.make_archive falls back to te external zip problem, it use subprocess to invoke it rather than distutils.spawn. This closes a possible shell injection vector. distutils.spawn isn't very good at quoting command lines. 
Resolve: https://bugs.python.org/issue34540 Original-Author: Benjamin Peterson diff --git a/lib-python/2.7/shutil.py b/lib-python/2.7/shutil.py --- a/lib-python/2.7/shutil.py +++ b/lib-python/2.7/shutil.py @@ -396,17 +396,21 @@ return archive_name -def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False): +def _call_external_zip(base_dir, zip_filename, verbose, dry_run, logger): # XXX see if we want to keep an external call here if verbose: zipoptions = "-r" else: zipoptions = "-rq" - from distutils.errors import DistutilsExecError - from distutils.spawn import spawn + cmd = ["zip", zipoptions, zip_filename, base_dir] + if logger is not None: + logger.info(' '.join(cmd)) + if dry_run: + return + import subprocess try: - spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run) - except DistutilsExecError: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: # XXX really should distinguish between "couldn't find # external 'zip' command" and "zip failed". 
raise ExecError, \ @@ -440,7 +444,7 @@ zipfile = None if zipfile is None: - _call_external_zip(base_dir, zip_filename, verbose, dry_run) + _call_external_zip(base_dir, zip_filename, verbose, dry_run, logger) else: if logger is not None: logger.info("creating '%s' and adding '%s' to it", From pypy.commits at gmail.com Tue Sep 25 11:45:07 2018 From: pypy.commits at gmail.com (arigo) Date: Tue, 25 Sep 2018 08:45:07 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Issue #2886 Message-ID: <5baa5803.1c69fb81.d557b.8084@mx.google.com> Author: Armin Rigo Branch: py3.6 Changeset: r95161:1cef1b3f6ac9 Date: 2018-09-25 17:44 +0200 http://bitbucket.org/pypy/pypy/changeset/1cef1b3f6ac9/ Log: Issue #2886 SyntaxError on float literals with underscores in the fractional part (I *think* the fix is just this one line in gendfa.py) diff --git a/pypy/interpreter/pyparser/dfa_generated.py b/pypy/interpreter/pyparser/dfa_generated.py --- a/pypy/interpreter/pyparser/dfa_generated.py +++ b/pypy/interpreter/pyparser/dfa_generated.py @@ -10,11 +10,12 @@ False, False, True, False, False, True, False, False, True, False, True, False, True, False, False, True, False, False, True, False, True, - False, True, False, True, False, False, False, - False, True, True, False, False, False, False, - True, False, True, False, True, False, True, - False, True, True, False, True, False, True, - False, False, True, True, True, True, True] + False, True, False, True, False, False, True, + False, False, True, True, False, False, False, + False, True, False, True, False, True, False, + False, True, False, True, True, False, True, + False, True, False, True, False, True, True, True, + True, True] states = [ # 0 {'\t': 0, '\n': 15, '\x0c': 0, @@ -190,23 +191,23 @@ '7': 27, '8': 27, '9': 27, 'E': 29, 'J': 15, '_': 49, 'e': 29, 'j': 15}, # 28 - {'0': 28, '1': 28, '2': 28, '3': 28, - '4': 28, '5': 28, '6': 28, '7': 28, - '8': 28, '9': 28, 'E': 50, 'J': 15, - 'e': 50, 'j': 15}, + {'0': 50, '1': 50, '2': 50, '3': 50, + 
'4': 50, '5': 50, '6': 50, '7': 50, + '8': 50, '9': 50, 'E': 51, 'J': 15, + 'e': 51, 'j': 15}, # 29 - {'+': 51, '-': 51, '0': 52, '1': 52, - '2': 52, '3': 52, '4': 52, '5': 52, - '6': 52, '7': 52, '8': 52, '9': 52}, + {'+': 52, '-': 52, '0': 53, '1': 53, + '2': 53, '3': 53, '4': 53, '5': 53, + '6': 53, '7': 53, '8': 53, '9': 53}, # 30 - {'0': 53, '1': 53, '2': 53, '3': 53, - '4': 53, '5': 53, '6': 53, '7': 53, - '8': 53, '9': 53}, + {'0': 54, '1': 54, '2': 54, '3': 54, + '4': 54, '5': 54, '6': 54, '7': 54, + '8': 54, '9': 54}, # 31 {'0': 31, '1': 31, '2': 31, '3': 31, '4': 31, '5': 31, '6': 31, '7': 31, - '8': 31, '9': 31, 'E': 50, 'J': 15, - '_': 54, 'e': 50, 'j': 15}, + '8': 31, '9': 31, 'E': 51, 'J': 15, + '_': 55, 'e': 51, 'j': 15}, # 32 {'.': 15}, # 33 @@ -218,14 +219,14 @@ # 35 {"'": 15}, # 36 - {automata.DEFAULT: 55, '\n': 15, '\r': 16}, + {automata.DEFAULT: 56, '\n': 15, '\r': 16}, # 37 {automata.DEFAULT: 37, '\n': 34, '\r': 34, "'": 15, '\\': 36}, # 38 {'"': 15}, # 39 - {automata.DEFAULT: 56, '\n': 15, '\r': 16}, + {automata.DEFAULT: 57, '\n': 15, '\r': 16}, # 40 {automata.DEFAULT: 40, '\n': 34, '\r': 34, '"': 15, '\\': 39}, @@ -234,26 +235,26 @@ '4': 41, '5': 41, '6': 41, '7': 41, '8': 41, '9': 41, 'A': 41, 'B': 41, 'C': 41, 'D': 41, 'E': 41, 'F': 41, - '_': 57, 'a': 41, 'b': 41, 'c': 41, + '_': 58, 'a': 41, 'b': 41, 'c': 41, 'd': 41, 'e': 41, 'f': 41}, # 42 - {'0': 58, '1': 58, '2': 58, '3': 58, - '4': 58, '5': 58, '6': 58, '7': 58, - '8': 58, '9': 58, 'A': 58, 'B': 58, - 'C': 58, 'D': 58, 'E': 58, 'F': 58, - 'a': 58, 'b': 58, 'c': 58, 'd': 58, - 'e': 58, 'f': 58}, + {'0': 59, '1': 59, '2': 59, '3': 59, + '4': 59, '5': 59, '6': 59, '7': 59, + '8': 59, '9': 59, 'A': 59, 'B': 59, + 'C': 59, 'D': 59, 'E': 59, 'F': 59, + 'a': 59, 'b': 59, 'c': 59, 'd': 59, + 'e': 59, 'f': 59}, # 43 {'0': 43, '1': 43, '2': 43, '3': 43, '4': 43, '5': 43, '6': 43, '7': 43, - '_': 59}, + '_': 60}, # 44 - {'0': 60, '1': 60, '2': 60, '3': 60, - '4': 60, '5': 60, '6': 60, '7': 60}, 
+ {'0': 61, '1': 61, '2': 61, '3': 61, + '4': 61, '5': 61, '6': 61, '7': 61}, # 45 - {'0': 45, '1': 45, '_': 61}, + {'0': 45, '1': 45, '_': 62}, # 46 - {'0': 62, '1': 62}, + {'0': 63, '1': 63}, # 47 {'.': 28, '0': 47, '1': 48, '2': 48, '3': 48, '4': 48, '5': 48, '6': 48, @@ -269,127 +270,141 @@ '4': 48, '5': 48, '6': 48, '7': 48, '8': 48, '9': 48}, # 50 - {'+': 63, '-': 63, '0': 64, '1': 64, - '2': 64, '3': 64, '4': 64, '5': 64, - '6': 64, '7': 64, '8': 64, '9': 64}, + {'0': 50, '1': 50, '2': 50, '3': 50, + '4': 50, '5': 50, '6': 50, '7': 50, + '8': 50, '9': 50, 'E': 51, 'J': 15, + '_': 64, 'e': 51, 'j': 15}, # 51 - {'0': 52, '1': 52, '2': 52, '3': 52, - '4': 52, '5': 52, '6': 52, '7': 52, - '8': 52, '9': 52}, + {'+': 65, '-': 65, '0': 66, '1': 66, + '2': 66, '3': 66, '4': 66, '5': 66, + '6': 66, '7': 66, '8': 66, '9': 66}, # 52 - {'0': 52, '1': 52, '2': 52, '3': 52, - '4': 52, '5': 52, '6': 52, '7': 52, - '8': 52, '9': 52, 'J': 15, '_': 65, + {'0': 53, '1': 53, '2': 53, '3': 53, + '4': 53, '5': 53, '6': 53, '7': 53, + '8': 53, '9': 53}, + # 53 + {'0': 53, '1': 53, '2': 53, '3': 53, + '4': 53, '5': 53, '6': 53, '7': 53, + '8': 53, '9': 53, 'J': 15, '_': 67, 'j': 15}, - # 53 - {'.': 28, '0': 53, '1': 53, '2': 53, - '3': 53, '4': 53, '5': 53, '6': 53, - '7': 53, '8': 53, '9': 53, 'E': 29, + # 54 + {'.': 28, '0': 54, '1': 54, '2': 54, + '3': 54, '4': 54, '5': 54, '6': 54, + '7': 54, '8': 54, '9': 54, 'E': 29, 'J': 15, '_': 30, 'e': 29, 'j': 15}, - # 54 + # 55 + {'0': 68, '1': 68, '2': 68, '3': 68, + '4': 68, '5': 68, '6': 68, '7': 68, + '8': 68, '9': 68}, + # 56 + {automata.DEFAULT: 56, '\n': 34, + '\r': 34, "'": 15, '\\': 36}, + # 57 + {automata.DEFAULT: 57, '\n': 34, + '\r': 34, '"': 15, '\\': 39}, + # 58 + {'0': 69, '1': 69, '2': 69, '3': 69, + '4': 69, '5': 69, '6': 69, '7': 69, + '8': 69, '9': 69, 'A': 69, 'B': 69, + 'C': 69, 'D': 69, 'E': 69, 'F': 69, + 'a': 69, 'b': 69, 'c': 69, 'd': 69, + 'e': 69, 'f': 69}, + # 59 + {'0': 59, '1': 59, '2': 59, '3': 59, + '4': 
59, '5': 59, '6': 59, '7': 59, + '8': 59, '9': 59, 'A': 59, 'B': 59, + 'C': 59, 'D': 59, 'E': 59, 'F': 59, + '_': 70, 'a': 59, 'b': 59, 'c': 59, + 'd': 59, 'e': 59, 'f': 59}, + # 60 + {'0': 71, '1': 71, '2': 71, '3': 71, + '4': 71, '5': 71, '6': 71, '7': 71}, + # 61 + {'0': 61, '1': 61, '2': 61, '3': 61, + '4': 61, '5': 61, '6': 61, '7': 61, + '_': 72}, + # 62 + {'0': 73, '1': 73}, + # 63 + {'0': 63, '1': 63, '_': 74}, + # 64 + {'0': 75, '1': 75, '2': 75, '3': 75, + '4': 75, '5': 75, '6': 75, '7': 75, + '8': 75, '9': 75}, + # 65 {'0': 66, '1': 66, '2': 66, '3': 66, '4': 66, '5': 66, '6': 66, '7': 66, '8': 66, '9': 66}, - # 55 - {automata.DEFAULT: 55, '\n': 34, - '\r': 34, "'": 15, '\\': 36}, - # 56 - {automata.DEFAULT: 56, '\n': 34, - '\r': 34, '"': 15, '\\': 39}, - # 57 - {'0': 67, '1': 67, '2': 67, '3': 67, - '4': 67, '5': 67, '6': 67, '7': 67, - '8': 67, '9': 67, 'A': 67, 'B': 67, - 'C': 67, 'D': 67, 'E': 67, 'F': 67, - 'a': 67, 'b': 67, 'c': 67, 'd': 67, - 'e': 67, 'f': 67}, - # 58 - {'0': 58, '1': 58, '2': 58, '3': 58, - '4': 58, '5': 58, '6': 58, '7': 58, - '8': 58, '9': 58, 'A': 58, 'B': 58, - 'C': 58, 'D': 58, 'E': 58, 'F': 58, - '_': 68, 'a': 58, 'b': 58, 'c': 58, - 'd': 58, 'e': 58, 'f': 58}, - # 59 - {'0': 69, '1': 69, '2': 69, '3': 69, - '4': 69, '5': 69, '6': 69, '7': 69}, - # 60 - {'0': 60, '1': 60, '2': 60, '3': 60, - '4': 60, '5': 60, '6': 60, '7': 60, - '_': 70}, - # 61 - {'0': 71, '1': 71}, - # 62 - {'0': 62, '1': 62, '_': 72}, - # 63 - {'0': 64, '1': 64, '2': 64, '3': 64, - '4': 64, '5': 64, '6': 64, '7': 64, - '8': 64, '9': 64}, - # 64 - {'0': 64, '1': 64, '2': 64, '3': 64, - '4': 64, '5': 64, '6': 64, '7': 64, - '8': 64, '9': 64, 'J': 15, '_': 73, - 'j': 15}, - # 65 - {'0': 74, '1': 74, '2': 74, '3': 74, - '4': 74, '5': 74, '6': 74, '7': 74, - '8': 74, '9': 74}, # 66 {'0': 66, '1': 66, '2': 66, '3': 66, '4': 66, '5': 66, '6': 66, '7': 66, - '8': 66, '9': 66, 'E': 50, 'J': 15, - '_': 54, 'e': 50, 'j': 15}, + '8': 66, '9': 66, 'J': 15, '_': 76, + 
'j': 15}, # 67 - {'0': 67, '1': 67, '2': 67, '3': 67, - '4': 67, '5': 67, '6': 67, '7': 67, - '8': 67, '9': 67, 'A': 67, 'B': 67, - 'C': 67, 'D': 67, 'E': 67, 'F': 67, - '_': 57, 'a': 67, 'b': 67, 'c': 67, - 'd': 67, 'e': 67, 'f': 67}, + {'0': 77, '1': 77, '2': 77, '3': 77, + '4': 77, '5': 77, '6': 77, '7': 77, + '8': 77, '9': 77}, # 68 - {'0': 75, '1': 75, '2': 75, '3': 75, - '4': 75, '5': 75, '6': 75, '7': 75, - '8': 75, '9': 75, 'A': 75, 'B': 75, - 'C': 75, 'D': 75, 'E': 75, 'F': 75, - 'a': 75, 'b': 75, 'c': 75, 'd': 75, - 'e': 75, 'f': 75}, + {'0': 68, '1': 68, '2': 68, '3': 68, + '4': 68, '5': 68, '6': 68, '7': 68, + '8': 68, '9': 68, 'E': 51, 'J': 15, + '_': 55, 'e': 51, 'j': 15}, # 69 {'0': 69, '1': 69, '2': 69, '3': 69, '4': 69, '5': 69, '6': 69, '7': 69, - '_': 59}, + '8': 69, '9': 69, 'A': 69, 'B': 69, + 'C': 69, 'D': 69, 'E': 69, 'F': 69, + '_': 58, 'a': 69, 'b': 69, 'c': 69, + 'd': 69, 'e': 69, 'f': 69}, # 70 - {'0': 76, '1': 76, '2': 76, '3': 76, - '4': 76, '5': 76, '6': 76, '7': 76}, - # 71 - {'0': 71, '1': 71, '_': 61}, - # 72 - {'0': 77, '1': 77}, - # 73 {'0': 78, '1': 78, '2': 78, '3': 78, '4': 78, '5': 78, '6': 78, '7': 78, - '8': 78, '9': 78}, + '8': 78, '9': 78, 'A': 78, 'B': 78, + 'C': 78, 'D': 78, 'E': 78, 'F': 78, + 'a': 78, 'b': 78, 'c': 78, 'd': 78, + 'e': 78, 'f': 78}, + # 71 + {'0': 71, '1': 71, '2': 71, '3': 71, + '4': 71, '5': 71, '6': 71, '7': 71, + '_': 60}, + # 72 + {'0': 79, '1': 79, '2': 79, '3': 79, + '4': 79, '5': 79, '6': 79, '7': 79}, + # 73 + {'0': 73, '1': 73, '_': 62}, # 74 - {'0': 74, '1': 74, '2': 74, '3': 74, - '4': 74, '5': 74, '6': 74, '7': 74, - '8': 74, '9': 74, 'J': 15, '_': 65, - 'j': 15}, + {'0': 80, '1': 80}, # 75 {'0': 75, '1': 75, '2': 75, '3': 75, '4': 75, '5': 75, '6': 75, '7': 75, - '8': 75, '9': 75, 'A': 75, 'B': 75, - 'C': 75, 'D': 75, 'E': 75, 'F': 75, - '_': 68, 'a': 75, 'b': 75, 'c': 75, - 'd': 75, 'e': 75, 'f': 75}, + '8': 75, '9': 75, 'E': 51, 'J': 15, + '_': 64, 'e': 51, 'j': 15}, # 76 - {'0': 76, 
'1': 76, '2': 76, '3': 76, - '4': 76, '5': 76, '6': 76, '7': 76, - '_': 70}, + {'0': 81, '1': 81, '2': 81, '3': 81, + '4': 81, '5': 81, '6': 81, '7': 81, + '8': 81, '9': 81}, # 77 - {'0': 77, '1': 77, '_': 72}, + {'0': 77, '1': 77, '2': 77, '3': 77, + '4': 77, '5': 77, '6': 77, '7': 77, + '8': 77, '9': 77, 'J': 15, '_': 67, + 'j': 15}, # 78 {'0': 78, '1': 78, '2': 78, '3': 78, '4': 78, '5': 78, '6': 78, '7': 78, - '8': 78, '9': 78, 'J': 15, '_': 73, + '8': 78, '9': 78, 'A': 78, 'B': 78, + 'C': 78, 'D': 78, 'E': 78, 'F': 78, + '_': 70, 'a': 78, 'b': 78, 'c': 78, + 'd': 78, 'e': 78, 'f': 78}, + # 79 + {'0': 79, '1': 79, '2': 79, '3': 79, + '4': 79, '5': 79, '6': 79, '7': 79, + '_': 72}, + # 80 + {'0': 80, '1': 80, '_': 74}, + # 81 + {'0': 81, '1': 81, '2': 81, '3': 81, + '4': 81, '5': 81, '6': 81, '7': 81, + '8': 81, '9': 81, 'J': 15, '_': 76, 'j': 15}, ] pseudoDFA = automata.DFA(states, accepts) diff --git a/pypy/interpreter/pyparser/gendfa.py b/pypy/interpreter/pyparser/gendfa.py --- a/pypy/interpreter/pyparser/gendfa.py +++ b/pypy/interpreter/pyparser/gendfa.py @@ -113,7 +113,7 @@ chain(states, makeDigitsChain(), newArcPair(states, "."), - any(states, makeDigits())), + any(states, makeDigitsChain())), chain(states, newArcPair(states, "."), makeDigitsChain())) diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py --- a/pypy/interpreter/pyparser/test/test_pyparse.py +++ b/pypy/interpreter/pyparser/test/test_pyparse.py @@ -252,6 +252,13 @@ '.1_4j', '(1_2.5+3_3j)', '(.5_6j)', + '.2_3', + '.2_3e4', + '1.2_3', + '1.2_3_4', + '12.000_400', + '1_2.3_4', + '1_2.3_4e5_6', ] INVALID_UNDERSCORE_LITERALS = [ # Trailing underscores: @@ -305,6 +312,13 @@ # Complex cases with parens: '(1+1.5_j_)', '(1+1.5_j)', + # Extra underscores around decimal part + '._3', + '._3e4', + '1.2_', + '1._3_4', + '12._', + '1_2._3', ] for x in VALID_UNDERSCORE_LITERALS: tree = self.parse(x) From pypy.commits at gmail.com Tue Sep 25 12:25:36 
2018 From: pypy.commits at gmail.com (rlamy) Date: Tue, 25 Sep 2018 09:25:36 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Merged in davidcellis/pypy/py3.6 (pull request #629) Message-ID: <5baa6180.1c69fb81.66abb.6a97@mx.google.com> Author: Ronan Lamy Branch: py3.6 Changeset: r95166:0766e29baf22 Date: 2018-09-25 16:24 +0000 http://bitbucket.org/pypy/pypy/changeset/0766e29baf22/ Log: Merged in davidcellis/pypy/py3.6 (pull request #629) Fix failing test in test_float diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -219,16 +219,16 @@ w_value = w_x # 'x' is the keyword argument name in CPython if space.lookup(w_value, "__float__") is not None: w_obj = space.float(w_value) - if space.is_w(w_floattype, space.w_float): - w_obj_type = space.type(w_obj) - if not space.is_w(w_obj_type, space.w_float): - space.warn(space.newtext( - "%s.__float__ returned non-float (type %s). " - "The ability to return an instance of a strict subclass " - "of float is deprecated, and may be removed " - "in a future version of Python." % - (space.type(w_value).name, w_obj_type.name)), - space.w_DeprecationWarning) + w_obj_type = space.type(w_obj) + if not space.is_w(w_obj_type, space.w_float): + space.warn(space.newtext( + "%s.__float__ returned non-float (type %s). " + "The ability to return an instance of a strict subclass " + "of float is deprecated, and may be removed " + "in a future version of Python." 
% + (space.type(w_value).name, w_obj_type.name)), + space.w_DeprecationWarning) + elif space.is_w(w_floattype, space.w_float): return w_obj value = space.float_w(w_obj) elif space.isinstance_w(w_value, space.w_unicode): From pypy.commits at gmail.com Tue Sep 25 12:25:47 2018 From: pypy.commits at gmail.com (David C Ellis) Date: Tue, 25 Sep 2018 09:25:47 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Coerce float subclasses returned by __float__ to float class Message-ID: <5baa618b.1c69fb81.c55ad.f944@mx.google.com> Author: David C Ellis Branch: py3.6 Changeset: r95162:b7814a6b95eb Date: 2018-09-19 19:31 +0100 http://bitbucket.org/pypy/pypy/changeset/b7814a6b95eb/ Log: Coerce float subclasses returned by __float__ to float class First working version diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -229,6 +229,10 @@ "in a future version of Python." % (space.type(w_value).name, w_obj_type.name)), space.w_DeprecationWarning) + # Convert to a non-subclass float + value = w_obj.floatval + w_obj = space.allocate_instance(W_FloatObject, w_floattype) + W_FloatObject.__init__(w_obj, value) return w_obj value = space.float_w(w_obj) elif space.isinstance_w(w_value, space.w_unicode): From pypy.commits at gmail.com Tue Sep 25 12:25:49 2018 From: pypy.commits at gmail.com (David C Ellis) Date: Tue, 25 Sep 2018 09:25:49 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Check isinstance so rpython translates. Message-ID: <5baa618d.1c69fb81.6138.0a8b@mx.google.com> Author: David C Ellis Branch: py3.6 Changeset: r95163:fec283ea730d Date: 2018-09-20 00:56 +0100 http://bitbucket.org/pypy/pypy/changeset/fec283ea730d/ Log: Check isinstance so rpython translates. diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -229,10 +229,11 @@ "in a future version of Python." 
% (space.type(w_value).name, w_obj_type.name)), space.w_DeprecationWarning) - # Convert to a non-subclass float - value = w_obj.floatval - w_obj = space.allocate_instance(W_FloatObject, w_floattype) - W_FloatObject.__init__(w_obj, value) + if isinstance(w_obj, W_FloatObject): + # Convert to a non-subclass float + value = w_obj.floatval + w_obj = space.allocate_instance(W_FloatObject, w_floattype) + W_FloatObject.__init__(w_obj, value) return w_obj value = space.float_w(w_obj) elif space.isinstance_w(w_value, space.w_unicode): From pypy.commits at gmail.com Tue Sep 25 12:25:51 2018 From: pypy.commits at gmail.com (David C Ellis) Date: Tue, 25 Sep 2018 09:25:51 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Warning should be raised even if it's a float subclass Message-ID: <5baa618f.1c69fb81.c4648.ed5d@mx.google.com> Author: David C Ellis Branch: py3.6 Changeset: r95164:8feed387bf12 Date: 2018-09-24 14:19 +0100 http://bitbucket.org/pypy/pypy/changeset/8feed387bf12/ Log: Warning should be raised even if it's a float subclass diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -219,16 +219,17 @@ w_value = w_x # 'x' is the keyword argument name in CPython if space.lookup(w_value, "__float__") is not None: w_obj = space.float(w_value) + w_obj_type = space.type(w_obj) + if not space.is_w(w_obj_type, space.w_float): + space.warn(space.newtext( + "%s.__float__ returned non-float (type %s). " + "The ability to return an instance of a strict subclass " + "of float is deprecated, and may be removed " + "in a future version of Python." % + (space.type(w_value).name, w_obj_type.name)), + space.w_DeprecationWarning) if space.is_w(w_floattype, space.w_float): - w_obj_type = space.type(w_obj) if not space.is_w(w_obj_type, space.w_float): - space.warn(space.newtext( - "%s.__float__ returned non-float (type %s). 
" - "The ability to return an instance of a strict subclass " - "of float is deprecated, and may be removed " - "in a future version of Python." % - (space.type(w_value).name, w_obj_type.name)), - space.w_DeprecationWarning) if isinstance(w_obj, W_FloatObject): # Convert to a non-subclass float value = w_obj.floatval From pypy.commits at gmail.com Tue Sep 25 12:25:53 2018 From: pypy.commits at gmail.com (David C Ellis) Date: Tue, 25 Sep 2018 09:25:53 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: Clean up unnecessary logic. Message-ID: <5baa6191.1c69fb81.2392a.8cad@mx.google.com> Author: David C Ellis Branch: py3.6 Changeset: r95165:dc9667baf57c Date: 2018-09-25 13:15 +0100 http://bitbucket.org/pypy/pypy/changeset/dc9667baf57c/ Log: Clean up unnecessary logic. space.float_w does what was needed in the case the object is a subclass of float. elif means subclasses fall through and get converted instead of returned. diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -228,13 +228,7 @@ "in a future version of Python." 
% (space.type(w_value).name, w_obj_type.name)), space.w_DeprecationWarning) - if space.is_w(w_floattype, space.w_float): - if not space.is_w(w_obj_type, space.w_float): - if isinstance(w_obj, W_FloatObject): - # Convert to a non-subclass float - value = w_obj.floatval - w_obj = space.allocate_instance(W_FloatObject, w_floattype) - W_FloatObject.__init__(w_obj, value) + elif space.is_w(w_floattype, space.w_float): return w_obj value = space.float_w(w_obj) elif space.isinstance_w(w_value, space.w_unicode): From pypy.commits at gmail.com Tue Sep 25 14:18:58 2018 From: pypy.commits at gmail.com (rlamy) Date: Tue, 25 Sep 2018 11:18:58 -0700 (PDT) Subject: [pypy-commit] pypy py3.6: hg merge py3.5 Message-ID: <5baa7c12.1c69fb81.25484.68d6@mx.google.com> Author: Ronan Lamy Branch: py3.6 Changeset: r95167:6ffaea666f35 Date: 2018-09-25 19:18 +0100 http://bitbucket.org/pypy/pypy/changeset/6ffaea666f35/ Log: hg merge py3.5 diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Welcome to PyPy! -PyPy is an interperter that implements the Python programming language, based +PyPy is an interpreter that implements the Python programming language, based on the RPython compiler framework for dynamic language implementations. The home page for the interpreter is: @@ -15,29 +15,29 @@ http://doc.pypy.org/ -More documentation about the RPython framework can be found here +More documentation about the RPython framework can be found here: - http://rpython.readthedocs.io + http://rpython.readthedocs.io/ -The source for the documentation is in the pypy/doc directory +The source for the documentation is in the pypy/doc directory. + Using PyPy instead of CPython -============================= +----------------------------- -Please read the information at http://pypy.org to find the correct way to +Please read the information at http://pypy.org/ to find the correct way to download and use PyPy as an alternative to CPython. 
+ Building -======== +-------- Building PyPy is not the recommended way to obtain the PyPy alternative python interpreter. It is time-consuming and requires significant computing resources. -More information can be found here +More information can be found here: http://doc.pypy.org/en/latest/build.html Enjoy and send us feedback! the pypy-dev team - - diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -61,6 +61,9 @@ git = mod.StupidGit() +class ExampleClassWithSlot(object): + __slots__ = 'myslot' + class IsTestBase(unittest.TestCase): predicates = set([inspect.isbuiltin, inspect.isclass, inspect.iscode, inspect.isframe, inspect.isfunction, inspect.ismethod, @@ -141,8 +144,11 @@ self.istest(inspect.iscoroutinefunction, 'coroutine_function_example') if hasattr(types, 'MemberDescriptorType'): - self.istest(inspect.ismemberdescriptor, - 'type(lambda: None).__globals__') + # App-level slots are member descriptors on both PyPy and + # CPython, but the various built-in attributes are all + # getsetdescriptors on PyPy. So check ismemberdescriptor() + # with an app-level slot. 
+ self.istest(inspect.ismemberdescriptor, 'ExampleClassWithSlot.myslot') else: self.assertFalse(inspect.ismemberdescriptor(datetime.timedelta.days)) diff --git a/lib_pypy/_ctypes/function.py b/lib_pypy/_ctypes/function.py --- a/lib_pypy/_ctypes/function.py +++ b/lib_pypy/_ctypes/function.py @@ -486,6 +486,8 @@ return cobj, cobj._to_ffi_param(), type(cobj) def _convert_args_for_callback(self, argtypes, args): + from _ctypes.structure import StructOrUnion + # assert len(argtypes) == len(args) newargs = [] for argtype, arg in zip(argtypes, args): @@ -495,6 +497,10 @@ param = param._get_buffer_value() elif self._is_primitive(argtype): param = param.value + elif isinstance(param, StructOrUnion): # not a *pointer* to struct + newparam = StructOrUnion.__new__(type(param)) + param._copy_to(newparam._buffer.buffer) + param = newparam newargs.append(param) return newargs diff --git a/lib_pypy/cffi/setuptools_ext.py b/lib_pypy/cffi/setuptools_ext.py --- a/lib_pypy/cffi/setuptools_ext.py +++ b/lib_pypy/cffi/setuptools_ext.py @@ -162,6 +162,17 @@ module_path = module_name.split('.') module_path[-1] += '.py' generate_mod(os.path.join(self.build_lib, *module_path)) + def get_source_files(self): + # This is called from 'setup.py sdist' only. Exclude + # the generate .py module in this case. + saved_py_modules = self.py_modules + try: + if saved_py_modules: + self.py_modules = [m for m in saved_py_modules + if m != module_name] + return base_class.get_source_files(self) + finally: + self.py_modules = saved_py_modules dist.cmdclass['build_py'] = build_py_make_mod # distutils and setuptools have no notion I could find of a @@ -171,6 +182,7 @@ # the module. So we add it here, which gives a few apparently # harmless warnings about not finding the file outside the # build directory. + # Then we need to hack more in get_source_files(); see above. 
if dist.py_modules is None: dist.py_modules = [] dist.py_modules.append(module_name) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -37,6 +37,8 @@ Small refactorings in the Python parser. +.. branch: fix-readme-typo + .. branch: py3.6-wordcode implement new wordcode instruction encoding on the 3.6 branch diff --git a/pypy/module/__builtin__/functional.py b/pypy/module/__builtin__/functional.py --- a/pypy/module/__builtin__/functional.py +++ b/pypy/module/__builtin__/functional.py @@ -108,101 +108,99 @@ max_jitdriver = jit.JitDriver(name='max', greens=['has_key', 'has_item', 'w_type'], reds='auto') -def make_min_max(unroll): - @specialize.arg(2) - def min_max_impl(space, args, implementation_of): - if implementation_of == "max": - compare = space.gt - jitdriver = max_jitdriver + at specialize.arg(4) +def min_max_sequence(space, w_sequence, w_key, w_default, implementation_of): + if implementation_of == "max": + compare = space.gt + jitdriver = max_jitdriver + else: + compare = space.lt + jitdriver = min_jitdriver + w_iter = space.iter(w_sequence) + w_type = space.type(w_iter) + has_key = w_key is not None + has_item = False + w_max_item = w_default + w_max_val = None + while True: + jitdriver.jit_merge_point(has_key=has_key, has_item=has_item, + w_type=w_type) + try: + w_item = space.next(w_iter) + except OperationError as e: + if not e.match(space, space.w_StopIteration): + raise + break + if has_key: + w_compare_with = space.call_function(w_key, w_item) else: - compare = space.lt - jitdriver = min_jitdriver - any_kwds = bool(args.keywords) - args_w = args.arguments_w - if len(args_w) > 1: - if unroll and len(args_w) == 2 and not any_kwds: - # a fast path for the common case, useful for interpreted - # mode and to reduce the length of the jit trace - w0, w1 = args_w - if space.is_true(compare(w1, w0)): - return w1 - else: - return w0 - w_sequence = space.newtuple(args_w) - 
elif len(args_w): - w_sequence = args_w[0] + w_compare_with = w_item + if (not has_item or + space.is_true(compare(w_compare_with, w_max_val))): + has_item = True + w_max_item = w_item + w_max_val = w_compare_with + if w_max_item is None: + raise oefmt(space.w_ValueError, "arg is an empty sequence") + return w_max_item + + at specialize.arg(3) + at jit.look_inside_iff(lambda space, args_w, w_key, implementation_of: + jit.loop_unrolling_heuristic(args_w, len(args_w), 3)) +def min_max_multiple_args(space, args_w, w_key, implementation_of): + # case of multiple arguments (at least two). We unroll it if there + # are 2 or 3 arguments. + if implementation_of == "max": + compare = space.gt + else: + compare = space.lt + w_max_item = args_w[0] + if w_key is not None: + w_max_val = space.call_function(w_key, w_max_item) + else: + w_max_val = w_max_item + for i in range(1, len(args_w)): + w_item = args_w[i] + if w_key is not None: + w_compare_with = space.call_function(w_key, w_item) else: - raise oefmt(space.w_TypeError, - "%s() expects at least one argument", - implementation_of) - w_key = None - w_default = None - if any_kwds: - kwds = args.keywords - for n in range(len(kwds)): - if kwds[n] == "key": - w_key = args.keywords_w[n] - elif kwds[n] == "default": - w_default = args.keywords_w[n] - else: - raise oefmt(space.w_TypeError, - "%s() got unexpected keyword argument", - implementation_of) + w_compare_with = w_item + if space.is_true(compare(w_compare_with, w_max_val)): + w_max_item = w_item + w_max_val = w_compare_with + return w_max_item - if w_default is not None and len(args_w) > 1: - raise oefmt(space.w_TypeError, - "Cannot specify a default for %s() with multiple positional arguments", - implementation_of) - - w_iter = space.iter(w_sequence) - w_type = space.type(w_iter) - has_key = w_key is not None - has_item = False - w_max_item = None - w_max_val = None - while True: - if not unroll: - jitdriver.jit_merge_point(has_key=has_key, has_item=has_item, 
w_type=w_type) - try: - w_item = space.next(w_iter) - except OperationError as e: - if not e.match(space, space.w_StopIteration): - raise - break - if has_key: - w_compare_with = space.call_function(w_key, w_item) - else: - w_compare_with = w_item - if not has_item or \ - space.is_true(compare(w_compare_with, w_max_val)): - has_item = True - w_max_item = w_item - w_max_val = w_compare_with - if w_max_item is None: - if w_default is not None: - w_max_item = w_default - else: - raise oefmt(space.w_ValueError, "arg is an empty sequence") - return w_max_item - if unroll: - min_max_impl = jit.unroll_safe(min_max_impl) - return min_max_impl - -min_max_unroll = make_min_max(True) -min_max_normal = make_min_max(False) - + at jit.unroll_safe # the loop over kwds @specialize.arg(2) def min_max(space, args, implementation_of): - # the 'normal' version includes a JIT merge point, which will make a - # new loop (from the interpreter or from another JIT loop). If we - # give exactly two arguments to the call to max(), or a JIT virtual - # list of arguments, then we pick the 'unroll' version with no JIT - # merge point. 
- if jit.isvirtual(args.arguments_w) or len(args.arguments_w) == 2: - return min_max_unroll(space, args, implementation_of) + w_key = None + w_default = None + if bool(args.keywords): + kwds = args.keywords + for n in range(len(kwds)): + if kwds[n] == "key": + w_key = args.keywords_w[n] + elif kwds[n] == "default": + w_default = args.keywords_w[n] + else: + raise oefmt(space.w_TypeError, + "%s() got unexpected keyword argument", + implementation_of) + # + args_w = args.arguments_w + if len(args_w) > 1: + if w_default is not None: + raise oefmt(space.w_TypeError, + "Cannot specify a default for %s() with multiple " + "positional arguments", implementation_of) + return min_max_multiple_args(space, args_w, w_key, implementation_of) + elif len(args_w): + return min_max_sequence(space, args_w[0], w_key, w_default, + implementation_of) else: - return min_max_normal(space, args, implementation_of) -min_max._always_inline = True + raise oefmt(space.w_TypeError, + "%s() expects at least one argument", + implementation_of) def max(space, __args__): """max(iterable[, key=func]) -> value diff --git a/pypy/module/__builtin__/test/test_functional.py b/pypy/module/__builtin__/test/test_functional.py --- a/pypy/module/__builtin__/test/test_functional.py +++ b/pypy/module/__builtin__/test/test_functional.py @@ -654,6 +654,10 @@ raises(TypeError, max, 1, default=0) raises(TypeError, max, default=1) + def test_max_list_and_key(self): + assert max(["100", "50", "30", "-200"], key=int) == "100" + assert max("100", "50", "30", "-200", key=int) == "100" + try: from hypothesis import given, strategies, example diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -317,9 +317,7 @@ if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool): self._must_be_string_of_zero_or_one(value) keepalives[i] = value - buf, buf_flag = rffi.get_nonmovingbuffer_final_null(value) 
- rffi.cast(rffi.CCHARPP, cdata)[0] = buf - return ord(buf_flag) # 4, 5 or 6 + return misc.write_string_as_charp(cdata, value) # if (space.isinstance_w(w_init, space.w_list) or space.isinstance_w(w_init, space.w_tuple)): diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -3,7 +3,7 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib import jit -from rpython.rlib.objectmodel import specialize +from rpython.rlib.objectmodel import specialize, we_are_translated from rpython.rlib.rarithmetic import r_uint, r_ulonglong from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.lltypesystem import lltype, llmemory, rffi @@ -102,6 +102,12 @@ def write_raw_longdouble_data(target, source): rffi.cast(rffi.LONGDOUBLEP, target)[0] = source + at jit.dont_look_inside # lets get_nonmovingbuffer_final_null be inlined +def write_string_as_charp(target, string): + buf, buf_flag = rffi.get_nonmovingbuffer_final_null(string) + rffi.cast(rffi.CCHARPP, target)[0] = buf + return ord(buf_flag) # 4, 5 or 6 + # ____________________________________________________________ sprintf_longdouble = rffi.llexternal( @@ -129,21 +135,14 @@ # This version accepts a Python int too, and does convertions from # other types of objects. It refuses floats. 
try: - value = space.int_w(w_ob) + return space.int_w(w_ob, allow_conversion=False) except OperationError as e: if not (e.match(space, space.w_OverflowError) or e.match(space, space.w_TypeError)): raise - else: - return value - try: - bigint = space.bigint_w(w_ob, allow_conversion=False) - except OperationError as e: - if not e.match(space, space.w_TypeError): - raise if _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + bigint = space.bigint_w(w_ob, allow_conversion=True) try: return bigint.tolonglong() except OverflowError: @@ -151,20 +150,15 @@ def as_long(space, w_ob): # Same as as_long_long(), but returning an int instead. - if space.is_w(space.type(w_ob), space.w_int): # shortcut - return space.int_w(w_ob) try: - bigint = space.bigint_w(w_ob, allow_conversion=False) + return space.int_w(w_ob, allow_conversion=False) except OperationError as e: - if not e.match(space, space.w_TypeError): + if not (e.match(space, space.w_OverflowError) or + e.match(space, space.w_TypeError)): raise if _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) - try: - return bigint.toint() - except OverflowError: - raise OperationError(space.w_OverflowError, space.newtext(ovf_msg)) + return space.int_w(w_ob, allow_conversion=True) def as_unsigned_long_long(space, w_ob, strict): # (possibly) convert and cast a Python object to an unsigned long long. @@ -172,23 +166,19 @@ # objects. If 'strict', complains with OverflowError; if 'not strict', # mask the result and round floats. 
try: - value = space.int_w(w_ob) + value = space.int_w(w_ob, allow_conversion=False) except OperationError as e: if not (e.match(space, space.w_OverflowError) or e.match(space, space.w_TypeError)): raise + if strict and _is_a_float(space, w_ob): + raise else: if strict and value < 0: raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) return r_ulonglong(value) - try: - bigint = space.bigint_w(w_ob, allow_conversion=False) - except OperationError as e: - if not e.match(space, space.w_TypeError): - raise - if strict and _is_a_float(space, w_ob): - raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + # note that if not 'strict', then space.int() will round down floats + bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) if strict: try: return bigint.toulonglong() @@ -202,13 +192,19 @@ def as_unsigned_long(space, w_ob, strict): # same as as_unsigned_long_long(), but returning just an Unsigned try: - bigint = space.bigint_w(w_ob, allow_conversion=False) + value = space.int_w(w_ob, allow_conversion=False) except OperationError as e: - if not e.match(space, space.w_TypeError): + if not (e.match(space, space.w_OverflowError) or + e.match(space, space.w_TypeError)): raise if strict and _is_a_float(space, w_ob): raise - bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) + else: + if strict and value < 0: + raise OperationError(space.w_OverflowError, space.newtext(neg_msg)) + return r_uint(value) + # note that if not 'strict', then space.int() will round down floats + bigint = space.bigint_w(space.int(w_ob), allow_conversion=False) if strict: try: return bigint.touint() @@ -241,7 +237,12 @@ def _standard_object_as_bool(space, w_ob): if space.isinstance_w(w_ob, space.w_int): - return space.bigint_w(w_ob).tobool() + try: + return space.int_w(w_ob) != 0 + except OperationError as e: + if not e.match(space, space.w_OverflowError): + raise + return space.bigint_w(w_ob).tobool() if space.isinstance_w(w_ob, 
space.w_float): return space.float_w(w_ob) != 0.0 raise _NotStandardObject diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -334,8 +334,18 @@ max = (1 << (8*size-1)) - 1 assert newp(pp, min)[0] == min assert newp(pp, max)[0] == max + py.test.raises(OverflowError, newp, pp, min - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 2 ** 64) py.test.raises(OverflowError, newp, pp, min - 1) py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32) + py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64) + py.test.raises(OverflowError, newp, pp, max + 1) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 32) + py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64) + py.test.raises(TypeError, newp, pp, 1.0) for name in ['char', 'short', 'int', 'long', 'long long']: p = new_primitive_type('unsigned ' + name) pp = new_pointer_type(p) @@ -3947,6 +3957,7 @@ z3 = cast(BVoidP, 0) z4 = cast(BUCharP, 0) with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") newp(new_pointer_type(BIntP), z1) # warn assert len(w) == 1 newp(new_pointer_type(BVoidP), z1) # fine diff --git a/pypy/module/pypyjit/test_pypy_c/test_min_max.py b/pypy/module/pypyjit/test_pypy_c/test_min_max.py --- a/pypy/module/pypyjit/test_pypy_c/test_min_max.py +++ b/pypy/module/pypyjit/test_pypy_c/test_min_max.py @@ -30,37 +30,42 @@ sa = 0 while i < 30000: lst = range(i % 1000 + 2) - sa += max(*lst) # ID: max + sa += max(*lst) # ID: callmax i += 1 return sa log = self.run(main, []) assert log.result == main() loop, = log.loops_by_filename(self.filepath) - assert loop.match(""" + assert loop.match_by_id('callmax', """ ... 
- p76 = call_assembler_r(..., descr=...) + p76 = call_may_force_r(_, _, _, _, descr=...) ... """) - loop2 = log.loops[0] - loop2.match(''' - ... - label(..., descr=...) - ... - label(..., descr=...) - guard_not_invalidated? - i17 = int_ge(i11, i7) - guard_false(i17, descr=...) - p18 = getarrayitem_gc_r(p5, i11, descr=...) - i19 = int_add(i11, 1) - setfield_gc(p2, i19, descr=...) - guard_nonnull_class(p18, ConstClass(W_IntObject), descr=...) - i20 = getfield_gc_i(p18, descr=...) - i21 = int_gt(i20, i14) - guard_true(i21, descr=...) - jump(..., descr=...) - ''') - # XXX could be "guard_class(p18)" instead; we lost somewhere - # the information that it cannot be null. + + #----- the following logic used to check the content of the assembly + #----- generated for the loop in max(), but now we no longer produce + #----- any custom assembly in this case. It used to say + #----- 'call_assembler_r' above, and now it says 'call_may_force_r'. + #loop2 = log.loops[0] + #loop2.match(''' + #... + #label(..., descr=...) + #... + #label(..., descr=...) + #guard_not_invalidated? + #i17 = int_ge(i11, i7) + #guard_false(i17, descr=...) + #p18 = getarrayitem_gc_r(p5, i11, descr=...) + #i19 = int_add(i11, 1) + #setfield_gc(p2, i19, descr=...) + #guard_nonnull_class(p18, ConstClass(W_IntObject), descr=...) + #i20 = getfield_gc_i(p18, descr=...) + #i21 = int_gt(i20, i14) + #guard_true(i21, descr=...) + #jump(..., descr=...) + #''') + ## XXX could be "guard_class(p18)" instead; we lost somewhere + ## the information that it cannot be null. 
def test_iter_max(self): def main(): diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/backend_tests.py @@ -1387,6 +1387,7 @@ ffi = FFI(backend=self.Backend()) ffi.cdef("enum foo;") with warnings.catch_warnings(record=True) as log: + warnings.simplefilter("always") n = ffi.cast("enum foo", -1) assert int(n) == 0xffffffff assert str(log[0].message) == ( diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_function.py @@ -46,14 +46,14 @@ assert x != math.sin(1.23) # rounding effects assert abs(x - math.sin(1.23)) < 1E-6 - def test_sin_no_return_value(self): + def test_lround_no_return_value(self): # check that 'void'-returning functions work too ffi = FFI(backend=self.Backend()) ffi.cdef(""" - void sin(double x); + void lround(double x); """) m = ffi.dlopen(lib_m) - x = m.sin(1.23) + x = m.lround(1.23) assert x is None def test_dlopen_filename(self): diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py --- a/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py +++ b/pypy/module/test_lib_pypy/ctypes_tests/test_callbacks.py @@ -160,15 +160,17 @@ proto = CFUNCTYPE(c_int, RECT) def callback(point): + point.left *= -1 return point.left+point.top+point.right+point.bottom cbp = proto(callback) - rect = RECT(1000,100,10,1) + rect = RECT(-1000,100,10,1) res = cbp(rect) assert res == 1111 + assert rect.left == -1000 # must not have been changed! 
def test_callback_from_c_with_struct_argument(self): import conftest diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -186,7 +186,7 @@ self.cc = cc # detect version of current compiler - returncode, stdout, stderr = _run_subprocess(self.cc, '', + returncode, stdout, stderr = _run_subprocess(self.cc, [], env=self.c_environ) r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr) if r is not None: @@ -196,7 +196,7 @@ self.version = 0 # Try to find a masm assembler - returncode, stdout, stderr = _run_subprocess('ml.exe', '', + returncode, stdout, stderr = _run_subprocess('ml.exe', [], env=self.c_environ) r = re.search('Macro Assembler', stderr) if r is None and os.path.exists('c:/masm32/bin/ml.exe'): From pypy.commits at gmail.com Fri Sep 28 05:52:42 2018 From: pypy.commits at gmail.com (arigo) Date: Fri, 28 Sep 2018 02:52:42 -0700 (PDT) Subject: [pypy-commit] pypy default: #2895 Test and fix for rposix.getfullpathname() Message-ID: <5badf9ea.1c69fb81.2d8c1.0523@mx.google.com> Author: Armin Rigo Branch: Changeset: r95170:94d034975108 Date: 2018-09-28 11:50 +0200 http://bitbucket.org/pypy/pypy/changeset/94d034975108/ Log: #2895 Test and fix for rposix.getfullpathname() diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -730,16 +730,21 @@ length = rwin32.MAX_PATH + 1 traits = _preferred_traits(path) win32traits = make_win32_traits(traits) - with traits.scoped_alloc_buffer(length) as buf: - res = win32traits.GetFullPathName( - traits.as_str0(path), rffi.cast(rwin32.DWORD, length), - buf.raw, lltype.nullptr(win32traits.LPSTRP.TO)) - if res == 0: - raise rwin32.lastSavedWindowsError("_getfullpathname failed") - result = buf.str(intmask(res)) - assert result is not None - result = rstring.assert_str0(result) - return result + while True: # should run the 
loop body maximum twice + with traits.scoped_alloc_buffer(length) as buf: + res = win32traits.GetFullPathName( + traits.as_str0(path), rffi.cast(rwin32.DWORD, length), + buf.raw, lltype.nullptr(win32traits.LPSTRP.TO)) + res = intmask(res) + if res == 0: + raise rwin32.lastSavedWindowsError("_getfullpathname failed") + if res >= length: + length = res + 1 + continue + result = buf.str(res) + assert result is not None + result = rstring.assert_str0(result) + return result c_getcwd = external(UNDERSCORE_ON_WIN32 + 'getcwd', [rffi.CCHARP, rffi.SIZE_T], rffi.CCHARP, diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -83,6 +83,14 @@ # the most intriguing failure of ntpath.py should not repeat, here: assert not data.endswith(stuff) + @win_only + def test__getfullpathname_long(self): + stuff = "C:" + "\\abcd" * 100 + py.test.raises(WindowsError, rposix.getfullpathname, stuff) + ustuff = u"C:" + u"\\abcd" * 100 + res = rposix.getfullpathname(ustuff) + assert res == ustuff + def test_getcwd(self): assert rposix.getcwd() == os.getcwd() From pypy.commits at gmail.com Sat Sep 29 01:09:45 2018 From: pypy.commits at gmail.com (arigo) Date: Fri, 28 Sep 2018 22:09:45 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Merged in andrewjlawrence/pypy/py3.5 (pull request #627) Message-ID: <5baf0919.1c69fb81.a2380.bb4a@mx.google.com> Author: Armin Rigo Branch: py3.5 Changeset: r95173:b52420735cc0 Date: 2018-09-29 05:09 +0000 http://bitbucket.org/pypy/pypy/changeset/b52420735cc0/ Log: Merged in andrewjlawrence/pypy/py3.5 (pull request #627) Fixed tests from own own-win-x86-32 pypy.module._io.test.test_fileio Approved-by: Armin Rigo diff --git a/pypy/module/_io/interp_fileio.py b/pypy/module/_io/interp_fileio.py --- a/pypy/module/_io/interp_fileio.py +++ b/pypy/module/_io/interp_fileio.py @@ -196,12 +196,11 @@ wrap_oserror2(space, e, w_name, exception_name='w_IOError', 
eintr_retry=True) - if not rposix._WIN32: - try: - _open_inhcache.set_non_inheritable(self.fd) - except OSError as e: - raise wrap_oserror2(space, e, w_name, - eintr_retry=False) + try: + _open_inhcache.set_non_inheritable(self.fd) + except OSError as e: + raise wrap_oserror2(space, e, w_name, + eintr_retry=False) else: w_fd = space.call_function(w_opener, w_name, space.newint(flags)) @@ -225,6 +224,7 @@ raise wrap_oserror2(space, e, w_name, eintr_retry=False) + try: st = os.fstat(self.fd) except OSError as e: diff --git a/pypy/module/_io/test/test_fileio.py b/pypy/module/_io/test/test_fileio.py --- a/pypy/module/_io/test/test_fileio.py +++ b/pypy/module/_io/test/test_fileio.py @@ -279,31 +279,34 @@ raises(FileExistsError, _io.FileIO, filename, 'x') def test_non_inheritable(self): - import _io, posix + import _io + os = self.posix f = _io.FileIO(self.tmpfile, 'r') - assert posix.get_inheritable(f.fileno()) == False + assert os.get_inheritable(f.fileno()) == False f.close() def test_FileIO_fd_does_not_change_inheritable(self): - import _io, posix - fd1, fd2 = posix.pipe() - posix.set_inheritable(fd1, True) - posix.set_inheritable(fd2, False) + import _io + os = self.posix + fd1, fd2 = os.pipe() + os.set_inheritable(fd1, True) + os.set_inheritable(fd2, False) f1 = _io.FileIO(fd1, 'r') f2 = _io.FileIO(fd2, 'w') - assert posix.get_inheritable(fd1) == True - assert posix.get_inheritable(fd2) == False + assert os.get_inheritable(fd1) == True + assert os.get_inheritable(fd2) == False f1.close() f2.close() def test_close_upon_reinit(self): - import _io, posix + import _io + os = self.posix f = _io.FileIO(self.tmpfile, 'r') fd1 = f.fileno() f.__init__(self.tmpfile, 'w') fd2 = f.fileno() if fd1 != fd2: - raises(OSError, posix.close, fd1) + raises(OSError, os.close, fd1) def test_opener_negative(self): import _io From pypy.commits at gmail.com Sat Sep 29 01:09:51 2018 From: pypy.commits at gmail.com (andrewjlawrence) Date: Fri, 28 Sep 2018 22:09:51 -0700 (PDT) Subject: 
[pypy-commit] pypy py3.5: Fixed tests from own own-win-x86-32 Message-ID: <5baf091f.1c69fb81.b61a8.e6a5@mx.google.com> Author: andrewjlawrence Branch: py3.5 Changeset: r95171:251058ac9cef Date: 2018-09-19 18:55 +0100 http://bitbucket.org/pypy/pypy/changeset/251058ac9cef/ Log: Fixed tests from own own-win-x86-32 pypy.module._io.test.test_fileio AppTestFileIO.().test_FileIO_fd_does_not_change_inheritable pypy.module._io.test.test_fileio AppTestFileIO.().test_close_upon_reinit pypy.module._io.test.test_fileio AppTestFileIO.().test_non_inheritable Modified FileIO descrinit to set inheritable to false on windows diff --git a/pypy/module/_io/interp_fileio.py b/pypy/module/_io/interp_fileio.py --- a/pypy/module/_io/interp_fileio.py +++ b/pypy/module/_io/interp_fileio.py @@ -202,6 +202,12 @@ except OSError as e: raise wrap_oserror2(space, e, w_name, eintr_retry=False) + else: + try: + rposix.set_inheritable(self.fd, False) + except OSError as e: + raise wrap_oserror2(space, e, w_name, + eintr_retry=False) else: w_fd = space.call_function(w_opener, w_name, space.newint(flags)) @@ -225,6 +231,7 @@ raise wrap_oserror2(space, e, w_name, eintr_retry=False) + try: st = os.fstat(self.fd) except OSError as e: diff --git a/pypy/module/_io/test/test_fileio.py b/pypy/module/_io/test/test_fileio.py --- a/pypy/module/_io/test/test_fileio.py +++ b/pypy/module/_io/test/test_fileio.py @@ -279,31 +279,34 @@ raises(FileExistsError, _io.FileIO, filename, 'x') def test_non_inheritable(self): - import _io, posix + import _io + os = self.posix f = _io.FileIO(self.tmpfile, 'r') - assert posix.get_inheritable(f.fileno()) == False + assert os.get_inheritable(f.fileno()) == False f.close() def test_FileIO_fd_does_not_change_inheritable(self): - import _io, posix - fd1, fd2 = posix.pipe() - posix.set_inheritable(fd1, True) - posix.set_inheritable(fd2, False) + import _io + os = self.posix + fd1, fd2 = os.pipe() + os.set_inheritable(fd1, True) + os.set_inheritable(fd2, False) f1 = _io.FileIO(fd1, 
'r') f2 = _io.FileIO(fd2, 'w') - assert posix.get_inheritable(fd1) == True - assert posix.get_inheritable(fd2) == False + assert os.get_inheritable(fd1) == True + assert os.get_inheritable(fd2) == False f1.close() f2.close() def test_close_upon_reinit(self): - import _io, posix + import _io + os = self.posix f = _io.FileIO(self.tmpfile, 'r') fd1 = f.fileno() f.__init__(self.tmpfile, 'w') fd2 = f.fileno() if fd1 != fd2: - raises(OSError, posix.close, fd1) + raises(OSError, os.close, fd1) def test_opener_negative(self): import _io From pypy.commits at gmail.com Sat Sep 29 01:09:53 2018 From: pypy.commits at gmail.com (andrewjlawrence) Date: Fri, 28 Sep 2018 22:09:53 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Tried just removing not windows check. Worked. Message-ID: <5baf0921.1c69fb81.37f1d.628c@mx.google.com> Author: andrewjlawrence Branch: py3.5 Changeset: r95172:4434aa0cd934 Date: 2018-09-28 22:45 +0100 http://bitbucket.org/pypy/pypy/changeset/4434aa0cd934/ Log: Tried just removing not windows check. Worked. 
diff --git a/pypy/module/_io/interp_fileio.py b/pypy/module/_io/interp_fileio.py --- a/pypy/module/_io/interp_fileio.py +++ b/pypy/module/_io/interp_fileio.py @@ -196,18 +196,11 @@ wrap_oserror2(space, e, w_name, exception_name='w_IOError', eintr_retry=True) - if not rposix._WIN32: - try: - _open_inhcache.set_non_inheritable(self.fd) - except OSError as e: - raise wrap_oserror2(space, e, w_name, - eintr_retry=False) - else: - try: - rposix.set_inheritable(self.fd, False) - except OSError as e: - raise wrap_oserror2(space, e, w_name, - eintr_retry=False) + try: + _open_inhcache.set_non_inheritable(self.fd) + except OSError as e: + raise wrap_oserror2(space, e, w_name, + eintr_retry=False) else: w_fd = space.call_function(w_opener, w_name, space.newint(flags)) From pypy.commits at gmail.com Sun Sep 30 03:45:52 2018 From: pypy.commits at gmail.com (arigo) Date: Sun, 30 Sep 2018 00:45:52 -0700 (PDT) Subject: [pypy-commit] pypy.org extradoc: Add the version number in non-Mac-lingo Message-ID: <5bb07f30.1c69fb81.6add8.0937@mx.google.com> Author: Armin Rigo Branch: extradoc Changeset: r929:abbf3e5ebd7b Date: 2018-09-30 09:45 +0200 http://bitbucket.org/pypy/pypy.org/changeset/abbf3e5ebd7b/ Log: Add the version number in non-Mac-lingo diff --git a/download.html b/download.html --- a/download.html +++ b/download.html @@ -134,7 +134,7 @@
  • Linux x86-64 binary (64bit, built on Ubuntu 12.04 - 16.04) (see [1] below)
  • ARM Hardfloat Linux binary (ARMHF/gnueabihf, Raspbian) (see [1] below)
  • ARM Softfloat Linux binary (ARMEL/gnueabi, Ubuntu Raring) (see [1] below)
- • Mac OS X binary (64bit) (High Sierra, not for Sierra and below)
+ • Mac OS X binary (64bit) (High Sierra >= 10.13, not for Sierra and below)
  • Windows binary (32bit) BETA
  • s390x Linux binary (built on Redhat Linux 7.2) (see [1] below)
  • Source (tar.bz2); Source (zip). See below for more about the sources.
  • diff --git a/source/download.txt b/source/download.txt --- a/source/download.txt +++ b/source/download.txt @@ -124,7 +124,7 @@ * `Linux x86-64 binary (64bit, built on Ubuntu 12.04 - 16.04)`__ (see ``[1]`` below) * `ARM Hardfloat Linux binary (ARMHF/gnueabihf, Raspbian)`__ (see ``[1]`` below) * `ARM Softfloat Linux binary (ARMEL/gnueabi, Ubuntu Raring)`__ (see ``[1]`` below) -* `Mac OS X binary (64bit)`__ (High Sierra, not for Sierra and below) +* `Mac OS X binary (64bit)`__ (High Sierra >= 10.13, not for Sierra and below) * `Windows binary (32bit)`__ **BETA** * `s390x Linux binary (built on Redhat Linux 7.2)`__ (see ``[1]`` below) * `Source (tar.bz2)`__; `Source (zip)`__. See below for more about the sources.