[pypy-commit] pypy release-pypy3.6-v7.x: merge py3.6 into release

Thu Mar 14 11:20:28 EDT 2019

Author: Matti Picus <matti.picus at gmail.com>
Branch: release-pypy3.6-v7.x
Changeset: r96318:bb0d05b190b9
Date: 2019-03-14 17:19 +0200
http://bitbucket.org/pypy/pypy/changeset/bb0d05b190b9/

Log:	merge py3.6 into release

diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst
--- a/pypy/doc/whatsnew-pypy3-head.rst
+++ b/pypy/doc/whatsnew-pypy3-head.rst
@@ -4,3 +4,7 @@
 
 .. this is the revision after release-pypy3.6-v7.1
 .. startrev: d642a3c217cb
+
+.. branch: zlib-make-py3-go-boom
+
+Raise ValueError if you try to copy an already-flushed zlib decompressor on py3
diff --git a/pypy/module/__pypy__/test/test_newmemoryview.py b/pypy/module/__pypy__/test/test_newmemoryview.py
--- a/pypy/module/__pypy__/test/test_newmemoryview.py
+++ b/pypy/module/__pypy__/test/test_newmemoryview.py
@@ -26,7 +26,7 @@
 
 
         obj = B()
-        buf = buffer(obj)
+        buf = memoryview(obj)
         v = obj.data[2]
-        assert ord(buf[2]) == v
+        assert buf[2] == v
 
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -304,7 +304,7 @@
         while pos < end:
             oc = ord(obj[pos])
             raw_unicode_escape_helper(builder, oc)
-            pos += 1 
+            pos += 1
         return space.newtuple([space.newtext(builder.build()), w_end])
     else:
         raise oefmt(space.w_TypeError,
@@ -561,8 +561,8 @@
     return w_err_handler
 
 
- at unwrap_spec(errors='text')
-def encode(space, w_obj, w_encoding=None, errors='strict'):
+ at unwrap_spec(encoding='text_or_none', errors='text_or_none')
+def encode(space, w_obj, encoding=None, errors=None):
     """encode(obj, [encoding[,errors]]) -> object
 
     Encodes obj using the codec registered for encoding. encoding defaults
@@ -572,20 +572,26 @@
     'xmlcharrefreplace' as well as any other name registered with
     codecs.register_error that can handle ValueErrors.
     """
-    if w_encoding is None:
+    if encoding is None:
         encoding = space.sys.defaultencoding
-    else:
-        encoding = space.text_w(w_encoding)
     w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
-    return _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors)
+    w_retval =  _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors)
+    if not space.isinstance_w(w_retval, space.w_bytes):
+        raise oefmt(space.w_TypeError,
+                    "'%s' encoder returned '%T' instead of 'bytes'; "
+                    "use codecs.encode() to encode to arbitrary types",
+                    encoding,
+                    w_retval)
+    return w_retval
 
 @unwrap_spec(errors='text_or_none')
 def readbuffer_encode(space, w_data, errors='strict'):
     s = space.getarg_w('s#', w_data)
     return space.newtuple([space.newbytes(s), space.newint(len(s))])
 
- at unwrap_spec(errors='text')
-def decode(space, w_obj, w_encoding=None, errors='strict'):
+ at unwrap_spec(encoding='text_or_none', errors='text_or_none')
+def decode(space, w_obj, encoding=None, errors=None):
+    from pypy.objspace.std.unicodeobject import W_UnicodeObject
     """decode(obj, [encoding[,errors]]) -> object
 
     Decodes obj using the codec registered for encoding. encoding defaults
@@ -595,12 +601,17 @@
     as well as any other name registered with codecs.register_error that is
     able to handle ValueErrors.
     """
-    if w_encoding is None:
+    if encoding is None:
         encoding = space.sys.defaultencoding
-    else:
-        encoding = space.text_w(w_encoding)
     w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
-    return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
+    w_retval = _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
+    if not isinstance(w_retval, W_UnicodeObject):
+        raise oefmt(space.w_TypeError,
+                    "'%s' decoder returned '%T' instead of 'str'; "
+                    "use codecs.decode() to decode to arbitrary types",
+                    encoding,
+                    w_retval)
+    return w_retval
 
 @unwrap_spec(errors='text')
 def register_error(space, errors, w_handler):
@@ -636,16 +647,6 @@
                     "use %s to handle arbitrary codecs", encoding, action)
     return codec_info
 
-def encode_text(space, w_obj, encoding, errors):
-    w_encoder = space.getitem(
-        lookup_text_codec(space, "codecs.encode()", encoding), space.newint(0))
-    return _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors)
-
-def decode_text(space, w_obj, encoding, errors):
-    w_decoder = space.getitem(
-        lookup_text_codec(space, "codecs.decode()", encoding), space.newint(1))
-    return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
-
 # ____________________________________________________________
 
 def _find_implementation(impl_name):
@@ -735,7 +736,7 @@
         result = unicodehelper.utf8_encode_utf_8(utf8, errors,
                      state.encode_error_handler, allow_surrogates=False)
     except unicodehelper.ErrorHandlerError as e:
-        raise oefmt(space.w_IndexError, 
+        raise oefmt(space.w_IndexError,
                    "position %d from error handler invalid, already encoded %d",
                     e.new,e.old)
 
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -708,7 +708,9 @@
             return None
         _codecs.register(search_function)
         assert u"hello".encode("onearg") == b'foo'
-        assert b"hello".decode("onearg") == 'foo'
+        assert b"hello".decode("onearg") == u'foo'
+        assert _codecs.encode(u"hello", "onearg") == b'foo'
+        assert _codecs.decode(b"hello", "onearg") == u'foo'
 
     def test_cpytest_decode(self):
         import codecs
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1191,7 +1191,9 @@
     state.C.get_pyos_inputhook = rffi.llexternal(
         '_PyPy_get_PyOS_InputHook', [], FUNCPTR,
         compilation_info=eci, _nowrapper=True)
-
+    state.C.tuple_new = rffi.llexternal(
+        'tuple_new', [PyTypeObjectPtr, PyObject, PyObject], PyObject,
+        compilation_info=eci, _nowrapper=True)
 
 def init_function(func):
     INIT_FUNCTIONS.append(func)
diff --git a/pypy/module/cpyext/include/tupleobject.h b/pypy/module/cpyext/include/tupleobject.h
--- a/pypy/module/cpyext/include/tupleobject.h
+++ b/pypy/module/cpyext/include/tupleobject.h
@@ -18,6 +18,7 @@
 
 PyAPI_FUNC(PyObject *) PyTuple_New(Py_ssize_t size);
 PyAPI_FUNC(void) _PyPy_tuple_dealloc(PyObject *);
+PyAPI_FUNC(PyObject *) tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
 
 /* defined in varargswrapper.c */
 PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...);
diff --git a/pypy/module/cpyext/src/tupleobject.c b/pypy/module/cpyext/src/tupleobject.c
--- a/pypy/module/cpyext/src/tupleobject.c
+++ b/pypy/module/cpyext/src/tupleobject.c
@@ -89,3 +89,48 @@
 done:
     Py_TRASHCAN_SAFE_END(op)
 }
+
+static PyObject *
+tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+
+PyObject *
+tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyObject *arg = NULL;
+    static char *kwlist[] = {"sequence", 0};
+
+    if (type != &PyTuple_Type)
+        return tuple_subtype_new(type, args, kwds);
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:tuple", kwlist, &arg))
+        return NULL;
+
+    if (arg == NULL)
+        return PyTuple_New(0);
+    else
+        return PySequence_Tuple(arg);
+}
+
+static PyObject *
+tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyObject *tmp, *newobj, *item;
+    Py_ssize_t i, n;
+
+    assert(PyType_IsSubtype(type, &PyTuple_Type));
+    tmp = tuple_new(&PyTuple_Type, args, kwds);
+    if (tmp == NULL)
+        return NULL;
+    assert(PyTuple_Check(tmp));
+    newobj = type->tp_alloc(type, n = PyTuple_GET_SIZE(tmp));
+    if (newobj == NULL)
+        return NULL;
+    for (i = 0; i < n; i++) {
+        item = PyTuple_GET_ITEM(tmp, i);
+        Py_INCREF(item);
+        PyTuple_SET_ITEM(newobj, i, item);
+    }
+    Py_DECREF(tmp);
+    return newobj;
+}
+
+
diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py
--- a/pypy/module/cpyext/test/test_tupleobject.py
+++ b/pypy/module/cpyext/test/test_tupleobject.py
@@ -226,3 +226,44 @@
             raises(SystemError, module.set_after_use, s)
         else:
             module.set_after_use(s)
+
+    def test_mp_length(self):
+        # issue 2968: creating a subclass of tuple in C led to recursion
+        # since the default tp_new needs to build a w_obj, but that needs
+        # to call space.len_w, which needs to call tp_new.
+        module = self.import_extension('foo', [
+            ("get_size", "METH_NOARGS",
+             """
+                return (PyObject*)&THPSizeType;
+             """),
+            ], prologue='''
+                #include "Python.h"
+
+                struct THPSize {
+                  PyTupleObject tuple;
+                } THPSize;
+
+                static PyMappingMethods THPSize_as_mapping = {
+                    0, //PyTuple_Type.tp_as_mapping->mp_length,
+                    0,
+                    0
+                };
+
+                PyTypeObject THPSizeType = {
+                  PyVarObject_HEAD_INIT(0, 0)
+                  "torch.Size",                          /* tp_name */
+                  sizeof(THPSize),                       /* tp_basicsize */
+                };
+            ''' , more_init = '''
+                THPSize_as_mapping.mp_length = PyTuple_Type.tp_as_mapping->mp_length;
+                THPSizeType.tp_base = &PyTuple_Type;
+                THPSizeType.tp_flags = Py_TPFLAGS_DEFAULT;
+                THPSizeType.tp_as_mapping = &THPSize_as_mapping;
+                THPSizeType.tp_new = PyTuple_Type.tp_new;
+                if (PyType_Ready(&THPSizeType) < 0) INITERROR;
+            ''')
+        SZ = module.get_size()
+        s = SZ((1, 2, 3))
+        assert len(s) == 3
+        assert len(s) == 3
+
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -686,6 +686,11 @@
         update_all_slots(space, w_type, pto)
     else:
         update_all_slots_builtin(space, w_type, pto)
+
+    # XXX generalize this pattern for various slot functions implemented in C
+    if space.is_w(w_type, space.w_tuple):
+        pto.c_tp_new = state.C.tuple_new
+
     if not pto.c_tp_new:
         base_object_pyo = make_ref(space, space.w_object)
         base_object_pto = rffi.cast(PyTypeObjectPtr, base_object_pyo)
diff --git a/pypy/module/zlib/interp_zlib.py b/pypy/module/zlib/interp_zlib.py
--- a/pypy/module/zlib/interp_zlib.py
+++ b/pypy/module/zlib/interp_zlib.py
@@ -313,6 +313,11 @@
         try:
             self.lock()
             try:
+                if not self.stream:
+                    raise oefmt(
+                        space.w_ValueError,
+                        "Decompressor was already flushed",
+                    )
                 copied = rzlib.inflateCopy(self.stream)
             finally:
                 self.unlock()
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -43,7 +43,7 @@
                 # best effort, too expensive to handle surrogates
                 ulength = rutf8.codepoints_in_utf(utf8str)
             except:
-                ulength = length 
+                ulength = length
             assert ulength == length
 
 
@@ -135,7 +135,7 @@
         if strict:
             raise oefmt(space.w_TypeError,
                 "%s arg must be None, unicode or str", strict)
-        return unicode_from_encoded_object(space, w_other, 'utf8', "strict")
+        return decode_object(space, w_other, 'utf8', "strict")
 
     def convert_to_w_unicode(self, space):
         return self
@@ -203,8 +203,7 @@
                 if space.isinstance_w(w_object, space.w_unicode):
                     raise oefmt(space.w_TypeError,
                             "decoding str is not supported")
-                w_value = unicode_from_encoded_object(space, w_object,
-                                                  encoding, errors)
+                w_value = decode_object(space, w_object, encoding, errors)
         if space.is_w(w_unicodetype, space.w_unicode):
             return w_value
 
@@ -649,7 +648,7 @@
     def descr_startswith(self, space, w_prefix, w_start=None, w_end=None):
         start, end = self._unwrap_and_compute_idx_params(space, w_start, w_end)
         value = self._utf8
-        if (start > 0 and not space.is_none(w_end) and 
+        if (start > 0 and not space.is_none(w_end) and
                                 space.getindex_w(w_end, None) == 0):
             return space.w_False
         if space.isinstance_w(w_prefix, space.w_tuple):
@@ -674,7 +673,7 @@
         start, end = self._unwrap_and_compute_idx_params(space, w_start, w_end)
         value = self._utf8
         # match cpython behaviour
-        if (start > 0 and not space.is_none(w_end) and 
+        if (start > 0 and not space.is_none(w_end) and
                                 space.getindex_w(w_end, None) == 0):
             return space.w_False
         if space.isinstance_w(w_suffix, space.w_tuple):
@@ -1207,11 +1206,11 @@
     errors = None if w_errors is None else space.text_w(w_errors)
     return encoding, errors
 
-
-def encode_object(space, w_object, encoding, errors):
-    from pypy.module._codecs.interp_codecs import encode_text
+def encode_object(space, w_obj, encoding, errors):
+    from pypy.module._codecs.interp_codecs import encode
     if errors is None or errors == 'strict':
-        utf8 = space.utf8_w(w_object)
+        # fast paths
+        utf8 = space.utf8_w(w_obj)
         if encoding is None or encoding == 'utf-8':
             try:
                 rutf8.check_utf8(utf8, False)
@@ -1230,21 +1229,12 @@
                     a.pos, a.pos + 1)
                 assert False, "always raises"
             return space.newbytes(utf8)
-    if encoding is None:
-        encoding = space.sys.defaultencoding
-    w_retval = encode_text(space, w_object, encoding, errors)
-    if not space.isinstance_w(w_retval, space.w_bytes):
-        raise oefmt(space.w_TypeError,
-                    "'%s' encoder returned '%T' instead of 'bytes'; "
-                    "use codecs.encode() to encode to arbitrary types",
-                    encoding,
-                    w_retval)
-    return w_retval
+    return encode(space, w_obj, encoding, errors)
 
 
 def decode_object(space, w_obj, encoding, errors=None):
-    assert encoding is not None
     if errors == 'strict' or errors is None:
+        # fast paths
         if encoding == 'ascii':
             s = space.charbuf_w(w_obj)
             unicodehelper.check_ascii_or_raise(space, s)
@@ -1253,27 +1243,8 @@
             s = space.charbuf_w(w_obj)
             lgt = unicodehelper.check_utf8_or_raise(space, s)
             return space.newutf8(s, lgt)
-    from pypy.module._codecs.interp_codecs import decode_text
-    w_retval = decode_text(space, w_obj, encoding, errors)
-    if not isinstance(w_retval, W_UnicodeObject):
-        raise oefmt(space.w_TypeError,
-                    "'%s' decoder returned '%T' instead of 'str'; "
-                    "use codecs.decode() to decode to arbitrary types",
-                    encoding,
-                    w_retval)
-    return w_retval
-
-
-def unicode_from_encoded_object(space, w_obj, encoding, errors):
-    if encoding is None:
-        encoding = getdefaultencoding(space)
-    w_retval = decode_object(space, w_obj, encoding, errors)
-    if not isinstance(w_retval, W_UnicodeObject):
-        raise oefmt(space.w_TypeError,
-                    "decoder did not return a str object (type '%T')",
-                    w_retval)
-    return w_retval
-
+    from pypy.module._codecs.interp_codecs import decode
+    return decode(space, w_obj, encoding, errors)
 
 def unicode_from_object(space, w_obj):
     if space.is_w(space.type(w_obj), space.w_unicode):
@@ -1281,6 +1252,7 @@
     if space.lookup(w_obj, "__str__") is not None:
         return space.str(w_obj)
     return space.repr(w_obj)
+
 def ascii_from_object(space, w_obj):
     """Implements builtins.ascii()"""
     # repr is guaranteed to be unicode
@@ -1292,7 +1264,7 @@
     # this is a performance and bootstrapping hack
     encoding = getdefaultencoding(space)
     if encoding != 'ascii':
-        return unicode_from_encoded_object(space, w_bytes, encoding, "strict")
+        return decode_object(space, w_bytes, encoding, "strict")
     s = space.bytes_w(w_bytes)
     unicodehelper.check_ascii_or_raise(space, s)
     return W_UnicodeObject(s, len(s))
@@ -1897,7 +1869,7 @@
     if not isinstance(w_unistr, W_UnicodeObject):
         raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
     utf8 = space.utf8_w(w_unistr)
-    lgt =  space.len_w(w_unistr) 
+    lgt =  space.len_w(w_unistr)
     result = StringBuilder(lgt)
     pos = 0
     for uchr in rutf8.Utf8StringIterator(utf8):
@@ -1920,7 +1892,7 @@
             raise OperationError(space.w_UnicodeEncodeError,
                                  space.newtuple([w_encoding, w_unistr,
                                                  w_start, w_end,
-                                                 w_reason]))            
+                                                 w_reason]))
         result.append(c)
         pos += 1
     return result.build()
diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py
--- a/rpython/rlib/rsre/rsre_utf8.py
+++ b/rpython/rlib/rsre/rsre_utf8.py
@@ -40,17 +40,23 @@
     prev_indirect = prev
 
     def next_n(self, position, n, end_position):
-        for i in range(n):
+        i = 0
+        # avoid range(n) since n can be quite large
+        while i < n:
             if position >= end_position:
                 raise EndOfString
             position = rutf8.next_codepoint_pos(self._utf8, position)
+            i += 1
         return position
 
     def prev_n(self, position, n, start_position):
-        for i in range(n):
+        i = 0
+        # avoid range(n) since n can be quite large
+        while i < n:
             if position <= start_position:
                 raise EndOfString
             position = rutf8.prev_codepoint_pos(self._utf8, position)
+            i += 1
         assert position >= 0
         return position
 
diff --git a/rpython/rlib/test/test_rawrefcount_boehm.py b/rpython/rlib/test/test_rawrefcount_boehm.py
--- a/rpython/rlib/test/test_rawrefcount_boehm.py
+++ b/rpython/rlib/test/test_rawrefcount_boehm.py
@@ -111,7 +111,7 @@
         pyobjs.append(varname)
         return varname
 
-    for op in draw(strategies.lists(operations, average_size=250)):
+    for op in draw(strategies.lists(operations)):
         if op == 'new_gcobj':
             new_gcobj()
         elif op == 'new_pyobj':