[pypy-commit] pypy release-pypy3.6-v7.x: merge py3.6 into release
mattip
pypy.commits at gmail.com
Thu Mar 14 11:20:28 EDT 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: release-pypy3.6-v7.x
Changeset: r96318:bb0d05b190b9
Date: 2019-03-14 17:19 +0200
http://bitbucket.org/pypy/pypy/changeset/bb0d05b190b9/
Log: merge py3.6 into release
diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst
--- a/pypy/doc/whatsnew-pypy3-head.rst
+++ b/pypy/doc/whatsnew-pypy3-head.rst
@@ -4,3 +4,7 @@
.. this is the revision after release-pypy3.6-v7.1
.. startrev: d642a3c217cb
+
+.. branch: zlib-make-py3-go-boom
+
+Complain if you try to copy a flushed zlib decompress on py3
diff --git a/pypy/module/__pypy__/test/test_newmemoryview.py b/pypy/module/__pypy__/test/test_newmemoryview.py
--- a/pypy/module/__pypy__/test/test_newmemoryview.py
+++ b/pypy/module/__pypy__/test/test_newmemoryview.py
@@ -26,7 +26,7 @@
obj = B()
- buf = buffer(obj)
+ buf = memoryview(obj)
v = obj.data[2]
- assert ord(buf[2]) == v
+ assert buf[2] == v
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -304,7 +304,7 @@
while pos < end:
oc = ord(obj[pos])
raw_unicode_escape_helper(builder, oc)
- pos += 1
+ pos += 1
return space.newtuple([space.newtext(builder.build()), w_end])
else:
raise oefmt(space.w_TypeError,
@@ -561,8 +561,8 @@
return w_err_handler
- at unwrap_spec(errors='text')
-def encode(space, w_obj, w_encoding=None, errors='strict'):
+ at unwrap_spec(encoding='text_or_none', errors='text_or_none')
+def encode(space, w_obj, encoding=None, errors=None):
"""encode(obj, [encoding[,errors]]) -> object
Encodes obj using the codec registered for encoding. encoding defaults
@@ -572,20 +572,26 @@
'xmlcharrefreplace' as well as any other name registered with
codecs.register_error that can handle ValueErrors.
"""
- if w_encoding is None:
+ if encoding is None:
encoding = space.sys.defaultencoding
- else:
- encoding = space.text_w(w_encoding)
w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
- return _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors)
+ w_retval = _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors)
+ if not space.isinstance_w(w_retval, space.w_bytes):
+ raise oefmt(space.w_TypeError,
+ "'%s' encoder returned '%T' instead of 'bytes'; "
+ "use codecs.encode() to encode to arbitrary types",
+ encoding,
+ w_retval)
+ return w_retval
@unwrap_spec(errors='text_or_none')
def readbuffer_encode(space, w_data, errors='strict'):
s = space.getarg_w('s#', w_data)
return space.newtuple([space.newbytes(s), space.newint(len(s))])
- at unwrap_spec(errors='text')
-def decode(space, w_obj, w_encoding=None, errors='strict'):
+ at unwrap_spec(encoding='text_or_none', errors='text_or_none')
+def decode(space, w_obj, encoding=None, errors=None):
+ from pypy.objspace.std.unicodeobject import W_UnicodeObject
"""decode(obj, [encoding[,errors]]) -> object
Decodes obj using the codec registered for encoding. encoding defaults
@@ -595,12 +601,17 @@
as well as any other name registered with codecs.register_error that is
able to handle ValueErrors.
"""
- if w_encoding is None:
+ if encoding is None:
encoding = space.sys.defaultencoding
- else:
- encoding = space.text_w(w_encoding)
w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
- return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
+ w_retval = _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
+ if not isinstance(w_retval, W_UnicodeObject):
+ raise oefmt(space.w_TypeError,
+ "'%s' decoder returned '%T' instead of 'str'; "
+ "use codecs.decode() to decode to arbitrary types",
+ encoding,
+ w_retval)
+ return w_retval
@unwrap_spec(errors='text')
def register_error(space, errors, w_handler):
@@ -636,16 +647,6 @@
"use %s to handle arbitrary codecs", encoding, action)
return codec_info
-def encode_text(space, w_obj, encoding, errors):
- w_encoder = space.getitem(
- lookup_text_codec(space, "codecs.encode()", encoding), space.newint(0))
- return _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors)
-
-def decode_text(space, w_obj, encoding, errors):
- w_decoder = space.getitem(
- lookup_text_codec(space, "codecs.decode()", encoding), space.newint(1))
- return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
-
# ____________________________________________________________
def _find_implementation(impl_name):
@@ -735,7 +736,7 @@
result = unicodehelper.utf8_encode_utf_8(utf8, errors,
state.encode_error_handler, allow_surrogates=False)
except unicodehelper.ErrorHandlerError as e:
- raise oefmt(space.w_IndexError,
+ raise oefmt(space.w_IndexError,
"position %d from error handler invalid, already encoded %d",
e.new,e.old)
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -708,7 +708,9 @@
return None
_codecs.register(search_function)
assert u"hello".encode("onearg") == b'foo'
- assert b"hello".decode("onearg") == 'foo'
+ assert b"hello".decode("onearg") == u'foo'
+ assert _codecs.encode(u"hello", "onearg") == b'foo'
+ assert _codecs.decode(b"hello", "onearg") == u'foo'
def test_cpytest_decode(self):
import codecs
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1191,7 +1191,9 @@
state.C.get_pyos_inputhook = rffi.llexternal(
'_PyPy_get_PyOS_InputHook', [], FUNCPTR,
compilation_info=eci, _nowrapper=True)
-
+ state.C.tuple_new = rffi.llexternal(
+ 'tuple_new', [PyTypeObjectPtr, PyObject, PyObject], PyObject,
+ compilation_info=eci, _nowrapper=True)
def init_function(func):
INIT_FUNCTIONS.append(func)
diff --git a/pypy/module/cpyext/include/tupleobject.h b/pypy/module/cpyext/include/tupleobject.h
--- a/pypy/module/cpyext/include/tupleobject.h
+++ b/pypy/module/cpyext/include/tupleobject.h
@@ -18,6 +18,7 @@
PyAPI_FUNC(PyObject *) PyTuple_New(Py_ssize_t size);
PyAPI_FUNC(void) _PyPy_tuple_dealloc(PyObject *);
+PyAPI_FUNC(PyObject *) tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
/* defined in varargswrapper.c */
PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...);
diff --git a/pypy/module/cpyext/src/tupleobject.c b/pypy/module/cpyext/src/tupleobject.c
--- a/pypy/module/cpyext/src/tupleobject.c
+++ b/pypy/module/cpyext/src/tupleobject.c
@@ -89,3 +89,48 @@
done:
Py_TRASHCAN_SAFE_END(op)
}
+
+static PyObject *
+tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+
+PyObject *
+tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+ PyObject *arg = NULL;
+ static char *kwlist[] = {"sequence", 0};
+
+ if (type != &PyTuple_Type)
+ return tuple_subtype_new(type, args, kwds);
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:tuple", kwlist, &arg))
+ return NULL;
+
+ if (arg == NULL)
+ return PyTuple_New(0);
+ else
+ return PySequence_Tuple(arg);
+}
+
+static PyObject *
+tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+ PyObject *tmp, *newobj, *item;
+ Py_ssize_t i, n;
+
+ assert(PyType_IsSubtype(type, &PyTuple_Type));
+ tmp = tuple_new(&PyTuple_Type, args, kwds);
+ if (tmp == NULL)
+ return NULL;
+ assert(PyTuple_Check(tmp));
+ newobj = type->tp_alloc(type, n = PyTuple_GET_SIZE(tmp));
+ if (newobj == NULL)
+ return NULL;
+ for (i = 0; i < n; i++) {
+ item = PyTuple_GET_ITEM(tmp, i);
+ Py_INCREF(item);
+ PyTuple_SET_ITEM(newobj, i, item);
+ }
+ Py_DECREF(tmp);
+ return newobj;
+}
+
+
diff --git a/pypy/module/cpyext/test/test_tupleobject.py b/pypy/module/cpyext/test/test_tupleobject.py
--- a/pypy/module/cpyext/test/test_tupleobject.py
+++ b/pypy/module/cpyext/test/test_tupleobject.py
@@ -226,3 +226,44 @@
raises(SystemError, module.set_after_use, s)
else:
module.set_after_use(s)
+
+ def test_mp_length(self):
+ # issue 2968: creating a subclass of tuple in C led to recursion
+ # since the default tp_new needs to build a w_obj, but that needs
+ # to call space.len_w, which needs to call tp_new.
+ module = self.import_extension('foo', [
+ ("get_size", "METH_NOARGS",
+ """
+ return (PyObject*)&THPSizeType;
+ """),
+ ], prologue='''
+ #include "Python.h"
+
+ struct THPSize {
+ PyTupleObject tuple;
+ } THPSize;
+
+ static PyMappingMethods THPSize_as_mapping = {
+ 0, //PyTuple_Type.tp_as_mapping->mp_length,
+ 0,
+ 0
+ };
+
+ PyTypeObject THPSizeType = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "torch.Size", /* tp_name */
+ sizeof(THPSize), /* tp_basicsize */
+ };
+ ''' , more_init = '''
+ THPSize_as_mapping.mp_length = PyTuple_Type.tp_as_mapping->mp_length;
+ THPSizeType.tp_base = &PyTuple_Type;
+ THPSizeType.tp_flags = Py_TPFLAGS_DEFAULT;
+ THPSizeType.tp_as_mapping = &THPSize_as_mapping;
+ THPSizeType.tp_new = PyTuple_Type.tp_new;
+ if (PyType_Ready(&THPSizeType) < 0) INITERROR;
+ ''')
+ SZ = module.get_size()
+ s = SZ((1, 2, 3))
+ assert len(s) == 3
+ assert len(s) == 3
+
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -686,6 +686,11 @@
update_all_slots(space, w_type, pto)
else:
update_all_slots_builtin(space, w_type, pto)
+
+    # XXX generalize this pattern for various slot functions implemented in C
+ if space.is_w(w_type, space.w_tuple):
+ pto.c_tp_new = state.C.tuple_new
+
if not pto.c_tp_new:
base_object_pyo = make_ref(space, space.w_object)
base_object_pto = rffi.cast(PyTypeObjectPtr, base_object_pyo)
diff --git a/pypy/module/zlib/interp_zlib.py b/pypy/module/zlib/interp_zlib.py
--- a/pypy/module/zlib/interp_zlib.py
+++ b/pypy/module/zlib/interp_zlib.py
@@ -313,6 +313,11 @@
try:
self.lock()
try:
+ if not self.stream:
+ raise oefmt(
+ space.w_ValueError,
+ "Decompressor was already flushed",
+ )
copied = rzlib.inflateCopy(self.stream)
finally:
self.unlock()
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -43,7 +43,7 @@
# best effort, too expensive to handle surrogates
ulength = rutf8.codepoints_in_utf(utf8str)
except:
- ulength = length
+ ulength = length
assert ulength == length
@@ -135,7 +135,7 @@
if strict:
raise oefmt(space.w_TypeError,
"%s arg must be None, unicode or str", strict)
- return unicode_from_encoded_object(space, w_other, 'utf8', "strict")
+ return decode_object(space, w_other, 'utf8', "strict")
def convert_to_w_unicode(self, space):
return self
@@ -203,8 +203,7 @@
if space.isinstance_w(w_object, space.w_unicode):
raise oefmt(space.w_TypeError,
"decoding str is not supported")
- w_value = unicode_from_encoded_object(space, w_object,
- encoding, errors)
+ w_value = decode_object(space, w_object, encoding, errors)
if space.is_w(w_unicodetype, space.w_unicode):
return w_value
@@ -649,7 +648,7 @@
def descr_startswith(self, space, w_prefix, w_start=None, w_end=None):
start, end = self._unwrap_and_compute_idx_params(space, w_start, w_end)
value = self._utf8
- if (start > 0 and not space.is_none(w_end) and
+ if (start > 0 and not space.is_none(w_end) and
space.getindex_w(w_end, None) == 0):
return space.w_False
if space.isinstance_w(w_prefix, space.w_tuple):
@@ -674,7 +673,7 @@
start, end = self._unwrap_and_compute_idx_params(space, w_start, w_end)
value = self._utf8
# match cpython behaviour
- if (start > 0 and not space.is_none(w_end) and
+ if (start > 0 and not space.is_none(w_end) and
space.getindex_w(w_end, None) == 0):
return space.w_False
if space.isinstance_w(w_suffix, space.w_tuple):
@@ -1207,11 +1206,11 @@
errors = None if w_errors is None else space.text_w(w_errors)
return encoding, errors
-
-def encode_object(space, w_object, encoding, errors):
- from pypy.module._codecs.interp_codecs import encode_text
+def encode_object(space, w_obj, encoding, errors):
+ from pypy.module._codecs.interp_codecs import encode
if errors is None or errors == 'strict':
- utf8 = space.utf8_w(w_object)
+ # fast paths
+ utf8 = space.utf8_w(w_obj)
if encoding is None or encoding == 'utf-8':
try:
rutf8.check_utf8(utf8, False)
@@ -1230,21 +1229,12 @@
a.pos, a.pos + 1)
assert False, "always raises"
return space.newbytes(utf8)
- if encoding is None:
- encoding = space.sys.defaultencoding
- w_retval = encode_text(space, w_object, encoding, errors)
- if not space.isinstance_w(w_retval, space.w_bytes):
- raise oefmt(space.w_TypeError,
- "'%s' encoder returned '%T' instead of 'bytes'; "
- "use codecs.encode() to encode to arbitrary types",
- encoding,
- w_retval)
- return w_retval
+ return encode(space, w_obj, encoding, errors)
def decode_object(space, w_obj, encoding, errors=None):
- assert encoding is not None
if errors == 'strict' or errors is None:
+ # fast paths
if encoding == 'ascii':
s = space.charbuf_w(w_obj)
unicodehelper.check_ascii_or_raise(space, s)
@@ -1253,27 +1243,8 @@
s = space.charbuf_w(w_obj)
lgt = unicodehelper.check_utf8_or_raise(space, s)
return space.newutf8(s, lgt)
- from pypy.module._codecs.interp_codecs import decode_text
- w_retval = decode_text(space, w_obj, encoding, errors)
- if not isinstance(w_retval, W_UnicodeObject):
- raise oefmt(space.w_TypeError,
- "'%s' decoder returned '%T' instead of 'str'; "
- "use codecs.decode() to decode to arbitrary types",
- encoding,
- w_retval)
- return w_retval
-
-
-def unicode_from_encoded_object(space, w_obj, encoding, errors):
- if encoding is None:
- encoding = getdefaultencoding(space)
- w_retval = decode_object(space, w_obj, encoding, errors)
- if not isinstance(w_retval, W_UnicodeObject):
- raise oefmt(space.w_TypeError,
- "decoder did not return a str object (type '%T')",
- w_retval)
- return w_retval
-
+ from pypy.module._codecs.interp_codecs import decode
+ return decode(space, w_obj, encoding, errors)
def unicode_from_object(space, w_obj):
if space.is_w(space.type(w_obj), space.w_unicode):
@@ -1281,6 +1252,7 @@
if space.lookup(w_obj, "__str__") is not None:
return space.str(w_obj)
return space.repr(w_obj)
+
def ascii_from_object(space, w_obj):
"""Implements builtins.ascii()"""
# repr is guaranteed to be unicode
@@ -1292,7 +1264,7 @@
# this is a performance and bootstrapping hack
encoding = getdefaultencoding(space)
if encoding != 'ascii':
- return unicode_from_encoded_object(space, w_bytes, encoding, "strict")
+ return decode_object(space, w_bytes, encoding, "strict")
s = space.bytes_w(w_bytes)
unicodehelper.check_ascii_or_raise(space, s)
return W_UnicodeObject(s, len(s))
@@ -1897,7 +1869,7 @@
if not isinstance(w_unistr, W_UnicodeObject):
raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
utf8 = space.utf8_w(w_unistr)
- lgt = space.len_w(w_unistr)
+ lgt = space.len_w(w_unistr)
result = StringBuilder(lgt)
pos = 0
for uchr in rutf8.Utf8StringIterator(utf8):
@@ -1920,7 +1892,7 @@
raise OperationError(space.w_UnicodeEncodeError,
space.newtuple([w_encoding, w_unistr,
w_start, w_end,
- w_reason]))
+ w_reason]))
result.append(c)
pos += 1
return result.build()
diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py
--- a/rpython/rlib/rsre/rsre_utf8.py
+++ b/rpython/rlib/rsre/rsre_utf8.py
@@ -40,17 +40,23 @@
prev_indirect = prev
def next_n(self, position, n, end_position):
- for i in range(n):
+ i = 0
+ # avoid range(n) since n can be quite large
+ while i < n:
if position >= end_position:
raise EndOfString
position = rutf8.next_codepoint_pos(self._utf8, position)
+ i += 1
return position
def prev_n(self, position, n, start_position):
- for i in range(n):
+ i = 0
+ # avoid range(n) since n can be quite large
+ while i < n:
if position <= start_position:
raise EndOfString
position = rutf8.prev_codepoint_pos(self._utf8, position)
+ i += 1
assert position >= 0
return position
diff --git a/rpython/rlib/test/test_rawrefcount_boehm.py b/rpython/rlib/test/test_rawrefcount_boehm.py
--- a/rpython/rlib/test/test_rawrefcount_boehm.py
+++ b/rpython/rlib/test/test_rawrefcount_boehm.py
@@ -111,7 +111,7 @@
pyobjs.append(varname)
return varname
- for op in draw(strategies.lists(operations, average_size=250)):
+ for op in draw(strategies.lists(operations)):
if op == 'new_gcobj':
new_gcobj()
elif op == 'new_pyobj':
More information about the pypy-commit
mailing list