[pypy-commit] pypy py3.6: fix issue #3001: weird codecs can return strange types when using
cfbolz
pypy.commits at gmail.com
Thu Apr 18 05:24:57 EDT 2019
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: py3.6
Changeset: r96518:b4f7c179d353
Date: 2019-04-18 11:24 +0200
http://bitbucket.org/pypy/pypy/changeset/b4f7c179d353/
Log: fix issue #3001: weird codecs can return strange types when using
codecs.encode/decode but not via unicode.encode/bytes.decode
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -575,14 +575,7 @@
if encoding is None:
encoding = space.sys.defaultencoding
w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
- w_retval = _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors)
- if not space.isinstance_w(w_retval, space.w_bytes):
- raise oefmt(space.w_TypeError,
- "'%s' encoder returned '%T' instead of 'bytes'; "
- "use codecs.encode() to encode to arbitrary types",
- encoding,
- w_retval)
- return w_retval
+ return _call_codec(space, w_encoder, w_obj, "encoding", encoding, errors)
@unwrap_spec(errors='text_or_none')
def readbuffer_encode(space, w_data, errors='strict'):
@@ -604,14 +597,7 @@
if encoding is None:
encoding = space.sys.defaultencoding
w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
- w_retval = _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
- if not isinstance(w_retval, W_UnicodeObject):
- raise oefmt(space.w_TypeError,
- "'%s' decoder returned '%T' instead of 'str'; "
- "use codecs.decode() to decode to arbitrary types",
- encoding,
- w_retval)
- return w_retval
+ return _call_codec(space, w_decoder, w_obj, "decoding", encoding, errors)
@unwrap_spec(errors='text')
def register_error(space, errors, w_handler):
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -1386,7 +1386,7 @@
"foo\udca5bar")
assert ("foo\udca5bar".encode("iso-8859-3", "surrogateescape") ==
b"foo\xa5bar")
-
+
def test_warn_escape_decode(self):
import warnings
import codecs
@@ -1399,5 +1399,31 @@
assert len(l) == 2
assert isinstance(l[0].message, DeprecationWarning)
+ def test_invalid_type_errors(self):
+ # hex is not a text encoding. it works via the codecs functions, but
+ # not the methods
+ import codecs
+ res = codecs.decode(b"aabb", "hex")
+ assert res == b"\xaa\xbb"
+ res = codecs.decode(u"aabb", "hex")
+ assert res == b"\xaa\xbb"
+ res = codecs.encode(b"\xaa\xbb", "hex")
+ assert res == b"aabb"
+ raises(LookupError, u"abc".encode, "hex")
+ def test_non_text_codec(self):
+ import _codecs
+ def search_function(encoding):
+ def f(input, errors="strict"):
+ return 52, len(input)
+ if encoding == 'test.mynontextenc':
+ return (f, f, None, None)
+ return None
+ _codecs.register(search_function)
+ res = _codecs.encode(u"abc", "test.mynontextenc")
+ assert res == 52
+ res = _codecs.decode(b"abc", "test.mynontextenc")
+ assert res == 52
+ raises(TypeError, u"abc".encode, "test.mynontextenc")
+ raises(TypeError, b"abc".decode, "test.mynontextenc")
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1235,7 +1235,14 @@
a.pos, a.pos + 1)
assert False, "always raises"
return space.newbytes(utf8)
- return encode(space, w_obj, encoding, errors)
+ w_retval = encode(space, w_obj, encoding, errors)
+ if not space.isinstance_w(w_retval, space.w_bytes):
+ raise oefmt(space.w_TypeError,
+ "'%s' encoder returned '%T' instead of 'bytes'; "
+ "use codecs.encode() to encode to arbitrary types",
+ encoding,
+ w_retval)
+ return w_retval
def decode_object(space, w_obj, encoding, errors=None):
@@ -1250,7 +1257,14 @@
lgt = unicodehelper.check_utf8_or_raise(space, s)
return space.newutf8(s, lgt)
from pypy.module._codecs.interp_codecs import decode
- return decode(space, w_obj, encoding, errors)
+ w_retval = decode(space, w_obj, encoding, errors)
+ if not isinstance(w_retval, W_UnicodeObject):
+ raise oefmt(space.w_TypeError,
+ "'%s' decoder returned '%T' instead of 'str'; "
+ "use codecs.decode() to decode to arbitrary types",
+ encoding,
+ w_retval)
+ return w_retval
def unicode_from_object(space, w_obj):
if space.is_w(space.type(w_obj), space.w_unicode):
More information about the pypy-commit
mailing list