[pypy-commit] pypy py3.6: fix unicode.encode('hex'), bytes.decode('hex') to raise LookupError
mattip
pypy.commits at gmail.com
Sun Aug 25 13:11:12 EDT 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: py3.6
Changeset: r97256:064c9ee805b4
Date: 2019-08-25 20:10 +0300
http://bitbucket.org/pypy/pypy/changeset/064c9ee805b4/
Log: fix unicode.encode('hex'), bytes.decode('hex') to raise LookupError
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -618,10 +618,10 @@
# ____________________________________________________________
# Helpers for unicode.encode() and bytes.decode()
def lookup_text_codec(space, action, encoding):
- codec_info = lookup_codec(space, encoding)
+ w_codec_info = lookup_codec(space, encoding)
try:
is_text_encoding = space.is_true(
- space.getattr(codec_info, space.newtext('_is_text_encoding')))
+ space.getattr(w_codec_info, space.newtext('_is_text_encoding')))
except OperationError as e:
if e.match(space, space.w_AttributeError):
is_text_encoding = True
@@ -630,8 +630,8 @@
if not is_text_encoding:
raise oefmt(space.w_LookupError,
"'%s' is not a text encoding; "
- "use %s to handle arbitrary codecs", encoding, action)
- return codec_info
+ "use codecs.%s() to handle arbitrary codecs", encoding, action)
+ return w_codec_info
# ____________________________________________________________
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -780,6 +780,11 @@
raises(UnicodeError, b"\xc2".decode, "utf-8")
assert b'\xe1\x80'.decode('utf-8', 'replace') == "\ufffd"
+ def test_invalid_lookup(self):
+
+ raises(LookupError, u"abcd".encode, "hex")
+ raises(LookupError, b"abcd".decode, "hex")
+
def test_repr_printable(self):
# PEP 3138: __repr__ respects printable characters.
x = '\u027d'
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1236,7 +1236,7 @@
return encoding, errors
def encode_object(space, w_obj, encoding, errors):
- from pypy.module._codecs.interp_codecs import encode
+ from pypy.module._codecs.interp_codecs import _call_codec, lookup_text_codec
if errors is None or errors == 'strict':
# fast paths
utf8 = space.utf8_w(w_obj)
@@ -1258,7 +1258,11 @@
a.pos, a.pos + 1)
assert False, "always raises"
return space.newbytes(utf8)
- w_retval = encode(space, w_obj, encoding, errors)
+ if encoding is None:
+ encoding = space.sys.defaultencoding
+ w_codec_info = lookup_text_codec(space, 'encode', encoding)
+ w_encfunc = space.getitem(w_codec_info, space.newint(0))
+ w_retval = _call_codec(space, w_encfunc, w_obj, "encoding", encoding, errors)
if not space.isinstance_w(w_retval, space.w_bytes):
raise oefmt(space.w_TypeError,
"'%s' encoder returned '%T' instead of 'bytes'; "
@@ -1269,6 +1273,7 @@
def decode_object(space, w_obj, encoding, errors=None):
+ from pypy.module._codecs.interp_codecs import _call_codec, lookup_text_codec
if errors == 'strict' or errors is None:
# fast paths
if encoding == 'ascii':
@@ -1279,8 +1284,11 @@
s = space.charbuf_w(w_obj)
lgt = unicodehelper.check_utf8_or_raise(space, s)
return space.newutf8(s, lgt)
- from pypy.module._codecs.interp_codecs import decode
- w_retval = decode(space, w_obj, encoding, errors)
+ if encoding is None:
+ encoding = space.sys.defaultencoding
+ w_codec_info = lookup_text_codec(space, 'decode', encoding)
+ w_encfunc = space.getitem(w_codec_info, space.newint(1))
+ w_retval = _call_codec(space, w_encfunc, w_obj, "decoding", encoding, errors)
if not isinstance(w_retval, W_UnicodeObject):
raise oefmt(space.w_TypeError,
"'%s' decoder returned '%T' instead of 'str'; "
More information about the pypy-commit mailing list