[pypy-commit] pypy unicode-utf8-py3: cpython prefers "invalid continuation byte" over "unexpected end of data"
mattip
pypy.commits at gmail.com
Sun Jan 20 12:11:38 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95679:98c9d750a001
Date: 2019-01-20 12:05 +0200
http://bitbucket.org/pypy/pypy/changeset/98c9d750a001/
Log: cpython prefers "invalid continuation byte" over "unexpected end of
data"
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -395,8 +395,12 @@
if not final:
pos -= 1
break
- r, pos, rettype = errorhandler(errors, "utf8", "unexpected end of data",
- s, pos - 1, pos)
+ if (pos) < end and rutf8._invalid_byte_2_of_3(ordch1,
+ ord(s[pos]), allow_surrogates):
+ msg = "invalid continuation byte"
+ else:
+ msg = "unexpected end of data"
+ r, pos, rettype = errorhandler(errors, "utf8", msg, s, pos - 1, pos)
res.append(r)
suppressing = True
continue
@@ -429,8 +433,13 @@
if not final:
pos -= 1
break
- r, pos, rettype = errorhandler(errors, "utf8", "unexpected end of data",
- s, pos - 1, pos)
+ if pos < end and rutf8._invalid_byte_2_of_4(ordch1, s[pos]):
+ msg = "invalid continuation byte"
+ elif pos + 1 < end and rutf8._invalid_byte_3_of_4(ord(s[pos + 1])):
+ msg = "invalid continuation byte"
+ else:
+ msg = "unexpected end of data"
+ r, pos, rettype = errorhandler(errors, "utf8", msg, s, pos - 1, pos)
res.append(r)
suppressing = True
continue
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -544,6 +544,10 @@
assert codecs.decode(b'abc') == 'abc'
exc = raises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii')
+ exc = raises(UnicodeDecodeError, codecs.decode, b'\xe0\x00', 'utf-8')
+ print(dir(exc.value))
+ assert 'invalid continuation byte' in exc.value.reason
+
def test_bad_errorhandler_return(self):
import codecs
def baddecodereturn1(exc):
More information about the pypy-commit
mailing list