[pypy-commit] pypy unicode-utf8-py3: fix utf_7_decode(b'+') which should return (u'', 0)
mattip
pypy.commits at gmail.com
Wed Jan 16 17:44:37 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95656:a57fddf5aa3c
Date: 2019-01-16 11:48 +0200
http://bitbucket.org/pypy/pypy/changeset/a57fddf5aa3c/
Log: fix utf_7_decode(b'+') which should return (u'', 0)
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -949,11 +949,11 @@
# end of string
final_length = result.getlength()
if inShift and final: # in shift sequence, no more to follow
- # if we're in an inconsistent state, that's an error
inShift = 0
if (surrogate or
base64bits >= 6 or
(base64bits > 0 and base64buffer != 0)):
+ # if we're in an inconsistent state, that's an error
msg = "unterminated shift sequence"
r, pos, rettype = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos)
reslen = rutf8.check_utf8(r, True)
@@ -961,7 +961,7 @@
result.append(r)
final_length = result.getlength()
elif inShift:
- pos = startinpos
+ size = startinpos
final_length = shiftOutStartPos # back off output
assert final_length >= 0
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -686,7 +686,7 @@
final = space.is_true(w_final)
state = space.fromcache(CodecState)
result, length, pos = func(string, errors, final, state.decode_error_handler)
- # must return bytes, len_of_original_string
+ # must return bytes, pos
return space.newtuple([space.newutf8(result, length), space.newint(pos)])
wrap_decoder.__name__ = func.__name__
globals()[name] = wrap_decoder
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -1029,6 +1029,11 @@
def test_utf7_surrogate(self):
assert b'+3ADYAA-'.decode('utf-7') == u'\udc00\ud800'
+ def test_utf_7_decode(self):
+ from _codecs import utf_7_decode
+ res = utf_7_decode(b'+')
+ assert res == (u'', 0)
+
def test_utf7_errors(self):
import codecs
tests = [
More information about the pypy-commit mailing list