[pypy-commit] pypy stdlib-2.7.6: fix utf-7 decoder (cpython issue19279)
bdkearns
noreply at buildbot.pypy.org
Sun Mar 2 08:19:45 CET 2014
Author: Brian Kearns <bdkearns at gmail.com>
Branch: stdlib-2.7.6
Changeset: r69600:5a23ee926e6c
Date: 2014-03-02 02:19 -0500
http://bitbucket.org/pypy/pypy/changeset/5a23ee926e6c/
Log: fix utf-7 decoder (cpython issue19279)
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -591,6 +591,30 @@
def test_utf7_surrogate(self):
assert '+3ADYAA-'.decode('utf-7') == u'\udc00\ud800'
+ def test_utf7_errors(self):
+ import codecs
+ tests = [
+ ('a\xffb', u'a\ufffdb'),
+ ('a+IK', u'a\ufffd'),
+ ('a+IK-b', u'a\ufffdb'),
+ ('a+IK,b', u'a\ufffdb'),
+ ('a+IKx', u'a\u20ac\ufffd'),
+ ('a+IKx-b', u'a\u20ac\ufffdb'),
+ ('a+IKwgr', u'a\u20ac\ufffd'),
+ ('a+IKwgr-b', u'a\u20ac\ufffdb'),
+ ('a+IKwgr,', u'a\u20ac\ufffd'),
+ ('a+IKwgr,-b', u'a\u20ac\ufffd-b'),
+ ('a+IKwgrB', u'a\u20ac\u20ac\ufffd'),
+ ('a+IKwgrB-b', u'a\u20ac\u20ac\ufffdb'),
+ ('a+/,+IKw-b', u'a\ufffd\u20acb'),
+ ('a+//,+IKw-b', u'a\ufffd\u20acb'),
+ ('a+///,+IKw-b', u'a\uffff\ufffd\u20acb'),
+ ('a+////,+IKw-b', u'a\uffff\ufffd\u20acb'),
+ ]
+ for raw, expected in tests:
+ raises(UnicodeDecodeError, codecs.utf_7_decode, raw, 'strict', True)
+ assert raw.decode('utf-7', 'replace') == expected
+
def test_utf_16_encode_decode(self):
import codecs, sys
x = u'123abc'
@@ -605,7 +629,7 @@
assert codecs.getdecoder('utf-16')(
'\xff\xfe1\x002\x003\x00a\x00b\x00c\x00') == (x, 14)
- def test_unicode_escape(self):
+ def test_unicode_escape(self):
assert u'\\'.encode('unicode-escape') == '\\\\'
assert '\\\\'.decode('unicode-escape') == u'\\'
assert u'\ud801'.encode('unicode-escape') == '\\ud801'
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -812,6 +812,7 @@
outCh = base64buffer >> (base64bits - 16)
base64bits -= 16
base64buffer &= (1 << base64bits) - 1 # clear high bits
+ assert outCh <= 0xffff
if surrogate:
# expecting a second surrogate
if outCh >= 0xDC00 and outCh <= 0xDFFFF:
@@ -877,6 +878,8 @@
else: # begin base64-encoded section
inShift = 1
shiftOutStartPos = pos - 1
+ base64bits = 0
+ base64buffer = 0
elif _utf7_DECODE_DIRECT(oc): # character decodes at itself
result.append(unichr(oc))
More information about the pypy-commit mailing list