[pypy-commit] pypy unicode-utf8-py3: fix improper changes from merge
mattip
pypy.commits at gmail.com
Wed Jan 2 09:09:50 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95569:0b7bb06a4e4f
Date: 2019-01-02 16:08 +0200
http://bitbucket.org/pypy/pypy/changeset/0b7bb06a4e4f/
Log: fix improper changes from merge
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -723,7 +723,7 @@
assert b'\x00'.decode('unicode-internal', 'ignore') == ''
def test_backslashreplace(self):
- import sys
+ import sys, codecs
sin = u"a\xac\u1234\u20ac\u8000\U0010ffff"
if sys.maxunicode > 65535:
expected_ascii = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff"
@@ -888,7 +888,7 @@
codecs.register_error("test.hui", handler_unicodeinternal)
res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui")
if sys.maxunicode > 65535:
- assert res == u"\u0000\u0001" # UCS4 build
+ assert res == u"\u0000\u0001\u0000" # UCS4 build
else:
assert res == u"\x00\x00\x01" # UCS2 build
@@ -945,7 +945,7 @@
def test_encode_error_bad_handler(self):
import codecs
codecs.register_error("test.bad_handler", lambda e: (repl, 1))
- assert u"xyz".encode("latin-1", "test.bad_handler") == "xyz"
+ assert u"xyz".encode("latin-1", "test.bad_handler") == b"xyz"
repl = u"\u1234"
raises(UnicodeEncodeError, u"\u5678".encode, "latin-1",
"test.bad_handler")
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -1151,25 +1151,11 @@
elif mytype.typecode == 'c':
return space.newbytes(item)
elif mytype.typecode == 'u':
- code = r_uint(ord(item))
- # cpython will allow values > sys.maxunicode
- # while silently truncating the top bits
- if code <= r_uint(0x7F):
- # Encode ASCII
- item = chr(code)
- elif code <= r_uint(0x07FF):
- item = (chr((0xc0 | (code >> 6))) +
- chr((0x80 | (code & 0x3f))))
- elif code <= r_uint(0xFFFF):
- item = (chr((0xe0 | (code >> 12))) +
- chr((0x80 | ((code >> 6) & 0x3f))) +
- chr((0x80 | (code & 0x3f))))
- else:
- item = (chr((0xf0 | (code >> 18)) & 0xff) +
- chr((0x80 | ((code >> 12) & 0x3f))) +
- chr((0x80 | ((code >> 6) & 0x3f))) +
- chr((0x80 | (code & 0x3f))))
- return space.newutf8(item, 1)
+ if ord(item) >= 0x110000:
+ raise oefmt(space.w_ValueError,
+ "array contains a unicode character out of "
+ "range(0x110000)")
+ return space.newtext(rutf8.unichr_as_utf8(ord(item)), 1)
assert 0, "unreachable"
# interface
diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -499,6 +499,7 @@
else:
s = ''
if len(s) == 1:
+ self.std_wp(s)
return
raise oefmt(space.w_TypeError, "%c requires int or single byte")
else:
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1276,8 +1276,8 @@
assert type(str(z)) is str
assert str(z) == u'foobaz'
#
- assert unicode(encoding='supposedly_the_encoding') == u''
- assert unicode(errors='supposedly_the_error') == u''
+ assert str(encoding='supposedly_the_encoding') == u''
+ assert str(errors='supposedly_the_error') == u''
e = raises(TypeError, str, u'', 'supposedly_the_encoding')
assert str(e.value) == 'decoding str is not supported'
e = raises(TypeError, str, u'', errors='supposedly_the_error')
More information about the pypy-commit mailing list