[pypy-commit] pypy unicode-utf8-py3: fix improper changes from merge

Wed Jan 2 09:09:50 EST 2019

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95569:0b7bb06a4e4f
Date: 2019-01-02 16:08 +0200
http://bitbucket.org/pypy/pypy/changeset/0b7bb06a4e4f/

Log:	fix improper changes from merge

diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -723,7 +723,7 @@
         assert b'\x00'.decode('unicode-internal', 'ignore') == ''
 
     def test_backslashreplace(self):
-        import sys
+        import sys, codecs
         sin = u"a\xac\u1234\u20ac\u8000\U0010ffff"
         if sys.maxunicode > 65535:
             expected_ascii = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff"
@@ -888,7 +888,7 @@
         codecs.register_error("test.hui", handler_unicodeinternal)
         res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui")
         if sys.maxunicode > 65535:
-            assert res == u"\u0000\u0001"   # UCS4 build
+            assert res == u"\u0000\u0001\u0000"   # UCS4 build
         else:
             assert res == u"\x00\x00\x01" # UCS2 build
 
@@ -945,7 +945,7 @@
     def test_encode_error_bad_handler(self):
         import codecs
         codecs.register_error("test.bad_handler", lambda e: (repl, 1))
-        assert u"xyz".encode("latin-1", "test.bad_handler") == "xyz"
+        assert u"xyz".encode("latin-1", "test.bad_handler") == b"xyz"
         repl = u"\u1234"
         raises(UnicodeEncodeError, u"\u5678".encode, "latin-1",
                "test.bad_handler")
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -1151,25 +1151,11 @@
             elif mytype.typecode == 'c':
                 return space.newbytes(item)
             elif mytype.typecode == 'u':
-                code = r_uint(ord(item))
-                # cpython will allow values > sys.maxunicode
-                # while silently truncating the top bits
-                if code <= r_uint(0x7F):
-                    # Encode ASCII
-                    item = chr(code)
-                elif code <= r_uint(0x07FF):
-                    item = (chr((0xc0 | (code >> 6))) + 
-                            chr((0x80 | (code & 0x3f))))
-                elif code <= r_uint(0xFFFF):
-                    item = (chr((0xe0 | (code >> 12))) +
-                            chr((0x80 | ((code >> 6) & 0x3f))) +
-                            chr((0x80 | (code & 0x3f))))
-                else:
-                    item = (chr((0xf0 | (code >> 18)) & 0xff) +
-                            chr((0x80 | ((code >> 12) & 0x3f))) +
-                            chr((0x80 | ((code >> 6) & 0x3f))) +
-                            chr((0x80 | (code & 0x3f))))
-                return space.newutf8(item, 1)
+                if ord(item) >= 0x110000:
+                    raise oefmt(space.w_ValueError,
+                                "array contains a unicode character out of "
+                                "range(0x110000)")
+                return space.newtext(rutf8.unichr_as_utf8(ord(item)), 1)
             assert 0, "unreachable"
 
         # interface
diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -499,6 +499,7 @@
                 else:
                     s = ''
                 if len(s) == 1:
+                    self.std_wp(s)
                     return
                 raise oefmt(space.w_TypeError, "%c requires int or single byte")
             else:
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1276,8 +1276,8 @@
         assert type(str(z)) is str
         assert str(z) == u'foobaz'
         #
-        assert unicode(encoding='supposedly_the_encoding') == u''
-        assert unicode(errors='supposedly_the_error') == u''
+        assert str(encoding='supposedly_the_encoding') == u''
+        assert str(errors='supposedly_the_error') == u''
         e = raises(TypeError, str, u'', 'supposedly_the_encoding')
         assert str(e.value) == 'decoding str is not supported'
         e = raises(TypeError, str, u'', errors='supposedly_the_error')