[pypy-commit] pypy stdlib-2.7.6: fix charmap_decode with non-BMP chars as integers (cpython issue15379)

bdkearns noreply at buildbot.pypy.org
Sun Mar 2 09:19:22 CET 2014


Author: Brian Kearns <bdkearns at gmail.com>
Branch: stdlib-2.7.6
Changeset: r69602:87cf171379cb
Date: 2014-03-02 03:18 -0500
http://bitbucket.org/pypy/pypy/changeset/87cf171379cb/

Log:	fix charmap_decode with non-BMP chars as integers (cpython
	issue15379)

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -525,33 +525,22 @@
                     raise
                 return errorchar
 
-        # Charmap may return a unicode string
-        try:
-            x = space.unicode_w(w_ch)
-        except OperationError, e:
-            if not e.match(space, space.w_TypeError):
-                raise
-        else:
-            return x
-
-        # Charmap may return a number
-        try:
+        if space.isinstance_w(w_ch, space.w_unicode):
+            # Charmap may return a unicode string
+            return space.unicode_w(w_ch)
+        elif space.isinstance_w(w_ch, space.w_int):
+            # Charmap may return a number
             x = space.int_w(w_ch)
-        except OperationError:
-            if not e.match(space, space.w_TypeError):
-                raise
-        else:
-            if 0 <= x < 65536: # Even on wide unicode builds...
-                return unichr(x)
-            else:
-                raise OperationError(space.w_TypeError, space.wrap(
-                    "character mapping must be in range(65536)"))
-
-        # Charmap may return None
-        if space.is_w(w_ch, space.w_None):
+            if not 0 <= x <= 0x10FFFF:
+                raise oefmt(space.w_TypeError,
+                    "character mapping must be in range(0x110000)")
+            return unichr(x)
+        elif space.is_w(w_ch, space.w_None):
+            # Charmap may return None
             return errorchar
 
-        raise OperationError(space.w_TypeError, space.wrap("invalid mapping"))
+        raise oefmt(space.w_TypeError,
+            "character mapping must return integer, None or unicode")
 
 class Charmap_Encode:
     def __init__(self, space, w_mapping):
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -108,7 +108,16 @@
         map = tuple([unichr(i) for i in range(256)])
         assert charmap_decode('xxx\xff', 'strict', map) == (u'xxx\xff', 4)
 
-        raises(TypeError, charmap_decode, '\xff', "replace",  {0xff: 0x10001})
+        exc = raises(TypeError, charmap_decode, '\xff', "strict",  {0xff: 'a'})
+        assert exc.value[0] == "character mapping must return integer, None or unicode"
+        raises(TypeError, charmap_decode, '\xff', "strict",  {0xff: 0x110000})
+        assert (charmap_decode("\x00\x01\x02", "strict",
+                               {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) ==
+                u"\U0010FFFFbc", 3)
+        assert (charmap_decode("\x00\x01\x02", "strict",
+                               {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) ==
+                u"\U0010FFFFbc", 3)
+
 
     def test_unicode_escape(self):
         from _codecs import unicode_escape_encode, unicode_escape_decode
@@ -116,6 +125,7 @@
         assert unicode_escape_decode('abc') == (u'abc'.decode('unicode_escape'), 3)
         assert unicode_escape_decode('\\x61\\x62\\x63') == (u'abc', 12)
 
+
 class AppTestPartialEvaluation:
     spaceconfig = dict(usemodules=('array',))
 


More information about the pypy-commit mailing list