[pypy-commit] pypy stdlib-2.7.6: fix charmap_decode with non-BMP chars as integers (cpython issue15379)
bdkearns
noreply at buildbot.pypy.org
Sun Mar 2 09:19:22 CET 2014
Author: Brian Kearns <bdkearns at gmail.com>
Branch: stdlib-2.7.6
Changeset: r69602:87cf171379cb
Date: 2014-03-02 03:18 -0500
http://bitbucket.org/pypy/pypy/changeset/87cf171379cb/
Log: fix charmap_decode with non-BMP chars as integers (CPython issue 15379)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -525,33 +525,22 @@
raise
return errorchar
- # Charmap may return a unicode string
- try:
- x = space.unicode_w(w_ch)
- except OperationError, e:
- if not e.match(space, space.w_TypeError):
- raise
- else:
- return x
-
- # Charmap may return a number
- try:
+ if space.isinstance_w(w_ch, space.w_unicode):
+ # Charmap may return a unicode string
+ return space.unicode_w(w_ch)
+ elif space.isinstance_w(w_ch, space.w_int):
+ # Charmap may return a number
x = space.int_w(w_ch)
- except OperationError:
- if not e.match(space, space.w_TypeError):
- raise
- else:
- if 0 <= x < 65536: # Even on wide unicode builds...
- return unichr(x)
- else:
- raise OperationError(space.w_TypeError, space.wrap(
- "character mapping must be in range(65536)"))
-
- # Charmap may return None
- if space.is_w(w_ch, space.w_None):
+ if not 0 <= x <= 0x10FFFF:
+ raise oefmt(space.w_TypeError,
+ "character mapping must be in range(0x110000)")
+ return unichr(x)
+ elif space.is_w(w_ch, space.w_None):
+ # Charmap may return None
return errorchar
- raise OperationError(space.w_TypeError, space.wrap("invalid mapping"))
+ raise oefmt(space.w_TypeError,
+ "character mapping must return integer, None or unicode")
class Charmap_Encode:
def __init__(self, space, w_mapping):
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -108,7 +108,16 @@
map = tuple([unichr(i) for i in range(256)])
assert charmap_decode('xxx\xff', 'strict', map) == (u'xxx\xff', 4)
- raises(TypeError, charmap_decode, '\xff', "replace", {0xff: 0x10001})
+ exc = raises(TypeError, charmap_decode, '\xff', "strict", {0xff: 'a'})
+ assert exc.value[0] == "character mapping must return integer, None or unicode"
+ raises(TypeError, charmap_decode, '\xff', "strict", {0xff: 0x110000})
+ assert (charmap_decode("\x00\x01\x02", "strict",
+ {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) ==
+ u"\U0010FFFFbc", 3)
+ assert (charmap_decode("\x00\x01\x02", "strict",
+ {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) ==
+ u"\U0010FFFFbc", 3)
+
def test_unicode_escape(self):
from _codecs import unicode_escape_encode, unicode_escape_decode
@@ -116,6 +125,7 @@
assert unicode_escape_decode('abc') == (u'abc'.decode('unicode_escape'), 3)
assert unicode_escape_decode('\\x61\\x62\\x63') == (u'abc', 12)
+
class AppTestPartialEvaluation:
spaceconfig = dict(usemodules=('array',))
More information about the pypy-commit mailing list