[pypy-commit] pypy py3.5: merge heads

Tue Nov 1 14:00:09 EDT 2016

Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: py3.5
Changeset: r88046:22a3e8d96787
Date: 2016-11-01 17:59 +0000
http://bitbucket.org/pypy/pypy/changeset/22a3e8d96787/

Log:	merge heads

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,10 +1,11 @@
 from rpython.rlib import jit
 from rpython.rlib.objectmodel import we_are_translated
-from rpython.rlib.rstring import UnicodeBuilder
+from rpython.rlib.rstring import UnicodeBuilder, StringBuilder
 from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
+from pypy.module.unicodedata import unicodedb
 
 
 class VersionTag(object):
@@ -295,6 +296,26 @@
         raise oefmt(space.w_TypeError,
                     "don't know how to handle %T in error callback", w_exc)
 
+def namereplace_errors(space, w_exc):
+    check_exception(space, w_exc)
+    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
+        obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
+        start = space.int_w(space.getattr(w_exc, space.wrap('start')))
+        w_end = space.getattr(w_exc, space.wrap('end'))
+        end = space.int_w(w_end)
+        builder = StringBuilder()
+        pos = start
+        while pos < end:
+            oc = ord(obj[pos])
+            builder.append('\\N{')
+            builder.append(unicodedb.name(oc))
+            builder.append('}')
+            pos += 1
+        return space.newtuple([space.newbytes(builder.build()), w_end])
+    else:
+        raise oefmt(space.w_TypeError,
+                    "don't know how to handle %T in error callback", w_exc)
+
 def surrogatepass_errors(space, w_exc):
     check_exception(space, w_exc)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
@@ -382,7 +403,8 @@
     "NOT_RPYTHON"
     state = space.fromcache(CodecState)
     for error in ("strict", "ignore", "replace", "xmlcharrefreplace",
-                  "backslashreplace", "surrogateescape", "surrogatepass"):
+                  "backslashreplace", "surrogateescape", "surrogatepass",
+                  "namereplace"):
         name = error + "_errors"
         state.codec_error_registry[error] = space.wrap(interp2app(globals()[name]))
 
@@ -667,7 +689,7 @@
             return errorchar
 
         raise oefmt(space.w_TypeError,
-            "character mapping must return integer, None or unicode")
+            "character mapping must return integer, None or str")
 
 class Charmap_Encode:
     def __init__(self, space, w_mapping):
@@ -700,7 +722,7 @@
             return errorchar
 
         raise oefmt(space.w_TypeError,
-            "character mapping must return integer, None or str")
+            "character mapping must return integer, bytes or None, not str")
 
 
 @unwrap_spec(string='bufferstr', errors='str_or_None')
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -50,7 +50,7 @@
                     ]
         for s in insecure:
             buf = b"S" + s + b"\012p0\012."
-            raises (ValueError, pickle.loads, buf)
+            raises ((ValueError, pickle.UnpicklingError), pickle.loads, buf)
 
     def test_unicodedecodeerror(self):
         assert str(UnicodeDecodeError(
@@ -112,7 +112,7 @@
         assert charmap_decode(b'xxx\xff', 'strict', map) == ('xxx\xff', 4)
 
         exc = raises(TypeError, charmap_decode, b'\xff', "strict",  {0xff: b'a'})
-        assert str(exc.value) == "character mapping must return integer, None or unicode"
+        assert str(exc.value) == "character mapping must return integer, None or str"
         raises(TypeError, charmap_decode, b'\xff', "strict",  {0xff: 0x110000})
         assert (charmap_decode(b"\x00\x01\x02", "strict",
                                {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) ==
@@ -561,9 +561,14 @@
         assert b'\xff'.decode('utf-7', 'ignore') == ''
         assert b'\x00'.decode('unicode-internal', 'ignore') == ''
 
-    def test_backslahreplace(self):
+    def test_backslashreplace(self):
         assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == b'a\\xac\u1234\u20ac\u8000'
 
+    def test_namereplace(self):
+        assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'namereplace') == (
+            b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+            b'\\N{CJK UNIFIED IDEOGRAPH-8000}')
+
     def test_surrogateescape(self):
         assert b'a\x80b'.decode('utf-8', 'surrogateescape') == 'a\udc80b'
         assert 'a\udc80b'.encode('utf-8', 'surrogateescape') == b'a\x80b'
@@ -682,7 +687,7 @@
         exc = raises(TypeError, codecs.charmap_encode, u'\xff', "replace",  {0xff: 300})
         assert str(exc.value) == 'character mapping must be in range(256)'
         exc = raises(TypeError, codecs.charmap_encode, u'\xff', "replace",  {0xff: u'a'})
-        assert str(exc.value) == 'character mapping must return integer, None or str'
+        assert str(exc.value) == 'character mapping must return integer, bytes or None, not str'
         raises(UnicodeError, codecs.charmap_encode, u"\xff", "replace", {0xff: None})
 
     def test_charmap_encode_replace(self):