[pypy-commit] pypy py3.5: merge heads
rlamy
pypy.commits at gmail.com
Tue Nov 1 14:00:09 EDT 2016
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: py3.5
Changeset: r88046:22a3e8d96787
Date: 2016-11-01 17:59 +0000
http://bitbucket.org/pypy/pypy/changeset/22a3e8d96787/
Log: merge heads
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,10 +1,11 @@
from rpython.rlib import jit
from rpython.rlib.objectmodel import we_are_translated
-from rpython.rlib.rstring import UnicodeBuilder
+from rpython.rlib.rstring import UnicodeBuilder, StringBuilder
from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
+from pypy.module.unicodedata import unicodedb
class VersionTag(object):
@@ -295,6 +296,26 @@
raise oefmt(space.w_TypeError,
"don't know how to handle %T in error callback", w_exc)
+def namereplace_errors(space, w_exc):
+    """Codec error callback implementing the 'namereplace' policy.
+
+    For a UnicodeEncodeError, every character in ``object[start:end]`` is
+    replaced by a ``\\N{NAME}`` escape, where NAME comes from
+    ``unicodedb.name``.  A character that has no name is written as a
+    ``\\xNN``, ``\\uNNNN`` or ``\\UNNNNNNNN`` escape instead, so the
+    handler never fails merely because the lookup found nothing.
+
+    Returns a wrapped tuple ``(replacement, end)``.  Any exception other
+    than UnicodeEncodeError raises TypeError.
+    """
+    check_exception(space, w_exc)
+    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
+        obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
+        start = space.int_w(space.getattr(w_exc, space.wrap('start')))
+        w_end = space.getattr(w_exc, space.wrap('end'))
+        end = space.int_w(w_end)
+        builder = StringBuilder()
+        pos = start
+        while pos < end:
+            oc = ord(obj[pos])
+            try:
+                # NOTE(review): assumes unicodedb.name() signals "no such
+                # name" with KeyError -- confirm against unicodedb.
+                name = unicodedb.name(oc)
+            except KeyError:
+                # Unnamed code point: fall back to a numeric escape whose
+                # width depends on the magnitude of the code point.
+                if oc >= 0x10000:
+                    builder.append('\\U')
+                    width = 8
+                elif oc >= 0x100:
+                    builder.append('\\u')
+                    width = 4
+                else:
+                    builder.append('\\x')
+                    width = 2
+                num = hex(oc)           # starts with '0x'
+                lnum = len(num)
+                padding = width + 2 - lnum
+                if padding > 0:
+                    builder.append_multiple_char('0', padding)
+                builder.append_slice(num, 2, lnum)
+            else:
+                builder.append('\\N{')
+                builder.append(name)
+                builder.append('}')
+            pos += 1
+        return space.newtuple([space.newbytes(builder.build()), w_end])
+    else:
+        raise oefmt(space.w_TypeError,
+                    "don't know how to handle %T in error callback", w_exc)
+
def surrogatepass_errors(space, w_exc):
check_exception(space, w_exc)
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
@@ -382,7 +403,8 @@
"NOT_RPYTHON"
state = space.fromcache(CodecState)
for error in ("strict", "ignore", "replace", "xmlcharrefreplace",
- "backslashreplace", "surrogateescape", "surrogatepass"):
+ "backslashreplace", "surrogateescape", "surrogatepass",
+ "namereplace"):
name = error + "_errors"
state.codec_error_registry[error] = space.wrap(interp2app(globals()[name]))
@@ -667,7 +689,7 @@
return errorchar
raise oefmt(space.w_TypeError,
- "character mapping must return integer, None or unicode")
+ "character mapping must return integer, None or str")
class Charmap_Encode:
def __init__(self, space, w_mapping):
@@ -700,7 +722,7 @@
return errorchar
raise oefmt(space.w_TypeError,
- "character mapping must return integer, None or str")
+ "character mapping must return integer, bytes or None, not str")
@unwrap_spec(string='bufferstr', errors='str_or_None')
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -50,7 +50,7 @@
]
for s in insecure:
buf = b"S" + s + b"\012p0\012."
- raises (ValueError, pickle.loads, buf)
+ raises ((ValueError, pickle.UnpicklingError), pickle.loads, buf)
def test_unicodedecodeerror(self):
assert str(UnicodeDecodeError(
@@ -112,7 +112,7 @@
assert charmap_decode(b'xxx\xff', 'strict', map) == ('xxx\xff', 4)
exc = raises(TypeError, charmap_decode, b'\xff', "strict", {0xff: b'a'})
- assert str(exc.value) == "character mapping must return integer, None or unicode"
+ assert str(exc.value) == "character mapping must return integer, None or str"
raises(TypeError, charmap_decode, b'\xff', "strict", {0xff: 0x110000})
assert (charmap_decode(b"\x00\x01\x02", "strict",
{0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) ==
@@ -561,9 +561,14 @@
assert b'\xff'.decode('utf-7', 'ignore') == ''
assert b'\x00'.decode('unicode-internal', 'ignore') == ''
- def test_backslahreplace(self):
+ def test_backslashreplace(self):
assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == b'a\\xac\u1234\u20ac\u8000'
+    def test_namereplace(self):
+        # Encoding to ASCII with the 'namereplace' handler must spell out
+        # each non-ASCII character as an escape carrying its Unicode name.
+        text = 'a\xac\u1234\u20ac\u8000'
+        expected = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+                    b'\\N{CJK UNIFIED IDEOGRAPH-8000}')
+        encoded = text.encode('ascii', 'namereplace')
+        assert encoded == expected
+
def test_surrogateescape(self):
assert b'a\x80b'.decode('utf-8', 'surrogateescape') == 'a\udc80b'
assert 'a\udc80b'.encode('utf-8', 'surrogateescape') == b'a\x80b'
@@ -682,7 +687,7 @@
exc = raises(TypeError, codecs.charmap_encode, u'\xff', "replace", {0xff: 300})
assert str(exc.value) == 'character mapping must be in range(256)'
exc = raises(TypeError, codecs.charmap_encode, u'\xff', "replace", {0xff: u'a'})
- assert str(exc.value) == 'character mapping must return integer, None or str'
+ assert str(exc.value) == 'character mapping must return integer, bytes or None, not str'
raises(UnicodeError, codecs.charmap_encode, u"\xff", "replace", {0xff: None})
def test_charmap_encode_replace(self):
More information about the pypy-commit
mailing list