[pypy-commit] pypy kill-faking: merged upstream
alex_gaynor
noreply at buildbot.pypy.org
Mon Dec 3 00:42:43 CET 2012
Author: Alex Gaynor <alex.gaynor at gmail.com>
Branch: kill-faking
Changeset: r59269:041ebf6f58a7
Date: 2012-12-02 15:41 -0800
http://bitbucket.org/pypy/pypy/changeset/041ebf6f58a7/
Log: merged upstream
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -9,15 +9,19 @@
self.codec_search_cache = {}
self.codec_error_registry = {}
self.codec_need_encodings = True
- self.decode_error_handler = self.make_errorhandler(space, True)
- self.encode_error_handler = self.make_errorhandler(space, False)
+ self.decode_error_handler = self.make_decode_errorhandler(space)
+ self.encode_error_handler = self.make_encode_errorhandler(space)
self.unicodedata_handler = None
- def make_errorhandler(self, space, decode):
- def unicode_call_errorhandler(errors, encoding, reason, input,
- startpos, endpos):
+ def _make_errorhandler(self, space, decode):
+ def call_errorhandler(errors, encoding, reason, input, startpos,
+ endpos):
+ """Generic wrapper for calling into error handlers.
+ Returns (unicode_or_none, str_or_none, newpos) as error
+ handlers may return unicode or, on Python 3, bytes.
+ """
w_errorhandler = lookup_error(space, errors)
if decode:
w_cls = space.w_UnicodeDecodeError
@@ -55,7 +59,19 @@
"position %d from error handler out of bounds", newpos)
replace = space.unicode_w(w_replace)
return replace, newpos
- return unicode_call_errorhandler
+ return call_errorhandler
+
+ def make_decode_errorhandler(self, space):
+ return self._make_errorhandler(space, True)
+
+ def make_encode_errorhandler(self, space):
+ errorhandler = self._make_errorhandler(space, False)
+ def encode_call_errorhandler(errors, encoding, reason, input, startpos,
+ endpos):
+ replace, newpos = errorhandler(errors, encoding, reason, input,
+ startpos, endpos)
+ return replace, None, newpos
+ return encode_call_errorhandler
def get_unicodedata_handler(self, space):
if self.unicodedata_handler:
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -279,10 +279,14 @@
replace = "?"
else:
assert errorcb
- ret, end = errorcb(errors, namecb, reason,
- unicodedata, start, end)
- codec = pypy_cjk_enc_getcodec(encodebuf)
- replace = encode(codec, ret, "strict", errorcb, namecb)
+ retu, rets, end = errorcb(errors, namecb, reason,
+ unicodedata, start, end)
+ if rets is not None:
+ # py3k only
+ replace = rets
+ else:
+ codec = pypy_cjk_enc_getcodec(encodebuf)
+ replace = encode(codec, retu, "strict", errorcb, namecb)
inbuf = rffi.get_nonmovingbuffer(replace)
try:
r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -102,3 +102,11 @@
repl = u"\u2014"
s = u"\uDDA1".encode("gbk", "test.multi_bad_handler")
assert s == '\xA1\xAA'
+
+ def test_encode_custom_error_handler_type(self):
+ import codecs
+ import sys
+ codecs.register_error("test.test_encode_custom_error_handler_type",
+ lambda e: ('\xc3', e.end))
+ raises(TypeError, u"\uDDA1".encode, "gbk",
+ "test.test_encode_custom_error_handler_type")
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -122,3 +122,10 @@
c = getcodec('iso2022_jp')
s = encode(c, u'\u83ca\u5730\u6642\u592b')
assert s == '\x1b$B5FCO;~IW\x1b(B' and type(s) is str
+
+def test_encode_custom_error_handler_bytes():
+ c = getcodec("hz")
+ def errorhandler(errors, enc, msg, t, startingpos, endingpos):
+ return None, '\xc3', endingpos
+ s = encode(c, u'abc\u1234def', 'foo', errorhandler)
+ assert '\xc3' in s
diff --git a/pypy/rlib/runicode.py b/pypy/rlib/runicode.py
--- a/pypy/rlib/runicode.py
+++ b/pypy/rlib/runicode.py
@@ -57,9 +57,9 @@
def default_unicode_error_encode(errors, encoding, msg, u,
startingpos, endingpos):
if errors == 'replace':
- return u'?', endingpos
+ return u'?', None, endingpos
if errors == 'ignore':
- return u'', endingpos
+ return u'', None, endingpos
raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
# ____________________________________________________________
@@ -300,10 +300,14 @@
_encodeUCS4(result, ch3)
continue
if not allow_surrogates:
- r, pos = errorhandler(errors, 'utf-8',
- 'surrogates not allowed',
- s, pos-1, pos)
- for ch in r:
+ ru, rs, pos = errorhandler(errors, 'utf-8',
+ 'surrogates not allowed',
+ s, pos-1, pos)
+ if rs is not None:
+ # py3k only
+ result.append(rs)
+ continue
+ for ch in ru:
if ord(ch) < 0x80:
result.append(chr(ord(ch)))
else:
@@ -976,9 +980,13 @@
collend = pos+1
while collend < len(p) and ord(p[collend]) >= limit:
collend += 1
- r, pos = errorhandler(errors, encoding, reason, p,
- collstart, collend)
- for ch in r:
+ ru, rs, pos = errorhandler(errors, encoding, reason, p,
+ collstart, collend)
+ if rs is not None:
+ # py3k only
+ result.append(rs)
+ continue
+ for ch in ru:
if ord(ch) < limit:
result.append(chr(ord(ch)))
else:
@@ -1048,10 +1056,14 @@
c = mapping.get(ch, '')
if len(c) == 0:
- res, pos = errorhandler(errors, "charmap",
- "character maps to <undefined>",
- s, pos, pos + 1)
- for ch2 in res:
+ ru, rs, pos = errorhandler(errors, "charmap",
+ "character maps to <undefined>",
+ s, pos, pos + 1)
+ if rs is not None:
+ # py3k only
+ result.append(rs)
+ continue
+ for ch2 in ru:
c2 = mapping.get(ch2, '')
if len(c2) == 0:
errorhandler(
@@ -1650,9 +1662,12 @@
pass
collend += 1
msg = "invalid decimal Unicode string"
- r, pos = errorhandler(errors, 'decimal',
- msg, s, collstart, collend)
- for char in r:
+ ru, rs, pos = errorhandler(errors, 'decimal',
+ msg, s, collstart, collend)
+ if rs is not None:
+ # py3k only
+ errorhandler('strict', 'decimal', msg, s, collstart, collend)
+ for char in ru:
ch = ord(char)
if unicodedb.isspace(ch):
result.append(' ')
diff --git a/pypy/rlib/test/test_runicode.py b/pypy/rlib/test/test_runicode.py
--- a/pypy/rlib/test/test_runicode.py
+++ b/pypy/rlib/test/test_runicode.py
@@ -66,12 +66,19 @@
assert t is s
assert start == startingpos
assert stop == endingpos
- return "42424242", stop
+ return u"42424242", None, stop
encoder = self.getencoder(encoding)
result = encoder(s, len(s), "foo!", errorhandler)
assert called[0]
assert "42424242" in result
+ # ensure bytes results passthru
+ def errorhandler_bytes(errors, enc, msg, t, startingpos,
+ endingpos):
+ return None, '\xc3', endingpos
+ result = encoder(s, len(s), "foo!", errorhandler_bytes)
+ assert '\xc3' in result
+
def checkdecodeerror(self, s, encoding, start, stop,
addstuff=True, msg=None):
called = [0]
More information about the pypy-commit
mailing list