[pypy-svn] r61084 - in pypy/trunk/pypy/module/_codecs: . test
fijal at codespeak.net
fijal at codespeak.net
Sun Jan 18 21:42:42 CET 2009
Author: fijal
Date: Sun Jan 18 21:42:40 2009
New Revision: 61084
Modified:
pypy/trunk/pypy/module/_codecs/app_codecs.py
pypy/trunk/pypy/module/_codecs/test/test_codecs.py
Log:
Some fighting with codecs. More tests pass, but this is a very ugly place.
I'm going to make the tests pass and get out.
Modified: pypy/trunk/pypy/module/_codecs/app_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/app_codecs.py (original)
+++ pypy/trunk/pypy/module/_codecs/app_codecs.py Sun Jan 18 21:42:40 2009
@@ -1,4 +1,4 @@
-
+# NOT_RPYTHON
# Note:
# This *is* now explicitly RPython.
# Please make sure not to break this.
@@ -118,8 +118,7 @@
return res, len(res)
def unicode_internal_decode( unistr, errors='strict'):
- """None
- """
+ import sys
if type(unistr) == unicode:
return unistr, len(unistr)
else:
@@ -133,7 +132,13 @@
start = 0
stop = unicode_bytes
step = 1
- while i < len(unistr)-unicode_bytes+1:
+ while i < len(unistr):
+ if len(unistr) - i < unicode_bytes:
+ msg = 'truncated input'
+ next, _ = unicode_call_errorhandler(errors, 'unicode_internal', msg,
+ unistr, i, i + unicode_bytes)
+ p += next
+ break
t = 0
h = 0
for j in range(start, stop, step):
@@ -145,9 +150,10 @@
except ValueError:
startpos = i - unicode_bytes
endpos = i
- raise UnicodeDecodeError('unicode_internal', unistr, startpos,
- endpos,
- "unichr(%s) not in range" % (t,))
+ msg = "unichr(%s) not in range" % (t,)
+ next, _ = unicode_call_errorhandler(errors, 'unicode_internal', msg,
+ unistr, startpos, endpos)
+ p += next
res = u''.join(p)
return res, len(unistr)
@@ -234,6 +240,14 @@
res = ''.join(res)
return res, len(res)
+def check_exception(exc):
+ try:
+ delta = exc.end - exc.start
+ if delta < 0 or not isinstance(exc.object, (unicode, str)):
+ raise TypeError("wrong exception")
+ except AttributeError:
+ raise TypeError("wrong exception")
+
def strict_errors(exc):
if isinstance(exc, Exception):
raise exc
@@ -241,6 +255,7 @@
raise TypeError("codec must pass exception instance")
def ignore_errors(exc):
+ check_exception(exc)
if isinstance(exc, UnicodeEncodeError):
return u'', exc.end
elif isinstance(exc, (UnicodeDecodeError, UnicodeTranslateError)):
@@ -251,6 +266,7 @@
Py_UNICODE_REPLACEMENT_CHARACTER = u"\ufffd"
def replace_errors(exc):
+ check_exception(exc)
if isinstance(exc, UnicodeEncodeError):
return u'?'*(exc.end-exc.start), exc.end
elif isinstance(exc, (UnicodeTranslateError, UnicodeDecodeError)):
@@ -356,7 +372,7 @@
return out, bits
def PyUnicode_DecodeUTF7(s, size, errors):
-
+ from _codecs import lookup_error
starts = s
errmsg = ""
inShift = 0
@@ -419,7 +435,8 @@
elif SPECIAL(ch, 0, 0) :
msg = "unexpected special character"
- raise UnicodeDecodeError('utf-7', s, i-1, i, msg)
+ out, _ = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
+ p += out
else:
p += ch
else:
@@ -440,7 +457,8 @@
elif (SPECIAL(ch, 0, 0)):
i += 1
msg = "unexpected special character"
- raise UnicodeDecodeError('utf-7', s, i-1, i, msg)
+ out, _ = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
+ p += out
else:
p += ch
i += 1
@@ -449,8 +467,8 @@
#XXX This aint right
endinpos = size
msg = "unterminated shift sequence"
- raise UnicodeDecodeError('utf-7', s, i-1, i, msg)
-
+ out, _ = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
+ p += out
return p
def PyUnicode_EncodeUTF7(s, size, encodeSetO, encodeWhiteSpace, errors):
@@ -606,7 +624,7 @@
else:
exceptionObject = UnicodeEncodeError(encoding, input, startinpos, endinpos, reason)
res = errorHandler(exceptionObject)
- if isinstance(res, tuple) and isinstance(res[0], unicode) and isinstance(res[1], int):
+ if isinstance(res, tuple) and len(res) == 2 and isinstance(res[0], unicode) and isinstance(res[1], int):
newpos = res[1]
if (newpos < 0):
newpos = len(input) + newpos
Modified: pypy/trunk/pypy/module/_codecs/test/test_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/test/test_codecs.py (original)
+++ pypy/trunk/pypy/module/_codecs/test/test_codecs.py Sun Jan 18 21:42:40 2009
@@ -357,6 +357,8 @@
import codecs
res = codecs.charmap_decode("\x00\x01\x02", "replace", u"ab")
assert res == (u"ab\ufffd", 3)
+ res = codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe")
+ assert res == (u'ab\ufffd', 3)
def test_decode_errors(self):
import sys
@@ -370,3 +372,110 @@
assert ex.end == 8
else:
raise Exception("DID NOT RAISE")
+
+ def test_errors(self):
+ import codecs
+ assert (
+ codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch"))) == (
+ (u"?", 1)
+ )
+ assert (
+ codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch"))) == (
+ (u"\ufffd", 1)
+ )
+ assert (
+ codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch"))) == (
+ (u"\ufffd", 1)
+ )
+ class BadStartUnicodeEncodeError(UnicodeEncodeError):
+ def __init__(self):
+ UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
+ self.start = []
+
+ # A UnicodeEncodeError object with a bad object attribute
+ class BadObjectUnicodeEncodeError(UnicodeEncodeError):
+ def __init__(self):
+ UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
+ self.object = []
+
+ # A UnicodeDecodeError object without an end attribute
+ class NoEndUnicodeDecodeError(UnicodeDecodeError):
+ def __init__(self):
+ UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
+ del self.end
+
+ # A UnicodeDecodeError object with a bad object attribute
+ class BadObjectUnicodeDecodeError(UnicodeDecodeError):
+ def __init__(self):
+ UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
+ self.object = []
+
+ # A UnicodeTranslateError object without a start attribute
+ class NoStartUnicodeTranslateError(UnicodeTranslateError):
+ def __init__(self):
+ UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
+ del self.start
+
+ # A UnicodeTranslateError object without an end attribute
+ class NoEndUnicodeTranslateError(UnicodeTranslateError):
+ def __init__(self):
+ UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
+ del self.end
+
+ # A UnicodeTranslateError object without an object attribute
+ class NoObjectUnicodeTranslateError(UnicodeTranslateError):
+ def __init__(self):
+ UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
+ del self.object
+
+ import codecs
+ raises(TypeError, codecs.replace_errors, BadObjectUnicodeEncodeError())
+ raises(TypeError, codecs.replace_errors, 42)
+ # "replace" complains about the wrong exception type
+ raises(TypeError, codecs.replace_errors, UnicodeError("ouch"))
+ raises(TypeError, codecs.replace_errors, BadObjectUnicodeEncodeError())
+ raises(TypeError, codecs.replace_errors, BadObjectUnicodeDecodeError()
+ )
+ # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement
+
+ def test_decode_ignore(self):
+ assert '\xff'.decode('utf-7', 'ignore') == ''
+ assert '\x00'.decode('unicode-internal', 'ignore') == ''
+
+ def test_badhandler(self):
+ import codecs
+ results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
+ encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
+
+ for res in results:
+ codecs.register_error("test.badhandler", lambda x: res)
+ for enc in encs:
+ raises(
+ TypeError,
+ u"\u3042".encode,
+ enc,
+ "test.badhandler"
+ )
+ for (enc, bytes) in (
+ ("utf-8", "\xff"),
+ ("ascii", "\xff"),
+ ("utf-7", "+x-"),
+ ("unicode-internal", "\x00"),
+ ):
+ raises(
+ TypeError,
+ bytes.decode,
+ enc,
+ "test.badhandler"
+ )
+
+ def test_unicode_internal(self):
+ try:
+ '\x00'.decode('unicode-internal')
+ except UnicodeDecodeError:
+ pass
+ else:
+ raise Exception("DID NOT RAISE")
+
+ res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace")
+ assert res == u"\u0000\ufffd"
More information about the Pypy-commit
mailing list