[pypy-commit] pypy default: Copy the CPython-style error messages more closely
arigo
noreply at buildbot.pypy.org
Sat Jan 18 14:44:51 CET 2014
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r68763:3f01d4d55bcc
Date: 2014-01-18 14:44 +0100
http://bitbucket.org/pypy/pypy/changeset/3f01d4d55bcc/
Log: Copy the CPython-style error messages more closely
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -153,7 +153,7 @@
# about the pos anymore and we just ignore the value
if not charsleft:
# there's only the start byte and nothing else
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'unexpected end of data',
s, pos, pos+1)
result.append(r)
@@ -165,14 +165,14 @@
(ordch1 == 0xe0 and ordch2 < 0xa0)):
# or (ordch1 == 0xed and ordch2 > 0x9f)
# second byte invalid, take the first and continue
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'invalid continuation byte',
s, pos, pos+1)
result.append(r)
continue
else:
# second byte valid, but third byte missing
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'unexpected end of data',
s, pos, pos+2)
result.append(r)
@@ -183,28 +183,28 @@
(ordch1 == 0xf0 and ordch2 < 0x90) or
(ordch1 == 0xf4 and ordch2 > 0x8f)):
# second byte invalid, take the first and continue
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'invalid continuation byte',
s, pos, pos+1)
result.append(r)
continue
elif charsleft == 2 and ord(s[pos+2])>>6 != 0x2: # 0b10
# third byte invalid, take the first two and continue
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'invalid continuation byte',
s, pos, pos+2)
result.append(r)
continue
else:
# there's only 1 or 2 valid cb, but the others are missing
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'unexpected end of data',
s, pos, pos+charsleft+1)
result.append(r)
break
if n == 0:
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'invalid start byte',
s, pos, pos+1)
result.append(r)
@@ -215,7 +215,7 @@
elif n == 2:
ordch2 = ord(s[pos+1])
if ordch2>>6 != 0x2: # 0b10
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'invalid continuation byte',
s, pos, pos+1)
result.append(r)
@@ -233,13 +233,13 @@
# surrogates shouldn't be valid UTF-8!
or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f)
):
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'invalid continuation byte',
s, pos, pos+1)
result.append(r)
continue
elif ordch3>>6 != 0x2: # 0b10
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'invalid continuation byte',
s, pos, pos+2)
result.append(r)
@@ -257,19 +257,19 @@
if (ordch2>>6 != 0x2 or # 0b10
(ordch1 == 0xf0 and ordch2 < 0x90) or
(ordch1 == 0xf4 and ordch2 > 0x8f)):
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'invalid continuation byte',
s, pos, pos+1)
result.append(r)
continue
elif ordch3>>6 != 0x2: # 0b10
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'invalid continuation byte',
s, pos, pos+2)
result.append(r)
continue
elif ordch4>>6 != 0x2: # 0b10
- r, pos = errorhandler(errors, 'utf-8',
+ r, pos = errorhandler(errors, 'utf8',
'invalid continuation byte',
s, pos, pos+3)
result.append(r)
@@ -337,7 +337,7 @@
_encodeUCS4(result, ch3)
continue
if not allow_surrogates:
- ru, rs, pos = errorhandler(errors, 'utf-8',
+ ru, rs, pos = errorhandler(errors, 'utf8',
'surrogates not allowed',
s, pos-1, pos)
if rs is not None:
@@ -348,7 +348,7 @@
if ord(ch) < 0x80:
result.append(chr(ord(ch)))
else:
- errorhandler('strict', 'utf-8',
+ errorhandler('strict', 'utf8',
'surrogates not allowed',
s, pos-1, pos)
continue
@@ -441,7 +441,7 @@
if len(s) - pos < 2:
if not final:
break
- r, pos = errorhandler(errors, 'utf-16', "truncated data",
+ r, pos = errorhandler(errors, 'utf16', "truncated data",
s, pos, len(s))
result.append(r)
if len(s) - pos < 2:
@@ -456,7 +456,7 @@
if not final:
break
errmsg = "unexpected end of data"
- r, pos = errorhandler(errors, 'utf-16', errmsg, s, pos - 2, len(s))
+ r, pos = errorhandler(errors, 'utf16', errmsg, s, pos - 2, len(s))
result.append(r)
if len(s) - pos < 2:
break
@@ -472,12 +472,12 @@
(ch2 & 0x3FF)) + 0x10000))
continue
else:
- r, pos = errorhandler(errors, 'utf-16',
+ r, pos = errorhandler(errors, 'utf16',
"illegal UTF-16 surrogate",
s, pos - 4, pos - 2)
result.append(r)
else:
- r, pos = errorhandler(errors, 'utf-16',
+ r, pos = errorhandler(errors, 'utf16',
"illegal encoding",
s, pos - 2, pos)
result.append(r)
@@ -609,7 +609,7 @@
if len(s) - pos < 4:
if not final:
break
- r, pos = errorhandler(errors, 'utf-32', "truncated data",
+ r, pos = errorhandler(errors, 'utf32', "truncated data",
s, pos, len(s))
result.append(r)
if len(s) - pos < 4:
@@ -618,7 +618,7 @@
ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) |
(ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]]))
if ch >= 0x110000:
- r, pos = errorhandler(errors, 'utf-32', "codepoint not in range(0x110000)",
+ r, pos = errorhandler(errors, 'utf32', "codepoint not in range(0x110000)",
s, pos, len(s))
result.append(r)
continue
@@ -846,7 +846,7 @@
if base64bits >= 6:
# We've seen at least one base-64 character
msg = "partial character in shift sequence"
- res, pos = errorhandler(errors, 'utf-7',
+ res, pos = errorhandler(errors, 'utf7',
msg, s, pos-1, pos)
result.append(res)
continue
@@ -854,7 +854,7 @@
# Some bits remain; they should be zero
if base64buffer != 0:
msg = "non-zero padding bits in shift sequence"
- res, pos = errorhandler(errors, 'utf-7',
+ res, pos = errorhandler(errors, 'utf7',
msg, s, pos-1, pos)
result.append(res)
continue
@@ -883,7 +883,7 @@
else:
pos += 1
msg = "unexpected special character"
- res, pos = errorhandler(errors, 'utf-7', msg, s, pos-1, pos)
+ res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos)
result.append(res)
# end of string
@@ -894,7 +894,7 @@
base64bits >= 6 or
(base64bits > 0 and base64buffer != 0)):
msg = "unterminated shift sequence"
- res, pos = errorhandler(errors, 'utf-7', msg, s, shiftOutStartPos, pos)
+ res, pos = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos)
result.append(res)
elif inShift:
pos = shiftOutStartPos # back off output
More information about the pypy-commit mailing list