[pypy-commit] pypy default: Copy the CPython-style error messages more closely

Sat Jan 18 14:44:51 CET 2014

Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r68763:3f01d4d55bcc
Date: 2014-01-18 14:44 +0100
http://bitbucket.org/pypy/pypy/changeset/3f01d4d55bcc/

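Log:	Copy the CPython-style error messages more closely: pass the codec
	names 'utf8', 'utf16', 'utf32' and 'utf7' (instead of 'utf-8', 'utf-16',
	'utf-32' and 'utf-7') to the error handlers.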

diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -153,7 +153,7 @@
             # about the pos anymore and we just ignore the value
             if not charsleft:
                 # there's only the start byte and nothing else
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'unexpected end of data',
                                       s, pos, pos+1)
                 result.append(r)
@@ -165,14 +165,14 @@
                     (ordch1 == 0xe0 and ordch2 < 0xa0)):
                     # or (ordch1 == 0xed and ordch2 > 0x9f)
                     # second byte invalid, take the first and continue
-                    r, pos = errorhandler(errors, 'utf-8',
+                    r, pos = errorhandler(errors, 'utf8',
                                           'invalid continuation byte',
                                           s, pos, pos+1)
                     result.append(r)
                     continue
                 else:
                     # second byte valid, but third byte missing
-                    r, pos = errorhandler(errors, 'utf-8',
+                    r, pos = errorhandler(errors, 'utf8',
                                       'unexpected end of data',
                                       s, pos, pos+2)
                     result.append(r)
@@ -183,28 +183,28 @@
                     (ordch1 == 0xf0 and ordch2 < 0x90) or
                     (ordch1 == 0xf4 and ordch2 > 0x8f)):
                     # second byte invalid, take the first and continue
-                    r, pos = errorhandler(errors, 'utf-8',
+                    r, pos = errorhandler(errors, 'utf8',
                                           'invalid continuation byte',
                                           s, pos, pos+1)
                     result.append(r)
                     continue
                 elif charsleft == 2 and ord(s[pos+2])>>6 != 0x2:   # 0b10
                     # third byte invalid, take the first two and continue
-                    r, pos = errorhandler(errors, 'utf-8',
+                    r, pos = errorhandler(errors, 'utf8',
                                           'invalid continuation byte',
                                           s, pos, pos+2)
                     result.append(r)
                     continue
                 else:
                     # there's only 1 or 2 valid cb, but the others are missing
-                    r, pos = errorhandler(errors, 'utf-8',
+                    r, pos = errorhandler(errors, 'utf8',
                                       'unexpected end of data',
                                       s, pos, pos+charsleft+1)
                     result.append(r)
                     break
 
         if n == 0:
-            r, pos = errorhandler(errors, 'utf-8',
+            r, pos = errorhandler(errors, 'utf8',
                                   'invalid start byte',
                                   s, pos, pos+1)
             result.append(r)
@@ -215,7 +215,7 @@
         elif n == 2:
             ordch2 = ord(s[pos+1])
             if ordch2>>6 != 0x2:   # 0b10
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+1)
                 result.append(r)
@@ -233,13 +233,13 @@
                 # surrogates shouldn't be valid UTF-8!
                 or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f)
                 ):
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+1)
                 result.append(r)
                 continue
             elif ordch3>>6 != 0x2:     # 0b10
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+2)
                 result.append(r)
@@ -257,19 +257,19 @@
             if (ordch2>>6 != 0x2 or     # 0b10
                 (ordch1 == 0xf0 and ordch2 < 0x90) or
                 (ordch1 == 0xf4 and ordch2 > 0x8f)):
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+1)
                 result.append(r)
                 continue
             elif ordch3>>6 != 0x2:     # 0b10
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+2)
                 result.append(r)
                 continue
             elif ordch4>>6 != 0x2:     # 0b10
-                r, pos = errorhandler(errors, 'utf-8',
+                r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+3)
                 result.append(r)
@@ -337,7 +337,7 @@
                             _encodeUCS4(result, ch3)
                             continue
                     if not allow_surrogates:
-                        ru, rs, pos = errorhandler(errors, 'utf-8',
+                        ru, rs, pos = errorhandler(errors, 'utf8',
                                                    'surrogates not allowed',
                                                    s, pos-1, pos)
                         if rs is not None:
@@ -348,7 +348,7 @@
                             if ord(ch) < 0x80:
                                 result.append(chr(ord(ch)))
                             else:
-                                errorhandler('strict', 'utf-8',
+                                errorhandler('strict', 'utf8',
                                              'surrogates not allowed',
                                              s, pos-1, pos)
                         continue
@@ -441,7 +441,7 @@
         if len(s) - pos < 2:
             if not final:
                 break
-            r, pos = errorhandler(errors, 'utf-16', "truncated data",
+            r, pos = errorhandler(errors, 'utf16', "truncated data",
                                   s, pos, len(s))
             result.append(r)
             if len(s) - pos < 2:
@@ -456,7 +456,7 @@
             if not final:
                 break
             errmsg = "unexpected end of data"
-            r, pos = errorhandler(errors, 'utf-16', errmsg, s, pos - 2, len(s))
+            r, pos = errorhandler(errors, 'utf16', errmsg, s, pos - 2, len(s))
             result.append(r)
             if len(s) - pos < 2:
                 break
@@ -472,12 +472,12 @@
                                            (ch2 & 0x3FF)) + 0x10000))
                 continue
             else:
-                r, pos = errorhandler(errors, 'utf-16',
+                r, pos = errorhandler(errors, 'utf16',
                                       "illegal UTF-16 surrogate",
                                       s, pos - 4, pos - 2)
                 result.append(r)
         else:
-            r, pos = errorhandler(errors, 'utf-16',
+            r, pos = errorhandler(errors, 'utf16',
                                   "illegal encoding",
                                   s, pos - 2, pos)
             result.append(r)
@@ -609,7 +609,7 @@
         if len(s) - pos < 4:
             if not final:
                 break
-            r, pos = errorhandler(errors, 'utf-32', "truncated data",
+            r, pos = errorhandler(errors, 'utf32', "truncated data",
                                   s, pos, len(s))
             result.append(r)
             if len(s) - pos < 4:
@@ -618,7 +618,7 @@
         ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) |
               (ord(s[pos + iorder[1]]) << 8)  | ord(s[pos + iorder[0]]))
         if ch >= 0x110000:
-            r, pos = errorhandler(errors, 'utf-32', "codepoint not in range(0x110000)",
+            r, pos = errorhandler(errors, 'utf32', "codepoint not in range(0x110000)",
                                   s, pos, len(s))
             result.append(r)
             continue
@@ -846,7 +846,7 @@
                     if base64bits >= 6:
                         # We've seen at least one base-64 character
                         msg = "partial character in shift sequence"
-                        res, pos = errorhandler(errors, 'utf-7',
+                        res, pos = errorhandler(errors, 'utf7',
                                                 msg, s, pos-1, pos)
                         result.append(res)
                         continue
@@ -854,7 +854,7 @@
                         # Some bits remain; they should be zero
                         if base64buffer != 0:
                             msg = "non-zero padding bits in shift sequence"
-                            res, pos = errorhandler(errors, 'utf-7',
+                            res, pos = errorhandler(errors, 'utf7',
                                                     msg, s, pos-1, pos)
                             result.append(res)
                             continue
@@ -883,7 +883,7 @@
         else:
             pos += 1
             msg = "unexpected special character"
-            res, pos = errorhandler(errors, 'utf-7', msg, s, pos-1, pos)
+            res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos)
             result.append(res)
 
     # end of string
@@ -894,7 +894,7 @@
             base64bits >= 6 or
             (base64bits > 0 and base64buffer != 0)):
             msg = "unterminated shift sequence"
-            res, pos = errorhandler(errors, 'utf-7', msg, s, shiftOutStartPos, pos)
+            res, pos = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos)
             result.append(res)
     elif inShift:
         pos = shiftOutStartPos # back off output