[pypy-commit] pypy default: use a utf8 specific error for OutOfRange

Tue Feb 19 14:23:42 EST 2019

Author: Matti Picus <matti.picus at gmail.com>
Branch: 
Changeset: r96096:213fc2573b4d
Date: 2019-02-19 21:05 +0200
http://bitbucket.org/pypy/pypy/changeset/213fc2573b4d/

Log:	use a utf8-specific error (rutf8.OutOfRange instead of ValueError) for OutOfRange

diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -175,7 +175,7 @@
         value = misc.read_raw_ulong_data(cdata, self.size)   # r_uint
         try:
             utf8 = rutf8.unichr_as_utf8(value, allow_surrogates=True)
-        except ValueError:
+        except rutf8.OutOfRange:
             if self.is_signed_wchar:
                 s = hex(intmask(value))
             else:
diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py
--- a/pypy/module/_cffi_backend/wchar_helper.py
+++ b/pypy/module/_cffi_backend/wchar_helper.py
@@ -24,7 +24,7 @@
         j += 1
         try:
             rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True)
-        except ValueError:
+        except rutf8.OutOfRange:
             raise OutOfRange(ch)
     return u.build(), length
 
diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -473,7 +473,7 @@
                 if do_unicode:
                     try:
                         c = rutf8.unichr_as_utf8(r_uint(n))
-                    except ValueError:
+                    except rutf8.OutOfRange:
                         raise oefmt(space.w_OverflowError,
                                     "unicode character code out of range")
                     self.std_wp(c, False)
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -30,6 +30,11 @@
 MAXUNICODE = 0x10ffff
 allow_surrogate_by_default = False
 
+
+class OutOfRange(Exception):
+    def __init__(self, code):
+        self.code = code
+
 # we need a way to accept both r_uint and int(nonneg=True)
 #@signature(types.int_nonneg(), types.bool(), returns=types.str())
 def unichr_as_utf8(code, allow_surrogates=False):
@@ -44,7 +49,7 @@
         return chr((0xc0 | (code >> 6))) + chr((0x80 | (code & 0x3f)))
     if code <= r_uint(0xFFFF):
         if not allow_surrogates and 0xD800 <= code <= 0xDfff:
-            raise ValueError
+            raise OutOfRange(code)
         return (chr((0xe0 | (code >> 12))) +
                 chr((0x80 | ((code >> 6) & 0x3f))) +
                 chr((0x80 | (code & 0x3f))))
@@ -53,7 +58,7 @@
                 chr((0x80 | ((code >> 12) & 0x3f))) +
                 chr((0x80 | ((code >> 6) & 0x3f))) +
                 chr((0x80 | (code & 0x3f))))
-    raise ValueError
+    raise OutOfRange(code)
 
 @try_inline
 def unichr_as_utf8_append(builder, code, allow_surrogates=False):
@@ -89,7 +94,7 @@
         builder.append(chr((0x80 | ((code >> 6) & 0x3f))))
         builder.append(chr((0x80 | (code & 0x3f))))
         return
-    raise ValueError('character U+%x is not in range [U+0000; U+10ffff]' % code)
+    raise OutOfRange(code)
 
 @dont_inline
 def _nonascii_unichr_as_utf8_append_nosurrogates(builder, code):
@@ -110,7 +115,7 @@
         builder.append(chr((0x80 | ((code >> 6) & 0x3f))))
         builder.append(chr((0x80 | (code & 0x3f))))
         return
-    raise ValueError
+    raise OutOfRange(code)
 
 
 # note - table lookups are really slow. Measured on various elements of obama
diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py
--- a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -9,7 +9,7 @@
 def test_unichr_as_utf8(c, allow_surrogates):
     i = ord(c)
     if not allow_surrogates and 0xD800 <= i <= 0xDFFF:
-        with pytest.raises(ValueError):
+        with pytest.raises(rutf8.OutOfRange):
             rutf8.unichr_as_utf8(i, allow_surrogates)
     else:
         u = rutf8.unichr_as_utf8(i, allow_surrogates)