[pypy-commit] pypy default: Don't raise ValueError when comparing arrays with out-of-bound unicodes

arigo pypy.commits at gmail.com
Thu Apr 18 05:38:09 EDT 2019


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r96519:8fde6d376fef
Date: 2019-04-18 11:37 +0200
http://bitbucket.org/pypy/pypy/changeset/8fde6d376fef/

Log:	Don't raise ValueError when comparing arrays that contain
	out-of-range unicode characters

diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -74,8 +74,8 @@
     lgt = min(arr1.len, arr2.len)
     for i in range(lgt):
         arr_eq_driver.jit_merge_point(comp_func=comp_op)
-        w_elem1 = arr1.w_getitem(space, i)
-        w_elem2 = arr2.w_getitem(space, i)
+        w_elem1 = arr1.w_getitem(space, i, integer_instead_of_char=True)
+        w_elem2 = arr2.w_getitem(space, i, integer_instead_of_char=True)
         if comp_op == EQ:
             res = space.eq_w(w_elem1, w_elem2)
             if not res:
@@ -1036,10 +1036,11 @@
             else:
                 self.fromsequence(w_iterable)
 
-        def w_getitem(self, space, idx):
+        def w_getitem(self, space, idx, integer_instead_of_char=False):
             item = self.get_buffer()[idx]
             keepalive_until_here(self)
-            if mytype.typecode in 'bBhHil':
+            if mytype.typecode in 'bBhHil' or (
+                    integer_instead_of_char and mytype.typecode in 'cu'):
                 item = rffi.cast(lltype.Signed, item)
                 return space.newint(item)
             if mytype.typecode in 'IL':
diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -846,14 +846,21 @@
         assert repr(mya('i', [1, 2, 3])) == "array('i', [1, 2, 3])"
         assert repr(mya('i', (1, 2, 3))) == "array('i', [1, 2, 3])"
 
+    def test_array_of_chars_equality(self):
+        input_bytes = '\x01\x63a\x00!'
+        a = self.array('c', input_bytes)
+        b = self.array('c', input_bytes)
+        b.byteswap()
+        assert a == b
+
     def test_unicode_outofrange(self):
         input_unicode = u'\x01\u263a\x00\ufeff'
         a = self.array('u', input_unicode)
         b = self.array('u', input_unicode)
         b.byteswap()
         assert b[2] == u'\u0000'
-        raises(ValueError, "b[1]")        # doesn't work
-        e = raises(ValueError, "a != b")  # doesn't work
+        assert a != b
+        e = raises(ValueError, "b[0]")        # doesn't work
         assert str(e.value) == (
             "cannot operate on this array('u') because it contains"
             " character U+1000000 not in range [U+0000; U+10ffff]"


More information about the pypy-commit mailing list