[pypy-commit] pypy unicode-utf8-py3: fixes for failures in array, _pypyjson

Wed Sep 5 08:10:30 EDT 2018

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95079:68f116b03eb4
Date: 2018-09-02 17:11 +0200
http://bitbucket.org/pypy/pypy/changeset/68f116b03eb4/

Log:	fixes for failures in array, _pypyjson

diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -74,10 +74,6 @@
                 break
         return i
 
-    @specialize.arg(1)
-    def _raise(self, msg, *args):
-        raise oefmt(self.space.w_ValueError, msg, *args)
-
     def decode_any(self, i):
         i = self.skip_whitespace(i)
         ch = self.ll_chars[i]
@@ -330,10 +326,10 @@
                 i = self.decode_escape_sequence(i, builder)
             elif ch < '\x20':
                 if ch == '\0':
-                    self._raise("Unterminated string starting at char %d",
+                    raise DecoderError("Unterminated string starting at",
                                 start - 1)
                 else:
-                    self._raise("Invalid control character at char %d", i-1)
+                    raise DecoderError("Invalid control character at", i-1)
             else:
                 builder.append(ch)
 
@@ -368,7 +364,7 @@
                     val = self.decode_surrogate_pair(i, val)
                     i += 6
         except ValueError:
-            self._raise("Invalid \uXXXX escape (char %d)", i-1)
+            raise DecoderError("Invalid \uXXXX escape (char %d)", i-1)
             return # help the annotator to know that we'll never go beyond
                    # this point
         #
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -1,4 +1,5 @@
 from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.rutf8 import Utf8StringIterator
 
 HEX = '0123456789abcdef'
 
@@ -25,8 +26,7 @@
 
     sb = StringBuilder(len(u) + 20)
 
-    for i in range(len(u)):
-        c = ord(u[i])
+    for c in Utf8StringIterator(u):
         if c <= ord('~'):
             if c == ord('"') or c == ord('\\'):
                 sb.append('\\')
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -429,7 +429,7 @@
         if len(s) % self.itemsize != 0:
             raise oefmt(space.w_ValueError,
                         "bytes length not a multiple of item size")
-        self.check_valid_unicode(space, s) # empty for non-u arrays
+        #self.check_valid_unicode(space, s) # empty for non-u arrays
         oldlen = self.len
         new = len(s) / self.itemsize
         if not new:
@@ -757,7 +757,7 @@
             return space.newtext("array('%s')" % self.typecode)
         elif self.typecode == "u":
             r = space.repr(self.descr_tounicode(space))
-            s = b"array('b', %s)" % space.utf8_w(r)
+            s = "array('%s', %s)" % (self.typecode, space.text_w(r))
             return space.newtext(s)
         else:
             r = space.repr(self.descr_tolist(space))