[pypy-commit] pypy default: Cleanup unicode_to_decimal_w() to make it more similar to pypy3

rlamy pypy.commits at gmail.com
Mon Jun 17 14:17:16 EDT 2019


Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: 
Changeset: r96814:cde3d214c398
Date: 2019-06-17 18:26 +0100
http://bitbucket.org/pypy/pypy/changeset/cde3d214c398/

Log:	Cleanup unicode_to_decimal_w() to make it more similar to pypy3

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -49,7 +49,6 @@
             # special-case in Python 2, which is exactly what we want here
             assert length == len(utf8str.decode('utf-8'))
 
-
     @staticmethod
     def from_utf8builder(builder):
         return W_UnicodeObject(
@@ -1097,11 +1096,11 @@
             if rutf8.has_surrogates(utf8):
                 utf8 = rutf8.reencode_utf8_with_surrogates(utf8)
             return space.newbytes(utf8)
-    return encode(space, w_obj, encoding, errors) 
+    return encode(space, w_obj, encoding, errors)
 
 
 def decode_object(space, w_obj, encoding, errors):
-    from pypy.module._codecs.interp_codecs import lookup_codec, decode 
+    from pypy.module._codecs.interp_codecs import lookup_codec, decode
     if errors is None or errors == 'strict':
         # fast paths
         if encoding is None:
@@ -1111,7 +1110,7 @@
             unicodehelper.check_ascii_or_raise(space, s)
             return space.newutf8(s, len(s))
         if encoding == 'utf-8' or encoding == 'utf8':
-            if (space.isinstance_w(w_obj, space.w_unicode) or 
+            if (space.isinstance_w(w_obj, space.w_unicode) or
                 space.isinstance_w(w_obj, space.w_bytes)):
                 s = space.utf8_w(w_obj)
             else:
@@ -1720,34 +1719,28 @@
 def unicode_to_decimal_w(space, w_unistr):
     if not isinstance(w_unistr, W_UnicodeObject):
         raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
-    unistr = w_unistr._utf8
-    result = ['\0'] * w_unistr._length
-    digits = ['0', '1', '2', '3', '4',
-              '5', '6', '7', '8', '9']
-    res_pos = 0
-    iter = rutf8.Utf8StringIterator(unistr)
-    for uchr in iter:
+    utf8 = w_unistr._utf8
+    result = StringBuilder(w_unistr._len())
+    it = rutf8.Utf8StringIterator(utf8)
+    for uchr in it:
         if W_UnicodeObject._isspace(uchr):
-            result[res_pos] = ' '
-            res_pos += 1
+            result.append(' ')
             continue
-        try:
-            result[res_pos] = digits[unicodedb.decimal(uchr)]
-        except KeyError:
-            if 0 < uchr < 256:
-                result[res_pos] = chr(uchr)
-            else:
+        if not (0 < uchr < 256):
+            try:
+                uchr = ord('0') + unicodedb.decimal(uchr)
+            except KeyError:
                 w_encoding = space.newtext('decimal')
-                pos = iter.get_pos()
+                pos = it.get_pos()
                 w_start = space.newint(pos)
-                w_end = space.newint(pos+1)
+                w_end = space.newint(pos + 1)
                 w_reason = space.newtext('invalid decimal Unicode string')
                 raise OperationError(space.w_UnicodeEncodeError,
-                                     space.newtuple([w_encoding, w_unistr,
-                                                     w_start, w_end,
-                                                     w_reason]))
-        res_pos += 1
-    return ''.join(result)
+                                        space.newtuple([w_encoding, w_unistr,
+                                                        w_start, w_end,
+                                                        w_reason]))
+        result.append(chr(uchr))
+    return result.build()
 
 
 _repr_function = rutf8.make_utf8_escape_function(


More information about the pypy-commit mailing list