[pypy-commit] pypy decimal-libmpdec: hg graft 10fd806838e2

Thu Sep 18 00:05:11 CEST 2014

Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: decimal-libmpdec
Changeset: r73597:819424d457d9
Date: 2014-09-17 22:33 +0200
http://bitbucket.org/pypy/pypy/changeset/819424d457d9/

Log:	hg graft 10fd806838e2

diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -82,12 +82,6 @@
     v = PyString_DecodeEscape(space, substr, 'strict', encoding)
     return space.wrapbytes(v)
 
-def hexbyte(val):
-    result = "%x" % val
-    if len(result) == 1:
-        result = "0" + result
-    return result
-
 def decode_unicode_utf8(space, s, ps, q):
     # ****The Python 2.7 version, producing UTF-32 escapes****
     # String is utf8-encoded, but 'unicode_escape' expects
@@ -107,15 +101,14 @@
                 # instead.
                 lis.append("u005c")
         if ord(s[ps]) & 0x80: # XXX inefficient
-            w, ps = decode_utf8(space, s, ps, end, "utf-32-be")
-            rn = len(w)
-            assert rn % 4 == 0
-            for i in range(0, rn, 4):
-                lis.append('\\U')
-                lis.append(hexbyte(ord(w[i])))
-                lis.append(hexbyte(ord(w[i+1])))
-                lis.append(hexbyte(ord(w[i+2])))
-                lis.append(hexbyte(ord(w[i+3])))
+            w, ps = decode_utf8(space, s, ps, end)
+            for c in w:
+                # The equivalent of %08x, which is not supported by RPython.
+                # 7 zeroes are enough for the unicode range, and the
+                # result still fits in 32-bit.
+                hexa = hex(ord(c) + 0x10000000)
+                lis.append('\\U0')
+                lis.append(hexa[3:])  # Skip 0x and the leading 1
         else:
             lis.append(s[ps])
             ps += 1
@@ -135,7 +128,7 @@
             # note that the C code has a label here.
             # the logic is the same.
             if recode_encoding and ord(s[ps]) & 0x80:
-                w, ps = decode_utf8(space, s, ps, end, recode_encoding)
+                w, ps = decode_utf8_recode(space, s, ps, end, recode_encoding)
                 # Append bytes to output buffer.
                 builder.append(w)
             else:
@@ -222,14 +215,18 @@
             ch >= 'A' and ch <= 'F')
 
 
-def decode_utf8(space, s, ps, end, encoding):
+def decode_utf8(space, s, ps, end):
     assert ps >= 0
     pt = ps
     # while (s < end && *s != '\\') s++; */ /* inefficient for u".."
     while ps < end and ord(s[ps]) & 0x80:
         ps += 1
-    w_u = space.wrap(unicodehelper.decode_utf8(space, s[pt:ps]))
-    w_v = unicodehelper.encode(space, w_u, encoding)
+    u = unicodehelper.decode_utf8(space, s[pt:ps])
+    return u, ps
+
+def decode_utf8_recode(space, s, ps, end, recode_encoding):
+    u, ps = decode_utf8(space, s, ps, end)
+    w_v = unicodehelper.encode(space, space.wrap(u), recode_encoding)
     v = space.bytes_w(w_v)
     return v, ps
 
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -13,6 +13,7 @@
 
 @specialize.memo()
 def decode_error_handler(space):
+    # Fast version of the "strict" errors handler.
     def raise_unicode_exception_decode(errors, encoding, msg, s,
                                        startingpos, endingpos):
         raise OperationError(space.w_UnicodeDecodeError,
@@ -25,6 +26,7 @@
 
 @specialize.memo()
 def encode_error_handler(space):
+    # Fast version of the "strict" errors handler.
     def raise_unicode_exception_encode(errors, encoding, msg, u,
                                        startingpos, endingpos):
         raise OperationError(space.w_UnicodeEncodeError,