[pypy-commit] pypy decimal-libmpdec: hg graft 10fd806838e2
amauryfa
noreply at buildbot.pypy.org
Thu Sep 18 00:05:11 CEST 2014
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: decimal-libmpdec
Changeset: r73597:819424d457d9
Date: 2014-09-17 22:33 +0200
http://bitbucket.org/pypy/pypy/changeset/819424d457d9/
Log: hg graft 10fd806838e2
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -82,12 +82,6 @@
v = PyString_DecodeEscape(space, substr, 'strict', encoding)
return space.wrapbytes(v)
-def hexbyte(val):
- result = "%x" % val
- if len(result) == 1:
- result = "0" + result
- return result
-
def decode_unicode_utf8(space, s, ps, q):
# ****The Python 2.7 version, producing UTF-32 escapes****
# String is utf8-encoded, but 'unicode_escape' expects
@@ -107,15 +101,14 @@
# instead.
lis.append("u005c")
if ord(s[ps]) & 0x80: # XXX inefficient
- w, ps = decode_utf8(space, s, ps, end, "utf-32-be")
- rn = len(w)
- assert rn % 4 == 0
- for i in range(0, rn, 4):
- lis.append('\\U')
- lis.append(hexbyte(ord(w[i])))
- lis.append(hexbyte(ord(w[i+1])))
- lis.append(hexbyte(ord(w[i+2])))
- lis.append(hexbyte(ord(w[i+3])))
+ w, ps = decode_utf8(space, s, ps, end)
+ for c in w:
+ # The equivalent of %08x, which is not supported by RPython.
+ # 7 zeroes are enough for the unicode range, and the
+ # result still fits in 32-bit.
+ hexa = hex(ord(c) + 0x10000000)
+ lis.append('\\U0')
+ lis.append(hexa[3:]) # Skip 0x and the leading 1
else:
lis.append(s[ps])
ps += 1
@@ -135,7 +128,7 @@
# note that the C code has a label here.
# the logic is the same.
if recode_encoding and ord(s[ps]) & 0x80:
- w, ps = decode_utf8(space, s, ps, end, recode_encoding)
+ w, ps = decode_utf8_recode(space, s, ps, end, recode_encoding)
# Append bytes to output buffer.
builder.append(w)
else:
@@ -222,14 +215,18 @@
ch >= 'A' and ch <= 'F')
-def decode_utf8(space, s, ps, end, encoding):
+def decode_utf8(space, s, ps, end):
assert ps >= 0
pt = ps
# while (s < end && *s != '\\') s++; */ /* inefficient for u".."
while ps < end and ord(s[ps]) & 0x80:
ps += 1
- w_u = space.wrap(unicodehelper.decode_utf8(space, s[pt:ps]))
- w_v = unicodehelper.encode(space, w_u, encoding)
+ u = unicodehelper.decode_utf8(space, s[pt:ps])
+ return u, ps
+
+def decode_utf8_recode(space, s, ps, end, recode_encoding):
+ u, ps = decode_utf8(space, s, ps, end)
+ w_v = unicodehelper.encode(space, space.wrap(u), recode_encoding)
v = space.bytes_w(w_v)
return v, ps
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -13,6 +13,7 @@
@specialize.memo()
def decode_error_handler(space):
+ # Fast version of the "strict" errors handler.
def raise_unicode_exception_decode(errors, encoding, msg, s,
startingpos, endingpos):
raise OperationError(space.w_UnicodeDecodeError,
@@ -25,6 +26,7 @@
@specialize.memo()
def encode_error_handler(space):
+ # Fast version of the "strict" errors handler.
def raise_unicode_exception_encode(errors, encoding, msg, u,
startingpos, endingpos):
raise OperationError(space.w_UnicodeEncodeError,
More information about the pypy-commit
mailing list