[pypy-commit] pypy default: Cleanup unicode_to_decimal_w() to make it more similar to pypy3
rlamy
pypy.commits at gmail.com
Mon Jun 17 14:17:16 EDT 2019
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch:
Changeset: r96814:cde3d214c398
Date: 2019-06-17 18:26 +0100
http://bitbucket.org/pypy/pypy/changeset/cde3d214c398/
Log: Cleanup unicode_to_decimal_w() to make it more similar to pypy3
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -49,7 +49,6 @@
# special-case in Python 2, which is exactly what we want here
assert length == len(utf8str.decode('utf-8'))
-
@staticmethod
def from_utf8builder(builder):
return W_UnicodeObject(
@@ -1097,11 +1096,11 @@
if rutf8.has_surrogates(utf8):
utf8 = rutf8.reencode_utf8_with_surrogates(utf8)
return space.newbytes(utf8)
- return encode(space, w_obj, encoding, errors)
+ return encode(space, w_obj, encoding, errors)
def decode_object(space, w_obj, encoding, errors):
- from pypy.module._codecs.interp_codecs import lookup_codec, decode
+ from pypy.module._codecs.interp_codecs import lookup_codec, decode
if errors is None or errors == 'strict':
# fast paths
if encoding is None:
@@ -1111,7 +1110,7 @@
unicodehelper.check_ascii_or_raise(space, s)
return space.newutf8(s, len(s))
if encoding == 'utf-8' or encoding == 'utf8':
- if (space.isinstance_w(w_obj, space.w_unicode) or
+ if (space.isinstance_w(w_obj, space.w_unicode) or
space.isinstance_w(w_obj, space.w_bytes)):
s = space.utf8_w(w_obj)
else:
@@ -1720,34 +1719,28 @@
def unicode_to_decimal_w(space, w_unistr):
if not isinstance(w_unistr, W_UnicodeObject):
raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
- unistr = w_unistr._utf8
- result = ['\0'] * w_unistr._length
- digits = ['0', '1', '2', '3', '4',
- '5', '6', '7', '8', '9']
- res_pos = 0
- iter = rutf8.Utf8StringIterator(unistr)
- for uchr in iter:
+ utf8 = w_unistr._utf8
+ result = StringBuilder(w_unistr._len())
+ it = rutf8.Utf8StringIterator(utf8)
+ for uchr in it:
if W_UnicodeObject._isspace(uchr):
- result[res_pos] = ' '
- res_pos += 1
+ result.append(' ')
continue
- try:
- result[res_pos] = digits[unicodedb.decimal(uchr)]
- except KeyError:
- if 0 < uchr < 256:
- result[res_pos] = chr(uchr)
- else:
+ if not (0 < uchr < 256):
+ try:
+ uchr = ord('0') + unicodedb.decimal(uchr)
+ except KeyError:
w_encoding = space.newtext('decimal')
- pos = iter.get_pos()
+ pos = it.get_pos()
w_start = space.newint(pos)
- w_end = space.newint(pos+1)
+ w_end = space.newint(pos + 1)
w_reason = space.newtext('invalid decimal Unicode string')
raise OperationError(space.w_UnicodeEncodeError,
- space.newtuple([w_encoding, w_unistr,
- w_start, w_end,
- w_reason]))
- res_pos += 1
- return ''.join(result)
+ space.newtuple([w_encoding, w_unistr,
+ w_start, w_end,
+ w_reason]))
+ result.append(chr(uchr))
+ return result.build()
_repr_function = rutf8.make_utf8_escape_function(
More information about the pypy-commit mailing list