[pypy-commit] pypy unicode-utf8-py3: remove round-tripping str-unicode from errors
mattip
pypy.commits at gmail.com
Fri Feb 1 08:53:46 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95768:be2b370c0652
Date: 2019-02-01 15:49 +0200
http://bitbucket.org/pypy/pypy/changeset/be2b370c0652/
Log: remove round-tripping str-unicode from errors
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -472,19 +472,7 @@
assert len(formats) > 0, "unsupported: no % command found"
return tuple(parts), tuple(formats)
-def _decode_utf8(string):
- # when building the error message, don't crash if the byte string
- # provided is not valid UTF-8
- if isinstance(string, unicode):
- return string
- assert isinstance(string, str)
- #return string.decode('utf8')
- result, consumed = runicode.str_decode_utf_8(
- string, len(string), "replace", final=True)
- return result
-
def get_operrcls2(valuefmt):
- valuefmt = valuefmt.decode('ascii')
strings, formats = decompose_valuefmt(valuefmt)
assert len(strings) == len(formats) + 1
try:
@@ -503,49 +491,49 @@
self.setup(w_type)
def _compute_value(self, space):
- # TODO: avoid utf8->unicode->utf8 dance
lst = [None] * (len(formats) + len(formats) + 1)
+ lgt = 0
for i, fmt, attr in entries:
lst[i + i] = self.xstrings[i]
+ lgt += len(self.xstrings[i])
value = getattr(self, attr)
if fmt == 'd':
- result = str(value).decode('ascii')
+ result = str(value)
+ lgt += len(result)
elif fmt == 'R':
- result = space.realunicode_w(space.repr(value))
+ result = space.utf8_w(space.repr(value))
+ lgt += len(result)
elif fmt == 'S':
- result = space.realunicode_w(space.str(value))
+ result = space.utf8_w(space.str(value))
+ lgt += len(result)
elif fmt == 'T':
- result = _decode_utf8(space.type(value).name)
+ result = space.type(value).name
+ lgt += len(result)
elif fmt == 'N':
- name = value.getname(space)
- if isinstance(name, unicode):
- result = name
- else:
- result = _decode_utf8(name)
+ result = value.getname(space)
+ lgt += len(result)
elif fmt == '8':
# u'str\uxxxx' -> 'str\xXX\xXX' -> u"'str\xXX\xXX'"
- if isinstance(value, unicode):
- result = runicode.unicode_encode_utf_8(value,
- len(value), 'strict', allow_surrogates=True)
- else:
- from pypy.interpreter import unicodehelper
- result = _decode_utf8(unicodehelper.str_decode_utf8(
- value, 'replace', True,
- unicodehelper.decode_never_raise, True)[0])
+ from pypy.interpreter import unicodehelper
+ result, _lgt, pos = unicodehelper.str_decode_utf8(
+ value, 'replace', True,
+ unicodehelper.decode_never_raise, True)
+ lgt += _lgt
+ elif isinstance(value, unicode):
+ # 's'
+ result = str(value.encode('utf-8'))
+ lgt += len(value)
else:
- if isinstance(value, unicode):
- result = value
- else:
- result = _decode_utf8(str(value))
+ from rpython.rlib import rutf8
+ result = str(value)
+ # Assumes valid utf-8
+ lgt += rutf8.check_utf8(result, True)
lst[i + i + 1] = result
lst[-1] = self.xstrings[-1]
- retval = u''.join(lst)
- # We need to annotate both allow_surrogates=True,False
- # since this function is used to replace uni.encode('utf8')
- # deep in rpython
- return runicode.unicode_encode_utf_8(retval, len(retval),
- 'strict', allow_surrogates=False), len(retval)
- #
+ lgt += len(self.xstrings[-1])
+ retval = ''.join(lst)
+ return retval, lgt
+
_fmtcache2[formats] = OpErrFmt
return OpErrFmt, strings
More information about the pypy-commit
mailing list