[pypy-commit] pypy unicode-utf8-py3: avoid untranslatable unicodehelper.encode_utf8, add TODO note
mattip
pypy.commits at gmail.com
Tue Aug 7 16:03:32 EDT 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94968:9fa79905a9c9
Date: 2018-08-07 09:16 -0700
http://bitbucket.org/pypy/pypy/changeset/9fa79905a9c9/
Log: avoid untranslatable unicodehelper.encode_utf8, add TODO note
diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -4,3 +4,4 @@
* improve performance of splitlines
* fix _pypyjson to not use a wrapped dict when decoding an object
* make sure we review all the places that call ord(unichr) to check for ValueErrors
+* rewrite unicodeobject.unicode_to_decimal_w to only use utf8 encoded bytes
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1882,10 +1882,16 @@
if not isinstance(w_unistr, W_UnicodeObject):
raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
value = _rpy_unicode_to_decimal_w(space, w_unistr.utf8_w(space).decode('utf8'))
- return unicodehelper.encode_utf8(space, value,
- allow_surrogates=allow_surrogates)
# XXX this is the only place in the code that this function is called.
# It does not translate, since it uses a pypy-level error handler
# to throw the UnicodeEncodeError, not the rpython default handler
+ #return unicodehelper.encode_utf8(space, value,
+ # allow_surrogates=allow_surrogates)
+ assert isinstance(value, unicode)
+ return value.encode('utf8')
def _rpy_unicode_to_decimal_w(space, unistr):
+ # XXX rewrite this to accept a utf8 string and use a StringBuilder
result = [u'\0'] * len(unistr)
for i in xrange(len(unistr)):
uchr = ord(unistr[i])
More information about the pypy-commit
mailing list