[pypy-commit] pypy unicode-utf8-py3: avoid untranslatable unicodehelper.encode_utf8, add TODO note

mattip pypy.commits at gmail.com
Tue Aug 7 16:03:32 EDT 2018


Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94968:9fa79905a9c9
Date: 2018-08-07 09:16 -0700
http://bitbucket.org/pypy/pypy/changeset/9fa79905a9c9/

Log:	avoid untranslatable unicodehelper.encode_utf8, add TODO note

diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -4,3 +4,4 @@
 * improve performance of splitlines
 * fix _pypyjson to not use a wrapped dict when decoding an object
 * make sure we review all the places that call ord(unichr) to check for ValueErrors
+* rewrite unicodeobject.unicode_to_decimal_w to only use utf8 encoded bytes
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1882,10 +1882,16 @@
     if not isinstance(w_unistr, W_UnicodeObject):
         raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
     value = _rpy_unicode_to_decimal_w(space, w_unistr.utf8_w(space).decode('utf8'))
-    return unicodehelper.encode_utf8(space, value,
-                                     allow_surrogates=allow_surrogates)
+    # XXX this is the only place in the code that this function is called.
+    # It does not translate, since it uses a pypy-level error handler
+    # to throw the UnicodeEncodeError not the rpython default handler
+    #return unicodehelper.encode_utf8(space, value,
+    #                                 allow_surrogates=allow_surrogates)
+    assert isinstance(value, unicode)
+    return value.encode('utf8')
 
 def _rpy_unicode_to_decimal_w(space, unistr):
+    # XXX rewrite this to accept a utf8 string and use a StringBuilder
     result = [u'\0'] * len(unistr)
     for i in xrange(len(unistr)):
         uchr = ord(unistr[i])


More information about the pypy-commit mailing list