[pypy-commit] pypy unicode-utf8-py3: decode takes unicode input

mattip pypy.commits at gmail.com
Tue Aug 7 16:03:34 EDT 2018


Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94969:c27f5e8d2da0
Date: 2018-08-07 09:16 -0700
http://bitbucket.org/pypy/pypy/changeset/c27f5e8d2da0/

Log:	decode takes unicode input

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -45,12 +45,14 @@
             w_errorhandler = lookup_error(space, errors)
             if decode:
                 w_cls = space.w_UnicodeDecodeError
+                assert isinstance(input, str)
                 w_input = space.newbytes(input)
                 length = len(input)
             else:
                 w_cls = space.w_UnicodeEncodeError
-                length = rutf8.codepoints_in_utf8(input)
-                w_input = space.newtext((input, length, length))
+                length = len(input)
+                assert isinstance(input, unicode)
+                w_input = space.newtext((input.encode('utf8'), length, length))
             w_exc =  space.call_function(
                 w_cls,
                 space.newtext(encoding),
@@ -721,12 +723,11 @@
     if errors is None:
         errors = 'strict'
     state = space.fromcache(CodecState)
-    # NB. can't call unicode_encode_utf_8() directly because that's
-    # an @elidable function nowadays.  Instead, we need the _impl().
-    # (The problem is the errorhandler, which calls arbitrary Python.)
-    result = runicode.unicode_encode_utf_8_impl(
-        utf8, lgt, errors, state.encode_error_handler,
-        allow_surrogates=False)
+    #result = runicode.unicode_encode_utf_8_impl(
+    #    utf8, lgt, errors, state.encode_error_handler,
+    #    allow_surrogates=False)
+    result = unicodehelper.utf8_encode_utf_8(utf8, errors,
+                     state.encode_error_handler, allow_surrogates=False)
     return space.newtuple([space.newbytes(result), space.newint(lgt)])
 
 @unwrap_spec(string='bufferstr', errors='text_or_none',


More information about the pypy-commit mailing list