[pypy-commit] pypy unicode-utf8-py3: decode takes unicode input
mattip
pypy.commits at gmail.com
Tue Aug 7 16:03:34 EDT 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94969:c27f5e8d2da0
Date: 2018-08-07 09:16 -0700
http://bitbucket.org/pypy/pypy/changeset/c27f5e8d2da0/
Log: decode takes unicode input
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -45,12 +45,14 @@
w_errorhandler = lookup_error(space, errors)
if decode:
w_cls = space.w_UnicodeDecodeError
+ assert isinstance(input, str)
w_input = space.newbytes(input)
length = len(input)
else:
w_cls = space.w_UnicodeEncodeError
- length = rutf8.codepoints_in_utf8(input)
- w_input = space.newtext((input, length, length))
+ length = len(input)
+ assert isinstance(input, unicode)
+ w_input = space.newtext((input.encode('utf8'), length, length))
w_exc = space.call_function(
w_cls,
space.newtext(encoding),
@@ -721,12 +723,11 @@
if errors is None:
errors = 'strict'
state = space.fromcache(CodecState)
- # NB. can't call unicode_encode_utf_8() directly because that's
- # an @elidable function nowadays. Instead, we need the _impl().
- # (The problem is the errorhandler, which calls arbitrary Python.)
- result = runicode.unicode_encode_utf_8_impl(
- utf8, lgt, errors, state.encode_error_handler,
- allow_surrogates=False)
+ #result = runicode.unicode_encode_utf_8_impl(
+ # utf8, lgt, errors, state.encode_error_handler,
+ # allow_surrogates=False)
+ result = unicodehelper.utf8_encode_utf_8(utf8, errors,
+ state.encode_error_handler, allow_surrogates=False)
return space.newtuple([space.newbytes(result), space.newint(lgt)])
@unwrap_spec(string='bufferstr', errors='text_or_none',
More information about the pypy-commit mailing list