[pypy-commit] pypy unicode-utf8-py3: uni.encode('utf8') -> runicode.unicode_encode_utf_8(uni, len(uni), 'strict')
mattip
pypy.commits at gmail.com
Sat Sep 1 10:59:27 EDT 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95061:ef8722afb037
Date: 2018-08-31 14:29 +0200
http://bitbucket.org/pypy/pypy/changeset/ef8722afb037/
Log: uni.encode('utf8') -> runicode.unicode_encode_utf_8(uni, len(uni), 'strict')
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -21,7 +21,7 @@
"""Translate an error code to a unicode message string."""
from pypy.module._codecs.locale import str_decode_locale_surrogateescape
uni = str_decode_locale_surrogateescape(os.strerror(errno))
- return uni.encode('utf8'), len(uni)
+ return runicode.unicode_encode_utf_8(uni, len(uni), 'strict')
class OperationError(Exception):
"""Interpreter-level exception that signals an exception that should be
@@ -647,7 +647,8 @@
msg = u'Windows Error %d' % winerror
w_errno = space.w_None
w_winerror = space.newint(winerror)
- w_msg = space.newtext(msg.encode('utf8'), len(msg))
+ msg_utf8 = runicode.unicode_encode_utf_8(msg, len(msg), 'strict')
+ w_msg = space.newtext(msg_utf8, len(msg))
else:
errno = e.errno
if errno == EINTR:
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -317,7 +317,8 @@
errorhandler = decode_error_handler(space)
res, size = str_decode_mbcs(s, slen, final=final, errors=errors,
errorhandler=errorhandler)
- return res.encode('utf8'), len(res)
+ res_utf8 = runicode.unicode_encode_utf_8(res, len(res), 'strict')
+ return res_utf8, len(res)
def str_decode_utf8(s, errors, final, errorhandler, allow_surrogates=False):
""" Same as checking for the valid utf8, but we know the utf8 is not
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -444,7 +444,9 @@
ch = 0
if ch == 0:
raise OperationError(space.type(w_exc), w_exc)
- return space.newtuple([space.newtext(unichr(ch).encode('utf8'), 1),
+ ch_utf8 = runicode.unicode_encode_utf_8(unichr(ch), 1, 'strict',
+ allow_surrogates=True)
+ return space.newtuple([space.newtext(ch_utf8, 1),
space.newint(start + bytelength)])
else:
raise oefmt(space.w_TypeError,
@@ -483,7 +485,9 @@
if not consumed:
# codec complained about ASCII byte.
raise OperationError(space.type(w_exc), w_exc)
- return space.newtuple([space.newtext(replace.encode('utf8'), len(replace)),
+ replace_utf8 = runicode.unicode_encode_utf_8(replace, len(replace),
+ 'strict', allow_surrogates=True)
+ return space.newtuple([space.newtext(replace_utf8, len(replace)),
space.newint(start + consumed)])
else:
raise oefmt(space.w_TypeError,
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -42,7 +42,8 @@
return space.newbytes(ctx._string[start:end])
elif isinstance(ctx, rsre_core.UnicodeMatchContext):
uni = ctx._unicodestr[start:end]
- return space.newtext(uni.encode('utf8'), len(uni))
+ uni_utf8 = runicode.unicode_encode_utf_8(uni, len(uni), 'strict')
+ return space.newtext(uni_utf8, len(uni))
else:
# unreachable
raise SystemError
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -84,7 +84,8 @@
s = rffi.wcharpsize2unicode(get_wbuffer(py_obj), get_wsize(py_obj))
w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type)
- w_obj.__init__(s.encode('utf8'), len(s))
+ s_utf8 = runicode.unicode_encode_utf_8(s, len(s), 'strict')
+ w_obj.__init__(s_utf8, len(s))
track_reference(space, py_obj, w_obj)
return w_obj
More information about the pypy-commit mailing list