[pypy-commit] pypy unicode-utf8-py3: uni.encode('utf8') -> runicode.unicode_encode_utf_8(uni, len(uni), 'strict')

Sat Sep 1 10:59:27 EDT 2018

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95061:ef8722afb037
Date: 2018-08-31 14:29 +0200
http://bitbucket.org/pypy/pypy/changeset/ef8722afb037/

Log:	uni.encode('utf8') -> runicode.unicode_encode_utf_8(uni, len(uni),
	'strict')

diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -21,7 +21,7 @@
     """Translate an error code to a unicode message string."""
     from pypy.module._codecs.locale import str_decode_locale_surrogateescape
     uni = str_decode_locale_surrogateescape(os.strerror(errno))
-    return uni.encode('utf8'), len(uni)
+    return runicode.unicode_encode_utf_8(uni, len(uni), 'strict')
 
 class OperationError(Exception):
     """Interpreter-level exception that signals an exception that should be
@@ -647,7 +647,8 @@
             msg = u'Windows Error %d' % winerror
         w_errno = space.w_None
         w_winerror = space.newint(winerror)
-        w_msg = space.newtext(msg.encode('utf8'), len(msg))
+        msg_utf8 = runicode.unicode_encode_utf_8(msg, len(msg), 'strict')
+        w_msg = space.newtext(msg_utf8, len(msg))
     else:
         errno = e.errno
         if errno == EINTR:
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -317,7 +317,8 @@
             errorhandler = decode_error_handler(space) 
         res, size = str_decode_mbcs(s, slen, final=final, errors=errors,
                                            errorhandler=errorhandler)
-        return res.encode('utf8'), len(res)
+        res_utf8 = runicode.unicode_encode_utf_8(res, len(res), 'strict')
+        return res_utf8, len(res)
 
 def str_decode_utf8(s, errors, final, errorhandler, allow_surrogates=False):
     """ Same as checking for the valid utf8, but we know the utf8 is not
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -444,7 +444,9 @@
             ch = 0
         if ch == 0:
             raise OperationError(space.type(w_exc), w_exc)
-        return space.newtuple([space.newtext(unichr(ch).encode('utf8'), 1),
+        ch_utf8 = runicode.unicode_encode_utf_8(unichr(ch), 1, 'strict',
+                                                allow_surrogates=True)
+        return space.newtuple([space.newtext(ch_utf8, 1),
                                space.newint(start + bytelength)])
     else:
         raise oefmt(space.w_TypeError,
@@ -483,7 +485,9 @@
         if not consumed:
             # codec complained about ASCII byte.
             raise OperationError(space.type(w_exc), w_exc)
-        return space.newtuple([space.newtext(replace.encode('utf8'), len(replace)),
+        replace_utf8 = runicode.unicode_encode_utf_8(replace, len(replace),
+                                         'strict', allow_surrogates=True)
+        return space.newtuple([space.newtext(replace_utf8, len(replace)),
                                space.newint(start + consumed)])
     else:
         raise oefmt(space.w_TypeError,
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -42,7 +42,8 @@
             return space.newbytes(ctx._string[start:end])
         elif isinstance(ctx, rsre_core.UnicodeMatchContext):
             uni = ctx._unicodestr[start:end]
-            return space.newtext(uni.encode('utf8'), len(uni))
+            uni_utf8 = runicode.unicode_encode_utf_8(uni, len(uni), 'strict')
+            return space.newtext(uni_utf8, len(uni))
         else:
             # unreachable
             raise SystemError
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -84,7 +84,8 @@
     s = rffi.wcharpsize2unicode(get_wbuffer(py_obj), get_wsize(py_obj))
     w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
     w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type)
-    w_obj.__init__(s.encode('utf8'), len(s))
+    s_utf8 = runicode.unicode_encode_utf_8(s, len(s), 'strict')
+    w_obj.__init__(s_utf8, len(s))
     track_reference(space, py_obj, w_obj)
     return w_obj