[pypy-commit] pypy unicode-utf8-py3: fixes from testing module/_ast

Mon Jul 2 00:12:58 EDT 2018

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94797:599273325eea
Date: 2018-07-01 16:37 -0500
http://bitbucket.org/pypy/pypy/changeset/599273325eea/

Log:	fixes from testing module/_ast

diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -14,6 +14,8 @@
 from pypy.objspace.std.stringmethods import StringMethods
 from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
 from pypy.objspace.std.formatting import mod_format, FORMAT_BYTES
+from pypy.objspace.std.unicodeobject import (encode_object, getdefaultencoding,
+           decode_object)
 
 class W_AbstractBytesObject(W_Root):
     __slots__ = ()
@@ -688,7 +690,6 @@
             raise oefmt(space.w_TypeError,
                 "encoding without string argument (got '%T' instead)",
                 w_source)
-        from pypy.objspace.std.unicodeobject import encode_object
         w_source = encode_object(space, w_source, encoding, errors)
         # and continue with the encoded string
     elif errors is not None:
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1144,14 +1144,13 @@
 
 
 def encode_object(space, w_object, encoding, errors):
-    w_encoder = None
-    if encoding is None:
-        # Get the encoder functions as a wrapped object.
-        # This lookup is cached.
-        w_encoder = space.sys.get_w_default_encoder()
     if errors is None or errors == 'strict':
-        if ((encoding is None and space.sys.defaultencoding == 'ascii') or
-             encoding == 'ascii'):
+        if encoding is None or encoding == 'utf-8':
+            utf8 = space.utf8_w(w_object)
+            if rutf8.has_surrogates(utf8):
+                utf8 = rutf8.reencode_utf8_with_surrogates(utf8)
+            return space.newbytes(utf8)
+        elif encoding == 'ascii':
             s = space.utf8_w(w_object)
             try:
                 rutf8.check_ascii(s)
@@ -1161,21 +1160,11 @@
                     a.pos, a.pos + 1)
                 assert False, "always raises"
             return space.newbytes(s)
-        if ((encoding is None and space.sys.defaultencoding == 'utf8') or
-             encoding == 'utf-8' or encoding == 'utf8' or encoding == 'UTF-8'):
-            utf8 = space.utf8_w(w_object)
-            if rutf8.has_surrogates(utf8):
-                utf8 = rutf8.reencode_utf8_with_surrogates(utf8)
-            return space.newbytes(utf8)
-    if w_encoder is None:
-        from pypy.module._codecs.interp_codecs import lookup_codec
-        w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
-    if errors is None:
-        w_errors = space.newtext('strict')
-    else:
-        w_errors = space.newtext(errors)
-    w_restuple = space.call_function(w_encoder, w_object, w_errors)
-    w_retval = space.getitem(w_restuple, space.newint(0))
+
+    from pypy.module._codecs.interp_codecs import encode_text
+    if encoding is None:
+        encoding = space.sys.defaultencoding
+    w_retval = encode_text(space, w_object, encoding, errors)
     if not space.isinstance_w(w_retval, space.w_bytes):
         raise oefmt(space.w_TypeError,
                     "'%s' encoder returned '%T' instead of 'bytes'; "