[pypy-commit] pypy unicode-utf8-py3: convert unicode_w to utf8_w, use decode when a python2 unicode object is required

Thu Jun 14 01:43:27 EDT 2018

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94762:92280566ae0d
Date: 2018-06-13 21:49 -0700
http://bitbucket.org/pypy/pypy/changeset/92280566ae0d/

Log:	convert unicode_w to utf8_w, use decode when a python2 unicode object
	is required

diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -112,7 +112,7 @@
     # only intern identifier-like strings
     from pypy.objspace.std.unicodeobject import _isidentifier
     if (space.is_w(space.type(w_const), space.w_unicode) and
-        _isidentifier(space.unicode_w(w_const))):
+        _isidentifier(space.utf8_w(w_const))):
         return space.new_interned_w_str(w_const)
     return w_const
 
diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -326,7 +326,7 @@
                     # produce compatible pycs.
                     if (self.space.isinstance_w(w_obj, self.space.w_unicode) and
                         self.space.isinstance_w(w_const, self.space.w_unicode)):
-                        #unistr = self.space.unicode_w(w_const)
+                        #unistr = self.space.utf8_w(w_const)
                         #if len(unistr) == 1:
                         #    ch = ord(unistr[0])
                         #else:
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -80,7 +80,7 @@
 
     def getname(self, space):
         try:
-            return space.unicode_w(space.getattr(self, space.newtext('__name__')))
+            return space.utf8_w(space.getattr(self, space.newtext('__name__')))
         except OperationError as e:
             if e.match(space, space.w_TypeError) or e.match(space, space.w_AttributeError):
                 return u'?'
@@ -245,10 +245,6 @@
     def bytes_w(self, space):
         self._typed_unwrap_error(space, "bytes")
 
-    def unicode_w(self, space):
-        self._typed_unwrap_error(space, "string")
-    realunicode_w = unicode_w
-
     def utf8_w(self, space):
         self._typed_unwrap_error(space, "unicode")
 
@@ -824,7 +820,7 @@
 
     def new_interned_w_str(self, w_u):
         assert isinstance(w_u, W_Root)   # and is not None
-        u = self.unicode_w(w_u)
+        u = self.utf8_w(w_u)
         if not we_are_translated():
             assert type(u) is str
         w_u1 = self.interned_strings.get(u)
@@ -1719,8 +1715,8 @@
     def convert_to_w_unicode(self, w_obj):
         return w_obj.convert_to_w_unicode(self)
 
-    def unicode0_w(self, w_obj):
-        "Like unicode_w, but rejects strings with NUL bytes."
+    def utf8_0_w(self, w_obj):
+        "Like utf8_w, but rejects strings with NUL bytes."
         from rpython.rlib import rstring
         result = w_obj.utf8_w(self).decode('utf8')
         if u'\x00' in result:
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -507,9 +507,9 @@
                     if fmt == 'd':
                         result = str(value).decode('ascii')
                     elif fmt == 'R':
-                        result = space.unicode_w(space.repr(value))
+                        result = space.utf8_w(space.repr(value))
                     elif fmt == 'S':
-                        result = space.unicode_w(space.str(value))
+                        result = space.utf8_w(space.str(value))
                     elif fmt == 'T':
                         result = _decode_utf8(space.type(value).name)
                     elif fmt == 'N':
@@ -565,8 +565,8 @@
 
     %8 - The result of arg.decode('utf-8')
     %N - The result of w_arg.getname(space)
-    %R - The result of space.unicode_w(space.repr(w_arg))
-    %S - The result of space.unicode_w(space.str(w_arg))
+    %R - The result of space.utf8_w(space.repr(w_arg))
+    %S - The result of space.utf8_w(space.str(w_arg))
     %T - The result of space.type(w_arg).name
 
     """
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -337,7 +337,7 @@
 
         self.space = space
         self.name = space.text_w(w_name)
-        self.qualname = space.unicode_w(w_qualname)
+        self.qualname = space.utf8_w(w_qualname)
         self.code = space.interp_w(Code, w_code)
         if not space.is_w(w_closure, space.w_None):
             from pypy.interpreter.nestedscope import Cell
@@ -434,7 +434,7 @@
 
     def fset_func_qualname(self, space, w_name):
         try:
-            self.qualname = space.unicode_w(w_name)
+            self.qualname = space.utf8_w(w_name)
         except OperationError as e:
             if e.match(space, space.w_TypeError):
                 raise oefmt(space.w_TypeError,
@@ -549,13 +549,13 @@
             name = self.w_function.getname(self.space)
         else:
             try:
-                name = space.unicode_w(w_name)
+                name = space.utf8_w(w_name)
             except OperationError as e:
                 if not e.match(space, space.w_TypeError):
                     raise
                 name = u'?'
-        objrepr = space.unicode_w(space.repr(self.w_instance))
-        s = u'<bound method %s of %s>' % (name, objrepr)
+        objrepr = space.utf8_w(space.repr(self.w_instance))
+        s = b'<bound method %s of %s>' % (name, objrepr)
         return space.newtext(s)
 
     def descr_method_getattribute(self, w_attr):
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -327,7 +327,7 @@
         self.run_args.append("space.text0_w(%s)" % (self.scopenext(),))
 
     def visit_unicode(self, typ):
-        self.run_args.append("space.unicode_w(%s)" % (self.scopenext(),))
+        self.run_args.append("space.utf_8(%s)" % (self.scopenext(),))
 
     def visit_utf8(self, typ):
         self.run_args.append("space.utf8_w(%s)" % (self.scopenext(),))
@@ -498,7 +498,7 @@
         self.unwrap.append("space.text_w(%s)" % (self.nextarg(),))
 
     def visit_unicode(self, typ):
-        self.unwrap.append("space.unicode_w(%s)" % (self.nextarg(),))
+        self.unwrap.append("space.utf_8(%s)" % (self.nextarg(),))
 
     def visit_text0(self, typ):
         self.unwrap.append("space.text0_w(%s)" % (self.nextarg(),))
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -310,7 +310,7 @@
 
     def descr_set__qualname__(self, space, w_name):
         try:
-            self._qualname = space.unicode_w(w_name)
+            self._qualname = space.utf_8(w_name)
         except OperationError as e:
             if e.match(space, space.w_TypeError):
                 raise oefmt(space.w_TypeError,
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -453,7 +453,7 @@
         space = self.space
         # co_name should be an identifier
         name = self.co_name.decode('utf-8')
-        fn = space.unicode_w(self.w_filename)
-        return space.newtext(u'<code object %s at 0x%s, file "%s", line %d>' % (
+        fn = space.utf_8(self.w_filename)
+        return space.newtext(b'<code object %s at 0x%s, file "%s", line %d>' % (
             name, unicode(self.getaddrstring(space)), fn,
             -1 if self.co_firstlineno == 0 else self.co_firstlineno))
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -1081,8 +1081,8 @@
             try:
                 w_pkgname = space.getattr(
                     w_module, space.newtext('__name__'))
-                w_fullname = space.newtext(u'%s.%s' %
-                    (space.unicode_w(w_pkgname), space.unicode_w(w_name)))
+                w_fullname = space.newtext(b'%s.%s' %
+                    (space.utf8_w(w_pkgname), space.utf8_w(w_name)))
                 return space.getitem(space.sys.get('modules'), w_fullname)
             except OperationError:
                 raise oefmt(
@@ -1331,7 +1331,7 @@
     def _make_function(self, oparg, freevars=None):
         space = self.space
         w_qualname = self.popvalue()
-        qualname = self.space.unicode_w(w_qualname)
+        qualname = self.space.utf8_w(w_qualname)
         w_codeobj = self.popvalue()
         codeobj = self.space.interp_w(PyCode, w_codeobj)
         if freevars is not None:
@@ -1647,7 +1647,7 @@
         lst = []
         for i in range(itemcount-1, -1, -1):
             w_item = self.peekvalue(i)
-            lst.append(space.unicode_w(w_item))
+            lst.append(space.utf8_w(w_item))
         self.dropvalues(itemcount)
         w_res = space.newtext(u''.join(lst))
         self.pushvalue(w_res)
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -97,10 +97,7 @@
     newtext = wrap
 
     def text_w(self, s):
-        return self.unicode_w(s).encode('utf-8')
-
-    def unicode_w(self, s):
-        return unicode(s)
+        return self.utf8_w(s)
 
     def len(self, x):
         return len(x)
@@ -342,14 +339,14 @@
     def test_unwrap_error(self):
         space = DummySpace()
         valuedummy = object()
-        def unicode_w(w):
+        def utf8_w(w):
             if w is None:
                 raise OperationError(TypeError, None)
             if w is valuedummy:
                 raise OperationError(ValueError, None)
-            return str(w)
-        space.unicode_w = unicode_w
-        space.text_w = unicode_w
+            return bytes(w, 'utf-8')
+        space.utf8_w = utf8_w
+        space.text_w = utf8_w
         excinfo = py.test.raises(OperationError, Arguments, space, [],
                                  ["a"], [1], w_starstararg={None: 1})
         assert excinfo.value.w_type is TypeError
diff --git a/pypy/module/_cffi_backend/errorbox.py b/pypy/module/_cffi_backend/errorbox.py
--- a/pypy/module/_cffi_backend/errorbox.py
+++ b/pypy/module/_cffi_backend/errorbox.py
@@ -86,7 +86,7 @@
                 return
 
             w_text = self.space.call_function(w_done)
-            p = rffi.unicode2wcharp(self.space.unicode_w(w_text),
+            p = rffi.unicode2wcharp(self.space.utf8_w(w_text),
                                     track_allocation=False)
             if self.text_p:
                 rffi.free_wcharp(self.text_p, track_allocation=False)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -101,7 +101,7 @@
                                      startpos, endpos):
             w_replace, newpos = errorhandler(errors, encoding, reason, input,
                                              startpos, endpos)
-            return space.unicode_w(w_replace), newpos
+            return space.utf8_w(w_replace), newpos
         return decode_call_errorhandler
 
     def make_encode_errorhandler(self, space):
@@ -111,7 +111,7 @@
             w_replace, newpos = errorhandler(errors, encoding, reason, input,
                                              startpos, endpos)
             if space.isinstance_w(w_replace, space.w_unicode):
-                return space.unicode_w(w_replace), None, newpos
+                return space.utf8_w(w_replace), None, newpos
             return None, space.bytes_w(w_replace), newpos
         return encode_call_errorhandler
 
@@ -1052,7 +1052,7 @@
     if errors is None:
         errors = 'strict'
     if space.isinstance_w(w_uni, space.w_unicode):
-        uni = space.unicode_w(w_uni)
+        uni = space.utf8_w(w_uni)
         state = space.fromcache(CodecState)
         result = runicode.unicode_encode_unicode_internal(
             uni, len(uni), errors, state.encode_error_handler)
diff --git a/pypy/module/_csv/interp_csv.py b/pypy/module/_csv/interp_csv.py
--- a/pypy/module/_csv/interp_csv.py
+++ b/pypy/module/_csv/interp_csv.py
@@ -43,7 +43,7 @@
     if w_src is None:
         return default
     try:
-        return space.unicode_w(w_src)
+        return space.utf8_w(w_src)
     except OperationError as e:
         if e.match(space, space.w_TypeError):
             raise oefmt(space.w_TypeError, '"%s" must be a string', attrname)
@@ -56,7 +56,7 @@
         return u'\0'
     if not space.isinstance_w(w_src, space.w_unicode):
         raise oefmt(space.w_TypeError, '"%s" must be string, not %T', name, w_src)
-    src = space.unicode_w(w_src)
+    src = space.utf8_w(w_src)
     if len(src) == 1:
         return src[0]
     if len(src) == 0:
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -73,13 +73,13 @@
                             break
                 raise
             self.line_num += 1
-            line = space.unicode_w(w_line)
+            line = space.utf8_w(w_line)
             for c in line:
-                if c == u'\0':
+                if c == b'\0':
                     raise self.error(u"line contains NULL byte")
 
                 if state == START_RECORD:
-                    if c == u'\n' or c == u'\r':
+                    if c == b'\n' or c == b'\r':
                         state = EAT_CRNL
                         continue
                     # normal character - handle as START_FIELD
diff --git a/pypy/module/_csv/interp_writer.py b/pypy/module/_csv/interp_writer.py
--- a/pypy/module/_csv/interp_writer.py
+++ b/pypy/module/_csv/interp_writer.py
@@ -42,9 +42,9 @@
             if space.is_w(w_field, space.w_None):
                 field = u""
             elif space.isinstance_w(w_field, space.w_float):
-                field = space.unicode_w(space.repr(w_field))
+                field = space.utf8_w(space.repr(w_field))
             else:
-                field = space.unicode_w(space.str(w_field))
+                field = space.utf8_w(space.str(w_field))
             #
             if dialect.quoting == QUOTE_NONNUMERIC:
                 try:
diff --git a/pypy/module/_lsprof/interp_lsprof.py b/pypy/module/_lsprof/interp_lsprof.py
--- a/pypy/module/_lsprof/interp_lsprof.py
+++ b/pypy/module/_lsprof/interp_lsprof.py
@@ -218,17 +218,17 @@
 
 def create_spec_for_function(space, w_func):
     assert isinstance(w_func, Function)
-    pre = u'built-in function ' if isinstance(w_func, BuiltinFunction) else u''
+    pre = b'built-in function ' if isinstance(w_func, BuiltinFunction) else b''
     if w_func.w_module is not None:
-        module = space.unicode_w(w_func.w_module)
-        if module != u'builtins':
-            return u'<%s%s.%s>' % (pre, module, w_func.getname(space))
-    return u'<%s%s>' % (pre, w_func.getname(space))
+        module = space.utf8_w(w_func.w_module)
+        if module != b'builtins':
+            return b'<%s%s.%s>' % (pre, module, w_func.getname(space))
+    return b'<%s%s>' % (pre, w_func.getname(space))
 
 
 def create_spec_for_object(space, w_type):
     class_name = w_type.getname(space)
-    return u"<'%s' object>" % (class_name,)
+    return b"<'%s' object>" % (class_name,)
 
 
 class W_DelayedBuiltinStr(W_Root):
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -17,7 +17,7 @@
 
 
 def raw_encode_basestring_ascii(space, w_unicode):
-    u = space.unicode_w(w_unicode)
+    u = space.utf8_w(w_unicode).encode()
     for i in range(len(u)):
         c = ord(u[i])
         if c < 32 or c > 126 or c == ord('\\') or c == ord('"'):
diff --git a/pypy/module/_pypyjson/targetjson.py b/pypy/module/_pypyjson/targetjson.py
--- a/pypy/module/_pypyjson/targetjson.py
+++ b/pypy/module/_pypyjson/targetjson.py
@@ -75,10 +75,6 @@
         assert isinstance(w_x, W_String)
         return w_x.strval
 
-    def unicode_w(self, w_x):
-        assert isinstance(w_x, W_Unicode)
-        return w_x.unival
-
     @dont_inline
     def call_method(self, obj, name, arg):
         assert name == 'append'
diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py
--- a/pypy/module/_socket/interp_socket.py
+++ b/pypy/module/_socket/interp_socket.py
@@ -233,8 +233,8 @@
     def _dealloc_warn(self):
         space = self.space
         try:
-            msg = (u"unclosed %s" %
-                   space.unicode_w(space.repr(self)))
+            msg = (b"unclosed %s" %
+                   space.utf8_w(space.repr(self)))
             space.warn(space.newtext(msg), space.w_ResourceWarning)
         except OperationError as e:
             # Spurious errors can appear at shutdown
diff --git a/pypy/module/_socket/test/test_sock_app.py b/pypy/module/_socket/test/test_sock_app.py
--- a/pypy/module/_socket/test/test_sock_app.py
+++ b/pypy/module/_socket/test/test_sock_app.py
@@ -130,7 +130,7 @@
     assert space.bytes_w(w_p) == packed
     w_ip = space.appexec([w_socket, w_p],
                          "(_socket, p): return _socket.inet_ntoa(p)")
-    assert space.unicode_w(w_ip) == ip
+    assert space.utf8_w(w_ip) == ip
 
 def test_pton_ntop_ipv4():
     if not hasattr(socket, 'inet_pton'):
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -111,7 +111,7 @@
 
     def repr_w(self):
         space = self.space
-        u = space.unicode_w(space.repr(self.w_pattern))
+        u = space.utf8_w(space.repr(self.w_pattern)).decode()
         if len(u) > 200:
             u = u[:200]
         flag_items = []
@@ -163,8 +163,8 @@
         string = None
         buf = None
         space = self.space
-        if space.isinstance_w(w_string, space.w_unicode):
-            unicodestr = space.unicode_w(w_string)
+        if space.isinstance_w(w_string, space.w_utf8):
+            unicodestr = space.utf8_w(w_string).decode()
             length = len(unicodestr)
         elif space.isinstance_w(w_string, space.w_bytes):
             string = space.bytes_w(w_string)
@@ -565,7 +565,7 @@
         ctx = self.ctx
         start, end = ctx.match_start, ctx.match_end
         w_s = slice_w(space, ctx, start, end, space.w_None)
-        u = space.unicode_w(space.repr(w_s))
+        u = space.utf8_w(space.repr(w_s)).decode()
         if len(u) > 50:
             u = u[:50]
         return space.newtext(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' %
diff --git a/pypy/module/_string/formatter.py b/pypy/module/_string/formatter.py
--- a/pypy/module/_string/formatter.py
+++ b/pypy/module/_string/formatter.py
@@ -1,10 +1,10 @@
 def formatter_parser(space, w_unicode):
     from pypy.objspace.std.newformat import unicode_template_formatter
-    tformat = unicode_template_formatter(space, space.unicode_w(w_unicode))
+    tformat = unicode_template_formatter(space, space.utf8_w(w_unicode))
     return tformat.formatter_parser()
 
 def formatter_field_name_split(space, w_unicode):
     from pypy.objspace.std.newformat import unicode_template_formatter
-    tformat = unicode_template_formatter(space, space.unicode_w(w_unicode))
+    tformat = unicode_template_formatter(space, space.utf8_w(w_unicode))
     return tformat.formatter_field_name_split()
 
diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py
--- a/pypy/module/_warnings/interp_warnings.py
+++ b/pypy/module/_warnings/interp_warnings.py
@@ -247,9 +247,9 @@
     w_stderr = space.sys.get("stderr")
 
     # Print "filename:lineno: category: text\n"
-    message = u"%s:%d: %s: %s\n" % (space.unicode_w(w_filename), lineno,
-                                    space.unicode_w(w_name),
-                                    space.unicode_w(w_text))
+    message = b"%s:%d: %s: %s\n" % (space.utf8_w(w_filename), lineno,
+                                    space.utf8_w(w_name),
+                                    space.utf8_w(w_text))
     space.call_method(w_stderr, "write", space.newtext(message))
 
     # Print "  source_line\n"
@@ -267,7 +267,7 @@
 
     if not w_sourceline:
         return
-    line = space.unicode_w(w_sourceline)
+    line = space.utf8_w(w_sourceline)
     if not line:
         return
 
diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py
--- a/pypy/module/_winreg/interp_winreg.py
+++ b/pypy/module/_winreg/interp_winreg.py
@@ -222,7 +222,7 @@
     if typ != rwinreg.REG_SZ:
         raise oefmt(space.w_ValueError, "Type must be winreg.REG_SZ")
     hkey = hkey_w(w_hkey, space)
-    with rffi.scoped_unicode2wcharp(space.unicode_w(w_subkey)) as subkey:
+    with rffi.scoped_unicode2wcharp(space.utf8_w(w_subkey).decode()) as subkey:
         c_subkey = rffi.cast(rffi.CCHARP, subkey)
         with rffi.scoped_unicode2wcharp(value) as dataptr:
             c_dataptr = rffi.cast(rffi.CCHARP, dataptr)
@@ -246,7 +246,7 @@
     if space.is_w(w_subkey, space.w_None):
         subkey = None
     else:
-        subkey = space.unicode_w(w_subkey)
+        subkey = space.utf8_w(w_subkey).decode()
     with rffi.scoped_unicode2wcharp(subkey) as wide_subkey:
         c_subkey = rffi.cast(rffi.CCHARP, wide_subkey)
         with lltype.scoped_alloc(rwin32.PLONG.TO, 1) as bufsize_p:
@@ -296,7 +296,7 @@
             buf = lltype.malloc(rffi.CCHARP.TO, buflen, flavor='raw')
             buf[0] = '\0'
         else:
-            buf = rffi.unicode2wcharp(space.unicode_w(w_value))
+            buf = rffi.unicode2wcharp(space.utf8_w(w_value).decode())
             buf = rffi.cast(rffi.CCHARP, buf)
             buflen = (space.len_w(w_value) * 2) + 1
 
@@ -314,7 +314,7 @@
             while True:
                 try:
                     w_item = space.next(w_iter)
-                    item = space.unicode_w(w_item)
+                    item = space.utf8_w(w_item).decode()
                     strings.append(item)
                     buflen += 2 * (len(item) + 1)
                 except OperationError as e:
@@ -455,7 +455,7 @@
     if space.is_w(w_subkey, space.w_None):
         subkey = None
     else:
-        subkey = space.unicode_w(w_subkey)
+        subkey = space.utf8_w(w_subkey).decode()
     null_dword = lltype.nullptr(rwin32.LPDWORD.TO)
     with rffi.scoped_unicode2wcharp(subkey) as wide_subkey:
         c_subkey = rffi.cast(rffi.CCHARP, wide_subkey)
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -754,7 +754,7 @@
             return space.newtext("array('%s')" % self.typecode)
         elif self.typecode == "u":
             r = space.repr(self.descr_tounicode(space))
-            s = u"array('u', %s)" % space.unicode_w(r)
+            s = b"array('b', %s)" % space.utf8_w(r)
             return space.newtext(s)
         else:
             r = space.repr(self.descr_tolist(space))
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1707,13 +1707,13 @@
         else:
             look_for = also_look_for
     assert look_for is not None
-    msg = u"function %s not found in library %s" % (
-        look_for.decode('utf-8'), space.unicode_w(space.newfilename(path)))
+    msg = b"function %s not found in library %s" % (
+        look_for.decode('utf-8'), space.utf8_w(space.newfilename(path)))
     w_path = space.newfilename(path)
     raise_import_error(space, space.newtext(msg), w_name, w_path)
 
 def get_init_name(space, w_name):
-    name_u = space.unicode_w(w_name)
+    name_u = space.utf8_w(w_name).decode()
     basename_u = name_u.split(u'.')[-1]
     try:
         basename = basename_u.encode('ascii')
diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py
--- a/pypy/module/cpyext/state.py
+++ b/pypy/module/cpyext/state.py
@@ -141,7 +141,7 @@
             argv = space.sys.get('argv')
             if space.len_w(argv):
                 argv0 = space.getitem(argv, space.newint(0))
-                progname = space.unicode_w(argv0)
+                progname = space.utf8_w(argv0).decode()
             else:
                 progname = u"pypy3"
             self.programname = rffi.unicode2wcharp(progname)
diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py
--- a/pypy/module/cpyext/test/test_cpyext.py
+++ b/pypy/module/cpyext/test/test_cpyext.py
@@ -101,7 +101,7 @@
 
 def is_interned_string(space, w_obj):
     try:
-        u = space.unicode_w(w_obj)
+        u = space.utf8_w(w_obj)
     except OperationError:
         return False
     return space.interned_strings.get(u) is not None
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -658,8 +658,8 @@
         b_text = rffi.str2charp('caf\x82xx')
         b_encoding = rffi.str2charp('cp437')
         b_errors = rffi.str2charp('strict')
-        assert space.unicode_w(PyUnicode_Decode(
-            space, b_text, 4, b_encoding, b_errors)) == u'caf\xe9'
+        assert space.utf8_w(PyUnicode_Decode(
+            space, b_text, 4, b_encoding, b_errors)).decode() == u'caf\xe9'
         assert (space.utf8_w(
             PyUnicode_Decode(space, b_text, 4, b_encoding, None)) ==
             u'caf\xe9'.encode("utf-8"))
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -72,7 +72,7 @@
 
 def unicode_attach(space, py_obj, w_obj, w_userdata=None):
     "Fills a newly allocated PyUnicodeObject with a unicode string"
-    value = space.unicode_w(w_obj)
+    value = space.utf8_w(w_obj).decode()
     set_wsize(py_obj, len(value))
     set_wbuffer(py_obj, lltype.nullptr(rffi.CWCHARP.TO))
     _readify(space, py_obj, value)
@@ -353,7 +353,7 @@
     if not get_wbuffer(ref):
         # Copy unicode buffer
         w_unicode = from_ref(space, rffi.cast(PyObject, ref))
-        u = space.unicode_w(w_unicode)
+        u = space.utf8_w(w_unicode).decode()
         set_wbuffer(ref, rffi.unicode2wcharp(u))
         set_wsize(ref, len(u))
     if psize:
@@ -943,7 +943,7 @@
     than, equal, and greater than, respectively. It is best to pass only
     ASCII-encoded strings, but the function interprets the input string as
     ISO-8859-1 if it contains non-ASCII characters."""
-    uni = space.unicode_w(w_uni)
+    uni = space.utf8_w(w_uni).decode()
     i = 0
     # Compare Unicode string and source character set string
     while i < len(uni) and string[i] != '\0':
@@ -1054,7 +1054,7 @@
 
 @cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
 def PyUnicode_Substring(space, w_str, start, end):
-    usrc = space.unicode_w(w_str)
+    usrc = space.utf8_w(w_str).decode()
     length = len(usrc)
     if start < 0 or end < 0:
         raise oefmt(space.w_IndexError, "string index out of range")
diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py
--- a/pypy/module/exceptions/interp_exceptions.py
+++ b/pypy/module/exceptions/interp_exceptions.py
@@ -150,10 +150,10 @@
 
     def descr_repr(self, space):
         if self.args_w:
-            args_repr = space.unicode_w(
+            args_repr = space.utf8_w(
                 space.repr(space.newtuple(self.args_w)))
         else:
-            args_repr = u"()"
+            args_repr = b"()"
         clsname = self.getclass(space).getname(space)
         return space.newtext(clsname + args_repr)
 
@@ -587,38 +587,38 @@
 
     def descr_str(self, space):
         if self.w_errno:
-            errno = space.unicode_w(space.str(self.w_errno))
+            errno = space.utf8_w(space.str(self.w_errno))
         else:
-            errno = u""
+            errno = b""
         if self.w_strerror:
-            strerror = space.unicode_w(space.str(self.w_strerror))
+            strerror = space.utf8_w(space.str(self.w_strerror))
         else:
-            strerror = u""
+            strerror = b""
         if rwin32.WIN32 and self.w_winerror:
-            winerror = space.unicode_w(space.str(self.w_winerror))
+            winerror = space.utf8_w(space.str(self.w_winerror))
             # If available, winerror has the priority over errno
             if self.w_filename:
                 if self.w_filename2:
-                    return space.newtext(u"[WinError %s] %s: %s -> %s" % (
+                    return space.newtext(b"[WinError %s] %s: %s -> %s" % (
                         winerror, strerror,
-                        space.unicode_w(space.repr(self.w_filename)),
-                        space.unicode_w(space.repr(self.w_filename2))))
-                return space.newtext(u"[WinError %s] %s: %s" % (
+                        space.utf8_w(space.repr(self.w_filename)),
+                        space.utf8_w(space.repr(self.w_filename2))))
+                return space.newtext(b"[WinError %s] %s: %s" % (
                     winerror, strerror,
-                    space.unicode_w(space.repr(self.w_filename))))
-            return space.newtext(u"[WinError %s] %s" % (
+                    space.utf8_w(space.repr(self.w_filename))))
+            return space.newtext(b"[WinError %s] %s" % (
                 winerror, strerror))
         if self.w_filename:
             if self.w_filename2:
-                return space.newtext(u"[Errno %s] %s: %s -> %s" % (
+                return space.newtext(b"[Errno %s] %s: %s -> %s" % (
                     errno, strerror,
-                    space.unicode_w(space.repr(self.w_filename)),
-                    space.unicode_w(space.repr(self.w_filename2))))
-            return space.newtext(u"[Errno %s] %s: %s" % (
+                    space.utf8_w(space.repr(self.w_filename)),
+                    space.utf8_w(space.repr(self.w_filename2))))
+            return space.newtext(b"[Errno %s] %s: %s" % (
                 errno, strerror,
-                space.unicode_w(space.repr(self.w_filename))))
+                space.utf8_w(space.repr(self.w_filename))))
         if self.w_errno and self.w_strerror:
-            return space.newtext(u"[Errno %s] %s" % (
+            return space.newtext(b"[Errno %s] %s" % (
                 errno, strerror))
         return W_BaseException.descr_str(self, space)
 
@@ -785,7 +785,7 @@
             values_w = space.fixedview(self.args_w[1])
             w_tuple = space.newtuple(values_w + [self.w_lastlineno])
             args_w = [self.args_w[0], w_tuple]
-            args_repr = space.unicode_w(space.repr(space.newtuple(args_w)))
+            args_repr = space.utf8_w(space.repr(space.newtuple(args_w)))
             clsname = self.getclass(space).getname(space)
             return space.newtext(clsname + args_repr)
         else:
@@ -793,15 +793,15 @@
 
     # CPython Issue #21669: Custom error for 'print' & 'exec' as statements
     def _report_missing_parentheses(self, space):
-        text = space.unicode_w(self.w_text)
-        if u'(' in text:
+        text = space.utf8_w(self.w_text)
+        if b'(' in text:
             # Use default error message for any line with an opening paren
             return
         # handle the simple statement case
         if self._check_for_legacy_statements(space, text, 0):
             return
         # Handle the one-line complex statement case
-        pos = text.find(u':')
+        pos = text.find(b':')
         if pos < 0:
             return
         # Check again, starting from just after the colon
@@ -817,11 +817,11 @@
         if start > 0:
             text = text[start:]
         # Check for legacy print statements
-        if text.startswith(u"print "):
+        if text.startswith(b"print "):
             self.w_msg = space.newtext("Missing parentheses in call to 'print'")
             return True
         # Check for legacy exec statements
-        if text.startswith(u"exec "):
+        if text.startswith(b"exec "):
             self.w_msg = space.newtext("Missing parentheses in call to 'exec'")
             return True
         return False
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -2258,7 +2258,7 @@
                                space.newint(info[2])])
 
     def _getfinalpathname(space, w_path):
-        path = space.unicode_w(w_path)
+        path = space.utf8_w(w_path)
         try:
             result = nt._getfinalpathname(path)
         except nt.LLNotImplemented as e:
diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py
--- a/pypy/module/posix/interp_scandir.py
+++ b/pypy/module/posix/interp_scandir.py
@@ -27,7 +27,7 @@
         if space.isinstance_w(w_path, space.w_bytes):
             raise oefmt(space.w_TypeError, "os.scandir() doesn't support bytes path"
                                            " on Windows, use Unicode instead")
-        path = space.unicode_w(w_path)
+        path = space.utf8_w(w_path)
         result_is_bytes = False
 
     # 'path' is always bytes on posix and always unicode on windows
@@ -157,8 +157,8 @@
         self.w_name = w_name
 
     def descr_repr(self, space):
-        u = space.unicode_w(space.repr(self.w_name))
-        return space.newtext(u"<DirEntry %s>" % u)
+        u = space.utf8_w(space.repr(self.w_name))
+        return space.newtext(b"<DirEntry %s>" % u)
 
     def fget_name(self, space):
         return self.w_name
diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py
--- a/pypy/module/pyexpat/interp_pyexpat.py
+++ b/pypy/module/pyexpat/interp_pyexpat.py
@@ -639,7 +639,7 @@
         """Parse(data[, isfinal])
 Parse XML data.  `isfinal' should be true at end of input."""
         if space.isinstance_w(w_data, space.w_unicode):
-            data = encode_utf8(space, w_data.unicode_w(space))
+            data = encode_utf8(space, w_data.utf8_w(space))
             # Explicitly set UTF-8 encoding. Return code ignored.
             XML_SetEncoding(self.itself, "utf-8")
         else:
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py
--- a/pypy/module/time/interp_time.py
+++ b/pypy/module/time/interp_time.py
@@ -616,7 +616,7 @@
             # it saves the string that is later deleted when this
             # function is called again. A refactoring of this module
             # could remove this
-            tm_zone = encode_utf8(space, space.unicode_w(tup_w[9]), allow_surrogates=True)
+            tm_zone = space.utf8_w(tup_w[9])
             malloced_str = rffi.str2charp(tm_zone, track_allocation=False)
             if old_tm_zone != lltype.nullptr(rffi.CCHARP.TO):
                 rffi.free_charp(old_tm_zone, track_allocation=False)
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -203,7 +203,7 @@
     def descr_fromhex(space, w_bytearraytype, w_hexstring):
         if not space.is_w(space.type(w_hexstring), space.w_unicode):
             raise oefmt(space.w_TypeError, "must be str, not %T", w_hexstring)
-        hexstring = space.unicode_w(w_hexstring)
+        hexstring = space.utf8_w(w_hexstring)
         data = _hexstring_to_array(space, hexstring)
         # in CPython bytearray.fromhex is a staticmethod, so
         # we ignore w_type and always return a bytearray
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -566,7 +566,7 @@
         if not space.is_w(space.type(w_hexstring), space.w_unicode):
             raise oefmt(space.w_TypeError, "must be str, not %T", w_hexstring)
         from pypy.objspace.std.bytearrayobject import _hexstring_to_array
-        hexstring = space.unicode_w(w_hexstring)
+        hexstring = space.utf8_w(w_hexstring)
         bytes = ''.join(_hexstring_to_array(space, hexstring))
         return W_BytesObject(bytes)
 
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -1436,8 +1436,8 @@
     def descr_repr(self, space):
         typename = space.type(self).getname(space)
         w_seq = space.call_function(space.w_list, self)
-        seq_repr = space.unicode_w(space.repr(w_seq))
-        return space.newtext(u"%s(%s)" % (typename, seq_repr))
+        seq_repr = space.utf8_w(space.repr(w_seq))
+        return space.newtext(b"%s(%s)" % (typename, seq_repr))
 
     def descr_len(self, space):
         return space.len(self.w_dict)
diff --git a/pypy/objspace/std/dictproxyobject.py b/pypy/objspace/std/dictproxyobject.py
--- a/pypy/objspace/std/dictproxyobject.py
+++ b/pypy/objspace/std/dictproxyobject.py
@@ -44,8 +44,8 @@
         return space.str(self.w_mapping)
 
     def descr_repr(self, space):
-        return space.newtext(u"mappingproxy(%s)" %
-                                (space.unicode_w(space.repr(self.w_mapping)),))
+        return space.newtext(b"mappingproxy(%s)" %
+                                (space.utf8_w(space.repr(self.w_mapping)),))
 
     @unwrap_spec(w_default=WrappedDefault(None))
     def get_w(self, space, w_key, w_default):
diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -447,14 +447,14 @@
                 # arbitrary unicode chars if w_value is an arbitrary unicode
                 # string
                 w_value = self.space.repr(w_value)
-                self.std_wp(self.space.unicode_w(w_value))
+                self.std_wp(self.space.utf8_w(w_value))
 
         def fmt_a(self, w_value):
             from pypy.objspace.std.unicodeobject import ascii_from_object
             w_value = ascii_from_object(self.space, w_value)
             # %a calls ascii(), which should return an ascii unicode string
             if do_unicode:
-                value = self.space.unicode_w(w_value)
+                value = self.space.utf8_w(w_value)
             else:
                 value = self.space.text_w(w_value)
             self.std_wp(value)
@@ -498,7 +498,7 @@
                 raise oefmt(space.w_TypeError, "%c requires int or single byte")
             else:
                 if space.isinstance_w(w_value, space.w_unicode):
-                    ustr = space.unicode_w(w_value)
+                    ustr = space.utf8_w(w_value)
                     if len(ustr) == 1:
                         self.std_wp(ustr)
                         return
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -387,8 +387,7 @@
 
 def _unmarshal_strlist(u):
     items_w = _unmarshal_tuple_w(u)
-    return [_encode_utf8(u.space, u.space.unicode_w(w_item))
-            for w_item in items_w]
+    return [u.space.utf8_w(w_item) for w_item in items_w]
 
 def _unmarshal_tuple_w(u):
     w_obj = u.get_w_obj()
@@ -414,8 +413,8 @@
     varnames    = _unmarshal_strlist(u)
     freevars    = _unmarshal_strlist(u)
     cellvars    = _unmarshal_strlist(u)
-    filename    = _encode_utf8(space, space.unicode0_w(u.get_w_obj()))
-    name        = _encode_utf8(space, space.unicode_w(u.get_w_obj()))
+    filename    = space.utf8_0_w(u.get_w_obj())
+    name        = space.utf8_w(u.get_w_obj())
     firstlineno = u.get_int()
     lnotab      = space.bytes_w(u.get_w_obj())
     filename = assert_str0(filename)
@@ -442,12 +441,11 @@
         m.atom_str(typecode, s)
 
 # surrogate-preserving variants
-_encode_utf8 = unicodehelper.encode_utf8sp
 _decode_utf8 = unicodehelper.decode_utf8sp
 
 @marshaller(W_UnicodeObject)
 def marshal_unicode(space, w_unicode, m):
-    s = _encode_utf8(space, space.unicode_w(w_unicode))
+    s = space.utf8_w(w_unicode)
     _marshal_unicode(space, s, m, w_unicode=w_unicode)
 
 @unmarshaller(TYPE_UNICODE)
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -572,7 +572,7 @@
             space = self.space
             if not space.is_w(space.type(w_string), space.w_unicode):
                 w_string = space.str(w_string)
-            string = space.unicode_w(w_string)
+            string = space.utf8_w(w_string)
             if self._parse_spec("s", "<"):
                 return self.wrap(string)
             if self._type != "s":
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -762,12 +762,12 @@
             w_module = w_type.lookup("__module__")
             if w_module is not None:
                 try:
-                    modulename = self.unicode_w(w_module)
+                    modulename = self.utf8_w(w_module)
                 except OperationError as e:
                     if not e.match(self, self.w_TypeError):
                         raise
                 else:
-                    classname = u'%s.%s' % (modulename, classname)
+                    classname = b'%s.%s' % (modulename, classname)
         else:
             classname = w_type.name.decode('utf-8')
         return classname
diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py
--- a/pypy/objspace/std/test/test_dictmultiobject.py
+++ b/pypy/objspace/std/test/test_dictmultiobject.py
@@ -1291,9 +1291,9 @@
         assert isinstance(string, str)
         return string
 
-    def unicode_w(self, u):
-        assert isinstance(u, unicode)
-        return u
+    def utf8_w(self, b):
+        assert isinstance(b, str)  # utf-8 payload must be a byte str, not unicode
+        return b
 
     def int_w(self, integer, allow_conversion=True):
         assert isinstance(integer, int)
diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py
--- a/pypy/objspace/std/tupleobject.py
+++ b/pypy/objspace/std/tupleobject.py
@@ -103,10 +103,10 @@
         items = self.tolist()
         if len(items) == 1:
             return space.newtext(
-                u"(" + space.unicode_w(space.repr(items[0])) + u",)")
-        tmp = u", ".join([space.unicode_w(space.repr(item))
+                b"(" + space.utf8_w(space.repr(items[0])) + b",)")
+        tmp = b", ".join([space.utf8_w(space.repr(item))
                           for item in items])
-        return space.newtext(u"(" + tmp + u")")
+        return space.newtext(b"(" + tmp + b")")
 
     def descr_hash(self, space):
         raise NotImplementedError
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -204,7 +204,7 @@
             w_qualname = self.dict_w.pop('__qualname__', None)
             if w_qualname is not None:
                 if space.isinstance_w(w_qualname, space.w_unicode):
-                    self.qualname = space.unicode_w(w_qualname)
+                    self.qualname = space.utf8_w(w_qualname)
                 elif not self.flag_cpytype:
                     raise oefmt(space.w_TypeError,
                                 "type __qualname__ must be a str, not %T",
@@ -723,9 +723,9 @@
         if w_mod is None or not space.isinstance_w(w_mod, space.w_text):
             mod = None
         else:
-            mod = space.unicode_w(w_mod)
-        if mod is not None and mod != u'builtins':
-            return space.newtext(u"<class '%s.%s'>" % (mod, self.getqualname(space)))
+            mod = space.utf8_w(w_mod)
+        if mod is not None and mod != b'builtins':
+            return space.newtext(b"<class '%s.%s'>" % (mod, self.getqualname(space)))
         else:
             return space.newtext("<class '%s'>" % (self.name,))
 
@@ -869,7 +869,7 @@
     w_type = _check(space, w_type)
     if not w_type.is_heaptype():
         raise oefmt(space.w_TypeError, "can't set %N.__qualname__", w_type)
-    w_type.qualname = space.unicode_w(w_value)
+    w_type.qualname = space.utf8_w(w_value)
 
 def descr_get__mro__(space, w_type):
     w_type = _check(space, w_type)
@@ -1158,7 +1158,7 @@
     if not space.isinstance_w(w_name, space.w_text):
         raise oefmt(space.w_TypeError,
             "__slots__ items must be strings, not '%T'", w_name)
-    if not _isidentifier(space.unicode_w(w_name)):
+    if not _isidentifier(space.utf8_w(w_name)):
         raise oefmt(space.w_TypeError, "__slots__ must be identifiers")
     return w_name.text_w(space)
 
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -210,15 +210,15 @@
 
     @staticmethod
     def descr_maketrans(space, w_type, w_x, w_y=None, w_z=None):
-        y = None if space.is_none(w_y) else space.unicode_w(w_y)
-        z = None if space.is_none(w_z) else space.unicode_w(w_z)
+        y = None if space.is_none(w_y) else space.utf8_w(w_y)
+        z = None if space.is_none(w_z) else space.utf8_w(w_z)
         w_new = space.newdict()
 
         if y is not None:
             # x must be a string too, of equal length
             ylen = len(y)
             try:
-                x = space.unicode_w(w_x)
+                x = space.utf8_w(w_x)
             except OperationError as e:
                 if not e.match(space, space.w_TypeError):
                     raise
@@ -257,7 +257,7 @@
                 w_key, w_value = space.unpackiterable(w_item, 2)
                 if space.isinstance_w(w_key, space.w_unicode):
                     # convert string keys to integer keys
-                    key = space.unicode_w(w_key)
+                    key = space.utf8_w(w_key)
                     if len(key) != 1:
                         raise oefmt(space.w_ValueError,
                                     "string keys in translate table must be "
@@ -283,7 +283,7 @@
         if space.is_w(space.type(self), space.w_unicode):
             return self
         # Subtype -- return genuine unicode string with the same value.
-        return space.newtext(space.unicode_w(self))
+        return space.newtext(space.utf8_w(self))
 
     def descr_hash(self, space):
         x = compute_hash(self._utf8)