[pypy-commit] pypy unicode-utf8-py3: decode needs an argument

Mon Aug 6 03:19:17 EDT 2018

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94957:a7da1446b505
Date: 2018-08-05 15:30 -0700
http://bitbucket.org/pypy/pypy/changeset/a7da1446b505/

Log:	decode needs an argument

diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py
--- a/pypy/module/_winreg/interp_winreg.py
+++ b/pypy/module/_winreg/interp_winreg.py
@@ -222,7 +222,7 @@
     if typ != rwinreg.REG_SZ:
         raise oefmt(space.w_ValueError, "Type must be winreg.REG_SZ")
     hkey = hkey_w(w_hkey, space)
-    with rffi.scoped_unicode2wcharp(space.utf8_w(w_subkey).decode()) as subkey:
+    with rffi.scoped_unicode2wcharp(space.utf8_w(w_subkey).decode('utf8')) as subkey:
         c_subkey = rffi.cast(rffi.CCHARP, subkey)
         with rffi.scoped_unicode2wcharp(value) as dataptr:
             c_dataptr = rffi.cast(rffi.CCHARP, dataptr)
@@ -246,7 +246,7 @@
     if space.is_w(w_subkey, space.w_None):
         subkey = None
     else:
-        subkey = space.utf8_w(w_subkey).decode()
+        subkey = space.utf8_w(w_subkey).decode('utf8')
     with rffi.scoped_unicode2wcharp(subkey) as wide_subkey:
         c_subkey = rffi.cast(rffi.CCHARP, wide_subkey)
         with lltype.scoped_alloc(rwin32.PLONG.TO, 1) as bufsize_p:
@@ -296,7 +296,7 @@
             buf = lltype.malloc(rffi.CCHARP.TO, buflen, flavor='raw')
             buf[0] = '\0'
         else:
-            buf = rffi.unicode2wcharp(space.utf8_w(w_value).decode())
+            buf = rffi.unicode2wcharp(space.utf8_w(w_value).decode('utf8'))
             buf = rffi.cast(rffi.CCHARP, buf)
             buflen = (space.len_w(w_value) * 2) + 1
 
@@ -314,7 +314,7 @@
             while True:
                 try:
                     w_item = space.next(w_iter)
-                    item = space.utf8_w(w_item).decode()
+                    item = space.utf8_w(w_item).decode('utf8')
                     strings.append(item)
                     buflen += 2 * (len(item) + 1)
                 except OperationError as e:
@@ -455,7 +455,7 @@
     if space.is_w(w_subkey, space.w_None):
         subkey = None
     else:
-        subkey = space.utf8_w(w_subkey).decode()
+        subkey = space.utf8_w(w_subkey).decode('utf8')
     null_dword = lltype.nullptr(rwin32.LPDWORD.TO)
     with rffi.scoped_unicode2wcharp(subkey) as wide_subkey:
         c_subkey = rffi.cast(rffi.CCHARP, wide_subkey)
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1716,7 +1716,7 @@
     raise_import_error(space, space.newtext(msg), w_name, w_path)
 
 def get_init_name(space, w_name):
-    name_u = space.utf8_w(w_name).decode()
+    name_u = space.utf8_w(w_name).decode('utf8')
     basename_u = name_u.split(u'.')[-1]
     try:
         basename = basename_u.encode('ascii')
diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py
--- a/pypy/module/cpyext/methodobject.py
+++ b/pypy/module/cpyext/methodobject.py
@@ -254,8 +254,8 @@
 
     def descr_method_repr(self):
         return self.getrepr(
-            self.space, u"built-in method '%s' of '%s' object" %
-            (self.name.decode('utf-8'), self.w_objclass.getname(self.space)))
+            self.space, "built-in method '%s' of '%s' object" %
+            (self.name, self.w_objclass.getname(self.space)))
 
 
 class W_PyCWrapperObject(W_Root):
diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py
--- a/pypy/module/cpyext/state.py
+++ b/pypy/module/cpyext/state.py
@@ -141,7 +141,7 @@
             argv = space.sys.get('argv')
             if space.len_w(argv):
                 argv0 = space.getitem(argv, space.newint(0))
-                progname = space.utf8_w(argv0).decode()
+                progname = space.utf8_w(argv0).decode('utf8')
             else:
                 progname = u"pypy3"
             self.programname = rffi.unicode2wcharp(progname)
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -383,9 +383,12 @@
                     # will never be called.  This causes RPython
                     # problems.  Avoid it with the nonconst hack.
                     if not allow_surrogates or nonconst.NonConstant(False):
+                        utf8 = s
+                        if isinstance(s, unicode):
+                            utf8 = s.encode('utf8')
                         ru, rs, pos = errorhandler(errors, 'utf8',
                                                    'surrogates not allowed',
-                                                   s, pos-1, pos)
+                                                   utf8, pos-1, pos)
                         if rs is not None:
                             # py3k only
                             result.append(rs)
@@ -396,7 +399,7 @@
                             else:
                                 errorhandler('strict', 'utf8',
                                              'surrogates not allowed',
-                                             s, pos-1, pos)
+                                             utf8, pos-1, pos)
                         continue
                     # else: Fall through and handles isolated high surrogates
                 result.append((chr((0xe0 | (ch >> 12)))))