[pypy-commit] pypy unicode-utf8-py3: convert unicode_w to utf8_w, use decode when a python2 unicode object is required
mattip
pypy.commits at gmail.com
Thu Jun 14 01:43:27 EDT 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94762:92280566ae0d
Date: 2018-06-13 21:49 -0700
http://bitbucket.org/pypy/pypy/changeset/92280566ae0d/
Log: convert unicode_w to utf8_w, use decode when a python2 unicode object
is required
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -112,7 +112,7 @@
# only intern identifier-like strings
from pypy.objspace.std.unicodeobject import _isidentifier
if (space.is_w(space.type(w_const), space.w_unicode) and
- _isidentifier(space.unicode_w(w_const))):
+ _isidentifier(space.utf8_w(w_const))):
return space.new_interned_w_str(w_const)
return w_const
diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -326,7 +326,7 @@
# produce compatible pycs.
if (self.space.isinstance_w(w_obj, self.space.w_unicode) and
self.space.isinstance_w(w_const, self.space.w_unicode)):
- #unistr = self.space.unicode_w(w_const)
+ #unistr = self.space.utf8_w(w_const)
#if len(unistr) == 1:
# ch = ord(unistr[0])
#else:
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -80,7 +80,7 @@
def getname(self, space):
try:
- return space.unicode_w(space.getattr(self, space.newtext('__name__')))
+ return space.utf8_w(space.getattr(self, space.newtext('__name__')))
except OperationError as e:
if e.match(space, space.w_TypeError) or e.match(space, space.w_AttributeError):
return u'?'
@@ -245,10 +245,6 @@
def bytes_w(self, space):
self._typed_unwrap_error(space, "bytes")
- def unicode_w(self, space):
- self._typed_unwrap_error(space, "string")
- realunicode_w = unicode_w
-
def utf8_w(self, space):
self._typed_unwrap_error(space, "unicode")
@@ -824,7 +820,7 @@
def new_interned_w_str(self, w_u):
assert isinstance(w_u, W_Root) # and is not None
- u = self.unicode_w(w_u)
+ u = self.utf8_w(w_u)
if not we_are_translated():
assert type(u) is str
w_u1 = self.interned_strings.get(u)
@@ -1719,8 +1715,8 @@
def convert_to_w_unicode(self, w_obj):
return w_obj.convert_to_w_unicode(self)
- def unicode0_w(self, w_obj):
- "Like unicode_w, but rejects strings with NUL bytes."
+ def utf8_0_w(self, w_obj):
+ "Like utf8_w, but rejects strings with NUL bytes."
from rpython.rlib import rstring
result = w_obj.utf8_w(self).decode('utf8')
if u'\x00' in result:
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -507,9 +507,9 @@
if fmt == 'd':
result = str(value).decode('ascii')
elif fmt == 'R':
- result = space.unicode_w(space.repr(value))
+ result = space.utf8_w(space.repr(value))
elif fmt == 'S':
- result = space.unicode_w(space.str(value))
+ result = space.utf8_w(space.str(value))
elif fmt == 'T':
result = _decode_utf8(space.type(value).name)
elif fmt == 'N':
@@ -565,8 +565,8 @@
%8 - The result of arg.decode('utf-8')
%N - The result of w_arg.getname(space)
- %R - The result of space.unicode_w(space.repr(w_arg))
- %S - The result of space.unicode_w(space.str(w_arg))
+ %R - The result of space.utf8_w(space.repr(w_arg))
+ %S - The result of space.utf8_w(space.str(w_arg))
%T - The result of space.type(w_arg).name
"""
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -337,7 +337,7 @@
self.space = space
self.name = space.text_w(w_name)
- self.qualname = space.unicode_w(w_qualname)
+ self.qualname = space.utf8_w(w_qualname)
self.code = space.interp_w(Code, w_code)
if not space.is_w(w_closure, space.w_None):
from pypy.interpreter.nestedscope import Cell
@@ -434,7 +434,7 @@
def fset_func_qualname(self, space, w_name):
try:
- self.qualname = space.unicode_w(w_name)
+ self.qualname = space.utf8_w(w_name)
except OperationError as e:
if e.match(space, space.w_TypeError):
raise oefmt(space.w_TypeError,
@@ -549,13 +549,13 @@
name = self.w_function.getname(self.space)
else:
try:
- name = space.unicode_w(w_name)
+ name = space.utf8_w(w_name)
except OperationError as e:
if not e.match(space, space.w_TypeError):
raise
name = u'?'
- objrepr = space.unicode_w(space.repr(self.w_instance))
- s = u'<bound method %s of %s>' % (name, objrepr)
+ objrepr = space.utf8_w(space.repr(self.w_instance))
+ s = b'<bound method %s of %s>' % (name, objrepr)
return space.newtext(s)
def descr_method_getattribute(self, w_attr):
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -327,7 +327,7 @@
self.run_args.append("space.text0_w(%s)" % (self.scopenext(),))
def visit_unicode(self, typ):
- self.run_args.append("space.unicode_w(%s)" % (self.scopenext(),))
+ self.run_args.append("space.utf_8(%s)" % (self.scopenext(),))
def visit_utf8(self, typ):
self.run_args.append("space.utf8_w(%s)" % (self.scopenext(),))
@@ -498,7 +498,7 @@
self.unwrap.append("space.text_w(%s)" % (self.nextarg(),))
def visit_unicode(self, typ):
- self.unwrap.append("space.unicode_w(%s)" % (self.nextarg(),))
+ self.unwrap.append("space.utf_8(%s)" % (self.nextarg(),))
def visit_text0(self, typ):
self.unwrap.append("space.text0_w(%s)" % (self.nextarg(),))
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -310,7 +310,7 @@
def descr_set__qualname__(self, space, w_name):
try:
- self._qualname = space.unicode_w(w_name)
+ self._qualname = space.utf_8(w_name)
except OperationError as e:
if e.match(space, space.w_TypeError):
raise oefmt(space.w_TypeError,
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -453,7 +453,7 @@
space = self.space
# co_name should be an identifier
name = self.co_name.decode('utf-8')
- fn = space.unicode_w(self.w_filename)
- return space.newtext(u'<code object %s at 0x%s, file "%s", line %d>' % (
+ fn = space.utf_8(self.w_filename)
+ return space.newtext(b'<code object %s at 0x%s, file "%s", line %d>' % (
name, unicode(self.getaddrstring(space)), fn,
-1 if self.co_firstlineno == 0 else self.co_firstlineno))
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -1081,8 +1081,8 @@
try:
w_pkgname = space.getattr(
w_module, space.newtext('__name__'))
- w_fullname = space.newtext(u'%s.%s' %
- (space.unicode_w(w_pkgname), space.unicode_w(w_name)))
+ w_fullname = space.newtext(b'%s.%s' %
+ (space.utf8_w(w_pkgname), space.utf8_w(w_name)))
return space.getitem(space.sys.get('modules'), w_fullname)
except OperationError:
raise oefmt(
@@ -1331,7 +1331,7 @@
def _make_function(self, oparg, freevars=None):
space = self.space
w_qualname = self.popvalue()
- qualname = self.space.unicode_w(w_qualname)
+ qualname = self.space.utf8_w(w_qualname)
w_codeobj = self.popvalue()
codeobj = self.space.interp_w(PyCode, w_codeobj)
if freevars is not None:
@@ -1647,7 +1647,7 @@
lst = []
for i in range(itemcount-1, -1, -1):
w_item = self.peekvalue(i)
- lst.append(space.unicode_w(w_item))
+ lst.append(space.utf8_w(w_item))
self.dropvalues(itemcount)
w_res = space.newtext(u''.join(lst))
self.pushvalue(w_res)
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -97,10 +97,7 @@
newtext = wrap
def text_w(self, s):
- return self.unicode_w(s).encode('utf-8')
-
- def unicode_w(self, s):
- return unicode(s)
+ return self.utf8_w(s)
def len(self, x):
return len(x)
@@ -342,14 +339,14 @@
def test_unwrap_error(self):
space = DummySpace()
valuedummy = object()
- def unicode_w(w):
+ def utf8_w(w):
if w is None:
raise OperationError(TypeError, None)
if w is valuedummy:
raise OperationError(ValueError, None)
- return str(w)
- space.unicode_w = unicode_w
- space.text_w = unicode_w
+ return bytes(w, 'utf-8')
+ space.utf8_w = utf8_w
+ space.text_w = utf8_w
excinfo = py.test.raises(OperationError, Arguments, space, [],
["a"], [1], w_starstararg={None: 1})
assert excinfo.value.w_type is TypeError
diff --git a/pypy/module/_cffi_backend/errorbox.py b/pypy/module/_cffi_backend/errorbox.py
--- a/pypy/module/_cffi_backend/errorbox.py
+++ b/pypy/module/_cffi_backend/errorbox.py
@@ -86,7 +86,7 @@
return
w_text = self.space.call_function(w_done)
- p = rffi.unicode2wcharp(self.space.unicode_w(w_text),
+ p = rffi.unicode2wcharp(self.space.utf8_w(w_text),
track_allocation=False)
if self.text_p:
rffi.free_wcharp(self.text_p, track_allocation=False)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -101,7 +101,7 @@
startpos, endpos):
w_replace, newpos = errorhandler(errors, encoding, reason, input,
startpos, endpos)
- return space.unicode_w(w_replace), newpos
+ return space.utf8_w(w_replace), newpos
return decode_call_errorhandler
def make_encode_errorhandler(self, space):
@@ -111,7 +111,7 @@
w_replace, newpos = errorhandler(errors, encoding, reason, input,
startpos, endpos)
if space.isinstance_w(w_replace, space.w_unicode):
- return space.unicode_w(w_replace), None, newpos
+ return space.utf8_w(w_replace), None, newpos
return None, space.bytes_w(w_replace), newpos
return encode_call_errorhandler
@@ -1052,7 +1052,7 @@
if errors is None:
errors = 'strict'
if space.isinstance_w(w_uni, space.w_unicode):
- uni = space.unicode_w(w_uni)
+ uni = space.utf8_w(w_uni)
state = space.fromcache(CodecState)
result = runicode.unicode_encode_unicode_internal(
uni, len(uni), errors, state.encode_error_handler)
diff --git a/pypy/module/_csv/interp_csv.py b/pypy/module/_csv/interp_csv.py
--- a/pypy/module/_csv/interp_csv.py
+++ b/pypy/module/_csv/interp_csv.py
@@ -43,7 +43,7 @@
if w_src is None:
return default
try:
- return space.unicode_w(w_src)
+ return space.utf8_w(w_src)
except OperationError as e:
if e.match(space, space.w_TypeError):
raise oefmt(space.w_TypeError, '"%s" must be a string', attrname)
@@ -56,7 +56,7 @@
return u'\0'
if not space.isinstance_w(w_src, space.w_unicode):
raise oefmt(space.w_TypeError, '"%s" must be string, not %T', name, w_src)
- src = space.unicode_w(w_src)
+ src = space.utf8_w(w_src)
if len(src) == 1:
return src[0]
if len(src) == 0:
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -73,13 +73,13 @@
break
raise
self.line_num += 1
- line = space.unicode_w(w_line)
+ line = space.utf8_w(w_line)
for c in line:
- if c == u'\0':
+ if c == b'\0':
raise self.error(u"line contains NULL byte")
if state == START_RECORD:
- if c == u'\n' or c == u'\r':
+ if c == b'\n' or c == b'\r':
state = EAT_CRNL
continue
# normal character - handle as START_FIELD
diff --git a/pypy/module/_csv/interp_writer.py b/pypy/module/_csv/interp_writer.py
--- a/pypy/module/_csv/interp_writer.py
+++ b/pypy/module/_csv/interp_writer.py
@@ -42,9 +42,9 @@
if space.is_w(w_field, space.w_None):
field = u""
elif space.isinstance_w(w_field, space.w_float):
- field = space.unicode_w(space.repr(w_field))
+ field = space.utf8_w(space.repr(w_field))
else:
- field = space.unicode_w(space.str(w_field))
+ field = space.utf8_w(space.str(w_field))
#
if dialect.quoting == QUOTE_NONNUMERIC:
try:
diff --git a/pypy/module/_lsprof/interp_lsprof.py b/pypy/module/_lsprof/interp_lsprof.py
--- a/pypy/module/_lsprof/interp_lsprof.py
+++ b/pypy/module/_lsprof/interp_lsprof.py
@@ -218,17 +218,17 @@
def create_spec_for_function(space, w_func):
assert isinstance(w_func, Function)
- pre = u'built-in function ' if isinstance(w_func, BuiltinFunction) else u''
+ pre = b'built-in function ' if isinstance(w_func, BuiltinFunction) else b''
if w_func.w_module is not None:
- module = space.unicode_w(w_func.w_module)
- if module != u'builtins':
- return u'<%s%s.%s>' % (pre, module, w_func.getname(space))
- return u'<%s%s>' % (pre, w_func.getname(space))
+ module = space.utf8_w(w_func.w_module)
+ if module != b'builtins':
+ return b'<%s%s.%s>' % (pre, module, w_func.getname(space))
+ return b'<%s%s>' % (pre, w_func.getname(space))
def create_spec_for_object(space, w_type):
class_name = w_type.getname(space)
- return u"<'%s' object>" % (class_name,)
+ return b"<'%s' object>" % (class_name,)
class W_DelayedBuiltinStr(W_Root):
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -17,7 +17,7 @@
def raw_encode_basestring_ascii(space, w_unicode):
- u = space.unicode_w(w_unicode)
+ u = space.utf8_w(w_unicode).encode()
for i in range(len(u)):
c = ord(u[i])
if c < 32 or c > 126 or c == ord('\\') or c == ord('"'):
diff --git a/pypy/module/_pypyjson/targetjson.py b/pypy/module/_pypyjson/targetjson.py
--- a/pypy/module/_pypyjson/targetjson.py
+++ b/pypy/module/_pypyjson/targetjson.py
@@ -75,10 +75,6 @@
assert isinstance(w_x, W_String)
return w_x.strval
- def unicode_w(self, w_x):
- assert isinstance(w_x, W_Unicode)
- return w_x.unival
-
@dont_inline
def call_method(self, obj, name, arg):
assert name == 'append'
diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py
--- a/pypy/module/_socket/interp_socket.py
+++ b/pypy/module/_socket/interp_socket.py
@@ -233,8 +233,8 @@
def _dealloc_warn(self):
space = self.space
try:
- msg = (u"unclosed %s" %
- space.unicode_w(space.repr(self)))
+ msg = (b"unclosed %s" %
+ space.utf8_w(space.repr(self)))
space.warn(space.newtext(msg), space.w_ResourceWarning)
except OperationError as e:
# Spurious errors can appear at shutdown
diff --git a/pypy/module/_socket/test/test_sock_app.py b/pypy/module/_socket/test/test_sock_app.py
--- a/pypy/module/_socket/test/test_sock_app.py
+++ b/pypy/module/_socket/test/test_sock_app.py
@@ -130,7 +130,7 @@
assert space.bytes_w(w_p) == packed
w_ip = space.appexec([w_socket, w_p],
"(_socket, p): return _socket.inet_ntoa(p)")
- assert space.unicode_w(w_ip) == ip
+ assert space.utf8_w(w_ip) == ip
def test_pton_ntop_ipv4():
if not hasattr(socket, 'inet_pton'):
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -111,7 +111,7 @@
def repr_w(self):
space = self.space
- u = space.unicode_w(space.repr(self.w_pattern))
+ u = space.utf8_w(space.repr(self.w_pattern)).decode()
if len(u) > 200:
u = u[:200]
flag_items = []
@@ -163,8 +163,8 @@
string = None
buf = None
space = self.space
- if space.isinstance_w(w_string, space.w_unicode):
- unicodestr = space.unicode_w(w_string)
+ if space.isinstance_w(w_string, space.w_utf8):
+ unicodestr = space.utf8_w(w_string).decode()
length = len(unicodestr)
elif space.isinstance_w(w_string, space.w_bytes):
string = space.bytes_w(w_string)
@@ -565,7 +565,7 @@
ctx = self.ctx
start, end = ctx.match_start, ctx.match_end
w_s = slice_w(space, ctx, start, end, space.w_None)
- u = space.unicode_w(space.repr(w_s))
+ u = space.utf8_w(space.repr(w_s)).decode()
if len(u) > 50:
u = u[:50]
return space.newtext(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' %
diff --git a/pypy/module/_string/formatter.py b/pypy/module/_string/formatter.py
--- a/pypy/module/_string/formatter.py
+++ b/pypy/module/_string/formatter.py
@@ -1,10 +1,10 @@
def formatter_parser(space, w_unicode):
from pypy.objspace.std.newformat import unicode_template_formatter
- tformat = unicode_template_formatter(space, space.unicode_w(w_unicode))
+ tformat = unicode_template_formatter(space, space.utf8_w(w_unicode))
return tformat.formatter_parser()
def formatter_field_name_split(space, w_unicode):
from pypy.objspace.std.newformat import unicode_template_formatter
- tformat = unicode_template_formatter(space, space.unicode_w(w_unicode))
+ tformat = unicode_template_formatter(space, space.utf8_w(w_unicode))
return tformat.formatter_field_name_split()
diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py
--- a/pypy/module/_warnings/interp_warnings.py
+++ b/pypy/module/_warnings/interp_warnings.py
@@ -247,9 +247,9 @@
w_stderr = space.sys.get("stderr")
# Print "filename:lineno: category: text\n"
- message = u"%s:%d: %s: %s\n" % (space.unicode_w(w_filename), lineno,
- space.unicode_w(w_name),
- space.unicode_w(w_text))
+ message = b"%s:%d: %s: %s\n" % (space.utf8_w(w_filename), lineno,
+ space.utf8_w(w_name),
+ space.utf8_w(w_text))
space.call_method(w_stderr, "write", space.newtext(message))
# Print " source_line\n"
@@ -267,7 +267,7 @@
if not w_sourceline:
return
- line = space.unicode_w(w_sourceline)
+ line = space.utf8_w(w_sourceline)
if not line:
return
diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py
--- a/pypy/module/_winreg/interp_winreg.py
+++ b/pypy/module/_winreg/interp_winreg.py
@@ -222,7 +222,7 @@
if typ != rwinreg.REG_SZ:
raise oefmt(space.w_ValueError, "Type must be winreg.REG_SZ")
hkey = hkey_w(w_hkey, space)
- with rffi.scoped_unicode2wcharp(space.unicode_w(w_subkey)) as subkey:
+ with rffi.scoped_unicode2wcharp(space.utf8_w(w_subkey).decode()) as subkey:
c_subkey = rffi.cast(rffi.CCHARP, subkey)
with rffi.scoped_unicode2wcharp(value) as dataptr:
c_dataptr = rffi.cast(rffi.CCHARP, dataptr)
@@ -246,7 +246,7 @@
if space.is_w(w_subkey, space.w_None):
subkey = None
else:
- subkey = space.unicode_w(w_subkey)
+ subkey = space.utf8_w(w_subkey).decode()
with rffi.scoped_unicode2wcharp(subkey) as wide_subkey:
c_subkey = rffi.cast(rffi.CCHARP, wide_subkey)
with lltype.scoped_alloc(rwin32.PLONG.TO, 1) as bufsize_p:
@@ -296,7 +296,7 @@
buf = lltype.malloc(rffi.CCHARP.TO, buflen, flavor='raw')
buf[0] = '\0'
else:
- buf = rffi.unicode2wcharp(space.unicode_w(w_value))
+ buf = rffi.unicode2wcharp(space.utf8_w(w_value).decode())
buf = rffi.cast(rffi.CCHARP, buf)
buflen = (space.len_w(w_value) * 2) + 1
@@ -314,7 +314,7 @@
while True:
try:
w_item = space.next(w_iter)
- item = space.unicode_w(w_item)
+ item = space.utf8_w(w_item).decode()
strings.append(item)
buflen += 2 * (len(item) + 1)
except OperationError as e:
@@ -455,7 +455,7 @@
if space.is_w(w_subkey, space.w_None):
subkey = None
else:
- subkey = space.unicode_w(w_subkey)
+ subkey = space.utf8_w(w_subkey).decode()
null_dword = lltype.nullptr(rwin32.LPDWORD.TO)
with rffi.scoped_unicode2wcharp(subkey) as wide_subkey:
c_subkey = rffi.cast(rffi.CCHARP, wide_subkey)
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -754,7 +754,7 @@
return space.newtext("array('%s')" % self.typecode)
elif self.typecode == "u":
r = space.repr(self.descr_tounicode(space))
- s = u"array('u', %s)" % space.unicode_w(r)
+ s = b"array('b', %s)" % space.utf8_w(r)
return space.newtext(s)
else:
r = space.repr(self.descr_tolist(space))
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1707,13 +1707,13 @@
else:
look_for = also_look_for
assert look_for is not None
- msg = u"function %s not found in library %s" % (
- look_for.decode('utf-8'), space.unicode_w(space.newfilename(path)))
+ msg = b"function %s not found in library %s" % (
+ look_for.decode('utf-8'), space.utf8_w(space.newfilename(path)))
w_path = space.newfilename(path)
raise_import_error(space, space.newtext(msg), w_name, w_path)
def get_init_name(space, w_name):
- name_u = space.unicode_w(w_name)
+ name_u = space.utf8_w(w_name).decode()
basename_u = name_u.split(u'.')[-1]
try:
basename = basename_u.encode('ascii')
diff --git a/pypy/module/cpyext/state.py b/pypy/module/cpyext/state.py
--- a/pypy/module/cpyext/state.py
+++ b/pypy/module/cpyext/state.py
@@ -141,7 +141,7 @@
argv = space.sys.get('argv')
if space.len_w(argv):
argv0 = space.getitem(argv, space.newint(0))
- progname = space.unicode_w(argv0)
+ progname = space.utf8_w(argv0).decode()
else:
progname = u"pypy3"
self.programname = rffi.unicode2wcharp(progname)
diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py
--- a/pypy/module/cpyext/test/test_cpyext.py
+++ b/pypy/module/cpyext/test/test_cpyext.py
@@ -101,7 +101,7 @@
def is_interned_string(space, w_obj):
try:
- u = space.unicode_w(w_obj)
+ u = space.utf8_w(w_obj)
except OperationError:
return False
return space.interned_strings.get(u) is not None
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -658,8 +658,8 @@
b_text = rffi.str2charp('caf\x82xx')
b_encoding = rffi.str2charp('cp437')
b_errors = rffi.str2charp('strict')
- assert space.unicode_w(PyUnicode_Decode(
- space, b_text, 4, b_encoding, b_errors)) == u'caf\xe9'
+ assert space.utf8_w(PyUnicode_Decode(
+ space, b_text, 4, b_encoding, b_errors)).decode() == u'caf\xe9'
assert (space.utf8_w(
PyUnicode_Decode(space, b_text, 4, b_encoding, None)) ==
u'caf\xe9'.encode("utf-8"))
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -72,7 +72,7 @@
def unicode_attach(space, py_obj, w_obj, w_userdata=None):
"Fills a newly allocated PyUnicodeObject with a unicode string"
- value = space.unicode_w(w_obj)
+ value = space.utf8_w(w_obj).decode()
set_wsize(py_obj, len(value))
set_wbuffer(py_obj, lltype.nullptr(rffi.CWCHARP.TO))
_readify(space, py_obj, value)
@@ -353,7 +353,7 @@
if not get_wbuffer(ref):
# Copy unicode buffer
w_unicode = from_ref(space, rffi.cast(PyObject, ref))
- u = space.unicode_w(w_unicode)
+ u = space.utf8_w(w_unicode).decode()
set_wbuffer(ref, rffi.unicode2wcharp(u))
set_wsize(ref, len(u))
if psize:
@@ -943,7 +943,7 @@
than, equal, and greater than, respectively. It is best to pass only
ASCII-encoded strings, but the function interprets the input string as
ISO-8859-1 if it contains non-ASCII characters."""
- uni = space.unicode_w(w_uni)
+ uni = space.utf8_w(w_uni).decode()
i = 0
# Compare Unicode string and source character set string
while i < len(uni) and string[i] != '\0':
@@ -1054,7 +1054,7 @@
@cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
def PyUnicode_Substring(space, w_str, start, end):
- usrc = space.unicode_w(w_str)
+ usrc = space.utf8_w(w_str).decode()
length = len(usrc)
if start < 0 or end < 0:
raise oefmt(space.w_IndexError, "string index out of range")
diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py
--- a/pypy/module/exceptions/interp_exceptions.py
+++ b/pypy/module/exceptions/interp_exceptions.py
@@ -150,10 +150,10 @@
def descr_repr(self, space):
if self.args_w:
- args_repr = space.unicode_w(
+ args_repr = space.utf8_w(
space.repr(space.newtuple(self.args_w)))
else:
- args_repr = u"()"
+ args_repr = b"()"
clsname = self.getclass(space).getname(space)
return space.newtext(clsname + args_repr)
@@ -587,38 +587,38 @@
def descr_str(self, space):
if self.w_errno:
- errno = space.unicode_w(space.str(self.w_errno))
+ errno = space.utf8_w(space.str(self.w_errno))
else:
- errno = u""
+ errno = b""
if self.w_strerror:
- strerror = space.unicode_w(space.str(self.w_strerror))
+ strerror = space.utf8_w(space.str(self.w_strerror))
else:
- strerror = u""
+ strerror = b""
if rwin32.WIN32 and self.w_winerror:
- winerror = space.unicode_w(space.str(self.w_winerror))
+ winerror = space.utf8_w(space.str(self.w_winerror))
# If available, winerror has the priority over errno
if self.w_filename:
if self.w_filename2:
- return space.newtext(u"[WinError %s] %s: %s -> %s" % (
+ return space.newtext(b"[WinError %s] %s: %s -> %s" % (
winerror, strerror,
- space.unicode_w(space.repr(self.w_filename)),
- space.unicode_w(space.repr(self.w_filename2))))
- return space.newtext(u"[WinError %s] %s: %s" % (
+ space.utf8_w(space.repr(self.w_filename)),
+ space.utf8_w(space.repr(self.w_filename2))))
+ return space.newtext(b"[WinError %s] %s: %s" % (
winerror, strerror,
- space.unicode_w(space.repr(self.w_filename))))
- return space.newtext(u"[WinError %s] %s" % (
+ space.utf8_w(space.repr(self.w_filename))))
+ return space.newtext(b"[WinError %s] %s" % (
winerror, strerror))
if self.w_filename:
if self.w_filename2:
- return space.newtext(u"[Errno %s] %s: %s -> %s" % (
+ return space.newtext(b"[Errno %s] %s: %s -> %s" % (
errno, strerror,
- space.unicode_w(space.repr(self.w_filename)),
- space.unicode_w(space.repr(self.w_filename2))))
- return space.newtext(u"[Errno %s] %s: %s" % (
+ space.utf8_w(space.repr(self.w_filename)),
+ space.utf8_w(space.repr(self.w_filename2))))
+ return space.newtext(b"[Errno %s] %s: %s" % (
errno, strerror,
- space.unicode_w(space.repr(self.w_filename))))
+ space.utf8_w(space.repr(self.w_filename))))
if self.w_errno and self.w_strerror:
- return space.newtext(u"[Errno %s] %s" % (
+ return space.newtext(b"[Errno %s] %s" % (
errno, strerror))
return W_BaseException.descr_str(self, space)
@@ -785,7 +785,7 @@
values_w = space.fixedview(self.args_w[1])
w_tuple = space.newtuple(values_w + [self.w_lastlineno])
args_w = [self.args_w[0], w_tuple]
- args_repr = space.unicode_w(space.repr(space.newtuple(args_w)))
+ args_repr = space.utf8_w(space.repr(space.newtuple(args_w)))
clsname = self.getclass(space).getname(space)
return space.newtext(clsname + args_repr)
else:
@@ -793,15 +793,15 @@
# CPython Issue #21669: Custom error for 'print' & 'exec' as statements
def _report_missing_parentheses(self, space):
- text = space.unicode_w(self.w_text)
- if u'(' in text:
+ text = space.utf8_w(self.w_text)
+ if b'(' in text:
# Use default error message for any line with an opening paren
return
# handle the simple statement case
if self._check_for_legacy_statements(space, text, 0):
return
# Handle the one-line complex statement case
- pos = text.find(u':')
+ pos = text.find(b':')
if pos < 0:
return
# Check again, starting from just after the colon
@@ -817,11 +817,11 @@
if start > 0:
text = text[start:]
# Check for legacy print statements
- if text.startswith(u"print "):
+ if text.startswith(b"print "):
self.w_msg = space.newtext("Missing parentheses in call to 'print'")
return True
# Check for legacy exec statements
- if text.startswith(u"exec "):
+ if text.startswith(b"exec "):
self.w_msg = space.newtext("Missing parentheses in call to 'exec'")
return True
return False
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -2258,7 +2258,7 @@
space.newint(info[2])])
def _getfinalpathname(space, w_path):
- path = space.unicode_w(w_path)
+ path = space.utf8_w(w_path)
try:
result = nt._getfinalpathname(path)
except nt.LLNotImplemented as e:
diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py
--- a/pypy/module/posix/interp_scandir.py
+++ b/pypy/module/posix/interp_scandir.py
@@ -27,7 +27,7 @@
if space.isinstance_w(w_path, space.w_bytes):
raise oefmt(space.w_TypeError, "os.scandir() doesn't support bytes path"
" on Windows, use Unicode instead")
- path = space.unicode_w(w_path)
+ path = space.utf8_w(w_path)
result_is_bytes = False
# 'path' is always bytes on posix and always unicode on windows
@@ -157,8 +157,8 @@
self.w_name = w_name
def descr_repr(self, space):
- u = space.unicode_w(space.repr(self.w_name))
- return space.newtext(u"<DirEntry %s>" % u)
+ u = space.utf8_w(space.repr(self.w_name))
+ return space.newtext(b"<DirEntry %s>" % u)
def fget_name(self, space):
return self.w_name
diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py
--- a/pypy/module/pyexpat/interp_pyexpat.py
+++ b/pypy/module/pyexpat/interp_pyexpat.py
@@ -639,7 +639,7 @@
"""Parse(data[, isfinal])
Parse XML data. `isfinal' should be true at end of input."""
if space.isinstance_w(w_data, space.w_unicode):
- data = encode_utf8(space, w_data.unicode_w(space))
+ data = encode_utf8(space, w_data.utf8_w(space))
# Explicitly set UTF-8 encoding. Return code ignored.
XML_SetEncoding(self.itself, "utf-8")
else:
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py
--- a/pypy/module/time/interp_time.py
+++ b/pypy/module/time/interp_time.py
@@ -616,7 +616,7 @@
# it saves the string that is later deleted when this
# function is called again. A refactoring of this module
# could remove this
- tm_zone = encode_utf8(space, space.unicode_w(tup_w[9]), allow_surrogates=True)
+ tm_zone = space.utf8_w(tup_w[9])
malloced_str = rffi.str2charp(tm_zone, track_allocation=False)
if old_tm_zone != lltype.nullptr(rffi.CCHARP.TO):
rffi.free_charp(old_tm_zone, track_allocation=False)
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -203,7 +203,7 @@
def descr_fromhex(space, w_bytearraytype, w_hexstring):
if not space.is_w(space.type(w_hexstring), space.w_unicode):
raise oefmt(space.w_TypeError, "must be str, not %T", w_hexstring)
- hexstring = space.unicode_w(w_hexstring)
+ hexstring = space.utf8_w(w_hexstring)
data = _hexstring_to_array(space, hexstring)
# in CPython bytearray.fromhex is a staticmethod, so
# we ignore w_type and always return a bytearray
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -566,7 +566,7 @@
if not space.is_w(space.type(w_hexstring), space.w_unicode):
raise oefmt(space.w_TypeError, "must be str, not %T", w_hexstring)
from pypy.objspace.std.bytearrayobject import _hexstring_to_array
- hexstring = space.unicode_w(w_hexstring)
+ hexstring = space.utf8_w(w_hexstring)
bytes = ''.join(_hexstring_to_array(space, hexstring))
return W_BytesObject(bytes)
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -1436,8 +1436,8 @@
def descr_repr(self, space):
typename = space.type(self).getname(space)
w_seq = space.call_function(space.w_list, self)
- seq_repr = space.unicode_w(space.repr(w_seq))
- return space.newtext(u"%s(%s)" % (typename, seq_repr))
+ seq_repr = space.utf8_w(space.repr(w_seq))
+ return space.newtext(b"%s(%s)" % (typename, seq_repr))
def descr_len(self, space):
return space.len(self.w_dict)
diff --git a/pypy/objspace/std/dictproxyobject.py b/pypy/objspace/std/dictproxyobject.py
--- a/pypy/objspace/std/dictproxyobject.py
+++ b/pypy/objspace/std/dictproxyobject.py
@@ -44,8 +44,8 @@
return space.str(self.w_mapping)
def descr_repr(self, space):
- return space.newtext(u"mappingproxy(%s)" %
- (space.unicode_w(space.repr(self.w_mapping)),))
+ return space.newtext(b"mappingproxy(%s)" %
+ (space.utf8_w(space.repr(self.w_mapping)),))
@unwrap_spec(w_default=WrappedDefault(None))
def get_w(self, space, w_key, w_default):
diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -447,14 +447,14 @@
# arbitrary unicode chars if w_value is an arbitrary unicode
# string
w_value = self.space.repr(w_value)
- self.std_wp(self.space.unicode_w(w_value))
+ self.std_wp(self.space.utf8_w(w_value))
def fmt_a(self, w_value):
from pypy.objspace.std.unicodeobject import ascii_from_object
w_value = ascii_from_object(self.space, w_value)
# %a calls ascii(), which should return an ascii unicode string
if do_unicode:
- value = self.space.unicode_w(w_value)
+ value = self.space.utf8_w(w_value)
else:
value = self.space.text_w(w_value)
self.std_wp(value)
@@ -498,7 +498,7 @@
raise oefmt(space.w_TypeError, "%c requires int or single byte")
else:
if space.isinstance_w(w_value, space.w_unicode):
- ustr = space.unicode_w(w_value)
+ ustr = space.utf8_w(w_value)
if len(ustr) == 1:
self.std_wp(ustr)
return
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -387,8 +387,7 @@
def _unmarshal_strlist(u):
items_w = _unmarshal_tuple_w(u)
- return [_encode_utf8(u.space, u.space.unicode_w(w_item))
- for w_item in items_w]
+ return [u.space.utf8_w(w_item) for w_item in items_w]
def _unmarshal_tuple_w(u):
w_obj = u.get_w_obj()
@@ -414,8 +413,8 @@
varnames = _unmarshal_strlist(u)
freevars = _unmarshal_strlist(u)
cellvars = _unmarshal_strlist(u)
- filename = _encode_utf8(space, space.unicode0_w(u.get_w_obj()))
- name = _encode_utf8(space, space.unicode_w(u.get_w_obj()))
+ filename = space.utf8_0_w(u.get_w_obj())
+ name = space.utf8_w(u.get_w_obj())
firstlineno = u.get_int()
lnotab = space.bytes_w(u.get_w_obj())
filename = assert_str0(filename)
@@ -442,12 +441,11 @@
m.atom_str(typecode, s)
# surrogate-preserving variants
-_encode_utf8 = unicodehelper.encode_utf8sp
_decode_utf8 = unicodehelper.decode_utf8sp
@marshaller(W_UnicodeObject)
def marshal_unicode(space, w_unicode, m):
- s = _encode_utf8(space, space.unicode_w(w_unicode))
+ s = space.utf8_w(w_unicode)
_marshal_unicode(space, s, m, w_unicode=w_unicode)
@unmarshaller(TYPE_UNICODE)
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -572,7 +572,7 @@
space = self.space
if not space.is_w(space.type(w_string), space.w_unicode):
w_string = space.str(w_string)
- string = space.unicode_w(w_string)
+ string = space.utf8_w(w_string)
if self._parse_spec("s", "<"):
return self.wrap(string)
if self._type != "s":
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -762,12 +762,12 @@
w_module = w_type.lookup("__module__")
if w_module is not None:
try:
- modulename = self.unicode_w(w_module)
+ modulename = self.utf8_w(w_module)
except OperationError as e:
if not e.match(self, self.w_TypeError):
raise
else:
- classname = u'%s.%s' % (modulename, classname)
+ classname = b'%s.%s' % (modulename, classname)
else:
classname = w_type.name.decode('utf-8')
return classname
diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py
--- a/pypy/objspace/std/test/test_dictmultiobject.py
+++ b/pypy/objspace/std/test/test_dictmultiobject.py
@@ -1291,9 +1291,9 @@
assert isinstance(string, str)
return string
- def unicode_w(self, u):
- assert isinstance(u, unicode)
- return u
+ def utf8_w(self, b):
+ assert isinstance(b, str)  # expects a plain str; it is returned unchanged
+ return b
def int_w(self, integer, allow_conversion=True):
assert isinstance(integer, int)
diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py
--- a/pypy/objspace/std/tupleobject.py
+++ b/pypy/objspace/std/tupleobject.py
@@ -103,10 +103,10 @@
items = self.tolist()
if len(items) == 1:
return space.newtext(
- u"(" + space.unicode_w(space.repr(items[0])) + u",)")
- tmp = u", ".join([space.unicode_w(space.repr(item))
+ b"(" + space.utf8_w(space.repr(items[0])) + b",)")
+ tmp = b", ".join([space.utf8_w(space.repr(item))
for item in items])
- return space.newtext(u"(" + tmp + u")")
+ return space.newtext(b"(" + tmp + b")")
def descr_hash(self, space):
raise NotImplementedError
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -204,7 +204,7 @@
w_qualname = self.dict_w.pop('__qualname__', None)
if w_qualname is not None:
if space.isinstance_w(w_qualname, space.w_unicode):
- self.qualname = space.unicode_w(w_qualname)
+ self.qualname = space.utf8_w(w_qualname)
elif not self.flag_cpytype:
raise oefmt(space.w_TypeError,
"type __qualname__ must be a str, not %T",
@@ -723,9 +723,9 @@
if w_mod is None or not space.isinstance_w(w_mod, space.w_text):
mod = None
else:
- mod = space.unicode_w(w_mod)
- if mod is not None and mod != u'builtins':
- return space.newtext(u"<class '%s.%s'>" % (mod, self.getqualname(space)))
+ mod = space.utf8_w(w_mod)
+ if mod is not None and mod != b'builtins':
+ return space.newtext(b"<class '%s.%s'>" % (mod, self.getqualname(space)))
else:
return space.newtext("<class '%s'>" % (self.name,))
@@ -869,7 +869,7 @@
w_type = _check(space, w_type)
if not w_type.is_heaptype():
raise oefmt(space.w_TypeError, "can't set %N.__qualname__", w_type)
- w_type.qualname = space.unicode_w(w_value)
+ w_type.qualname = space.utf8_w(w_value)
def descr_get__mro__(space, w_type):
w_type = _check(space, w_type)
@@ -1158,7 +1158,7 @@
if not space.isinstance_w(w_name, space.w_text):
raise oefmt(space.w_TypeError,
"__slots__ items must be strings, not '%T'", w_name)
- if not _isidentifier(space.unicode_w(w_name)):
+ if not _isidentifier(space.utf8_w(w_name)):
raise oefmt(space.w_TypeError, "__slots__ must be identifiers")
return w_name.text_w(space)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -210,15 +210,15 @@
@staticmethod
def descr_maketrans(space, w_type, w_x, w_y=None, w_z=None):
- y = None if space.is_none(w_y) else space.unicode_w(w_y)
- z = None if space.is_none(w_z) else space.unicode_w(w_z)
+ y = None if space.is_none(w_y) else space.utf8_w(w_y)
+ z = None if space.is_none(w_z) else space.utf8_w(w_z)
w_new = space.newdict()
if y is not None:
# x must be a string too, of equal length
ylen = len(y)
try:
- x = space.unicode_w(w_x)
+ x = space.utf8_w(w_x)
except OperationError as e:
if not e.match(space, space.w_TypeError):
raise
@@ -257,7 +257,7 @@
w_key, w_value = space.unpackiterable(w_item, 2)
if space.isinstance_w(w_key, space.w_unicode):
# convert string keys to integer keys
- key = space.unicode_w(w_key)
+ key = space.utf8_w(w_key)
if len(key) != 1:
raise oefmt(space.w_ValueError,
"string keys in translate table must be "
@@ -283,7 +283,7 @@
if space.is_w(space.type(self), space.w_unicode):
return self
# Subtype -- return genuine unicode string with the same value.
- return space.newtext(space.unicode_w(self))
+ return space.newtext(space.utf8_w(self))
def descr_hash(self, space):
x = compute_hash(self._utf8)
More information about the pypy-commit mailing list