[pypy-commit] pypy unicode-utf8-py3: newunicode -> newtext, newtext now accepts utf8-encoded bytes or unicode
mattip
pypy.commits at gmail.com
Thu Jun 14 01:43:24 EDT 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94761:4c4b3a83fd29
Date: 2018-06-13 21:00 -0700
http://bitbucket.org/pypy/pypy/changeset/4c4b3a83fd29/
Log: newunicode -> newtext, newtext now accepts utf8-encoded bytes or
unicode
diff --git a/pypy/interpreter/astcompiler/fstring.py b/pypy/interpreter/astcompiler/fstring.py
--- a/pypy/interpreter/astcompiler/fstring.py
+++ b/pypy/interpreter/astcompiler/fstring.py
@@ -23,7 +23,7 @@
def f_constant_string(astbuilder, joined_pieces, u, atom_node):
space = astbuilder.space
- add_constant_string(astbuilder, joined_pieces, space.newunicode(u),
+ add_constant_string(astbuilder, joined_pieces, space.newtext(u),
atom_node)
def f_string_compile(astbuilder, source, atom_node):
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -107,7 +107,7 @@
def getrepr(self, space, info, moreinfo=u''):
addrstring = unicode(self.getaddrstring(space))
- return space.newunicode(u"<%s at 0x%s%s>" % (info, addrstring, moreinfo))
+ return space.newtext(u"<%s at 0x%s%s>" % (info, addrstring, moreinfo))
def getslotvalue(self, index):
raise NotImplementedError
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -307,7 +307,7 @@
w_value = self._w_value
if w_value is None:
value = self._compute_value(space)
- self._w_value = w_value = space.newunicode(value)
+ self._w_value = w_value = space.newtext(value)
return w_value
def _compute_value(self, space):
@@ -626,7 +626,7 @@
msg = u'Windows Error %d' % winerror
w_errno = space.w_None
w_winerror = space.newint(winerror)
- w_msg = space.newunicode(msg)
+ w_msg = space.newtext(msg)
else:
errno = e.errno
if errno == EINTR:
@@ -640,7 +640,7 @@
msg = u'error %d' % errno
w_errno = space.newint(errno)
w_winerror = space.w_None
- w_msg = space.newunicode(msg)
+ w_msg = space.newtext(msg)
if w_filename is None:
w_filename = space.w_None
@@ -672,7 +672,7 @@
def exception_from_errno(space, w_type, errno):
msg = strerror(errno)
w_error = space.call_function(w_type, space.newint(errno),
- space.newunicode(msg))
+ space.newtext(msg))
return OperationError(w_type, w_error)
def exception_from_saved_errno(space, w_type):
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -313,7 +313,7 @@
tup_base = []
tup_state = [
space.newtext(self.name),
- space.newunicode(self.qualname),
+ space.newtext(self.qualname),
w_doc,
self.code,
w_func_globals,
@@ -430,7 +430,7 @@
"__name__ must be set to a string object")
def fget_func_qualname(self, space):
- return space.newunicode(self.qualname)
+ return space.newtext(self.qualname)
def fset_func_qualname(self, space, w_name):
try:
@@ -556,7 +556,7 @@
name = u'?'
objrepr = space.unicode_w(space.repr(self.w_instance))
s = u'<bound method %s of %s>' % (name, objrepr)
- return space.newunicode(s)
+ return space.newtext(s)
def descr_method_getattribute(self, w_attr):
space = self.space
@@ -598,7 +598,7 @@
else:
w_builtins = space.getbuiltinmodule('builtins')
new_inst = space.getattr(w_builtins, space.newtext('getattr'))
- tup = [w_instance, space.newunicode(w_function.getname(space))]
+ tup = [w_instance, space.newtext(w_function.getname(space))]
return space.newtuple([new_inst, space.newtuple(tup)])
@@ -699,7 +699,7 @@
return self.space.newtext('<built-in function %s>' % (self.name,))
def descr__reduce__(self, space):
- return space.newunicode(self.qualname)
+ return space.newtext(self.qualname)
def is_builtin_code(w_func):
from pypy.interpreter.gateway import BuiltinCode
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -1122,7 +1122,7 @@
kw_defs_w = []
for name, w_def in sorted(alldefs_w.items()):
assert name in sig.kwonlyargnames
- w_name = space.newunicode(name.decode('utf-8'))
+ w_name = space.newtext(name.decode('utf-8'))
kw_defs_w.append((w_name, w_def))
return defs_w, kw_defs_w
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -42,7 +42,7 @@
def descr__repr__(self, space):
addrstring = self.getaddrstring(space)
- return space.newunicode(u"<%s object %s at 0x%s>" %
+ return space.newtext(u"<%s object %s at 0x%s>" %
(unicode(self.KIND),
self.get_qualname(),
unicode(addrstring)))
@@ -215,7 +215,7 @@
e2.record_context(space, space.getexecutioncontext())
raise e2
else:
- space.warn(space.newunicode(u"generator '%s' raised StopIteration"
+ space.warn(space.newtext(u"generator '%s' raised StopIteration"
% self.get_qualname()),
space.w_PendingDeprecationWarning)
@@ -306,7 +306,7 @@
"__name__ must be set to a string object")
def descr__qualname__(self, space):
- return space.newunicode(self.get_qualname())
+ return space.newtext(self.get_qualname())
def descr_set__qualname__(self, space, w_name):
try:
@@ -398,7 +398,7 @@
self.frame.last_instr == -1:
space = self.space
msg = u"coroutine '%s' was never awaited" % self.get_qualname()
- space.warn(space.newunicode(msg), space.w_RuntimeWarning)
+ space.warn(space.newtext(msg), space.w_RuntimeWarning)
GeneratorOrCoroutine._finalize_(self)
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -454,6 +454,6 @@
# co_name should be an identifier
name = self.co_name.decode('utf-8')
fn = space.unicode_w(self.w_filename)
- return space.newunicode(u'<code object %s at 0x%s, file "%s", line %d>' % (
+ return space.newtext(u'<code object %s at 0x%s, file "%s", line %d>' % (
name, unicode(self.getaddrstring(space)), fn,
-1 if self.co_firstlineno == 0 else self.co_firstlineno))
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -1081,7 +1081,7 @@
try:
w_pkgname = space.getattr(
w_module, space.newtext('__name__'))
- w_fullname = space.newunicode(u'%s.%s' %
+ w_fullname = space.newtext(u'%s.%s' %
(space.unicode_w(w_pkgname), space.unicode_w(w_name)))
return space.getitem(space.sys.get('modules'), w_fullname)
except OperationError:
@@ -1626,7 +1626,7 @@
if (oparg & consts.FVS_MASK) == consts.FVS_HAVE_SPEC:
w_spec = self.popvalue()
else:
- w_spec = space.newunicode(u'')
+ w_spec = space.newtext(u'')
w_value = self.popvalue()
#
conversion = oparg & consts.FVC_MASK
@@ -1649,7 +1649,7 @@
w_item = self.peekvalue(i)
lst.append(space.unicode_w(w_item))
self.dropvalues(itemcount)
- w_res = space.newunicode(u''.join(lst))
+ w_res = space.newtext(u''.join(lst))
self.pushvalue(w_res)
def _revdb_load_var(self, oparg):
diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -42,7 +42,7 @@
if len(self.text) != offset:
text, _ = str_decode_utf_8(self.text, len(self.text),
'replace')
- w_text = space.newunicode(text)
+ w_text = space.newtext(text)
return space.newtuple([
space.newtext(self.msg),
space.newtuple([
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -115,7 +115,7 @@
return W_FString(substr, rawmode)
else:
v = unicodehelper.decode_utf8(space, substr)
- return space.newunicode(v)
+ return space.newtext(v)
v = PyString_DecodeEscape(space, substr, 'strict', encoding)
return space.newbytes(v)
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -95,7 +95,6 @@
def wrap(self, obj):
return obj
newtext = wrap
- newunicode = wrap
def text_w(self, s):
return self.unicode_w(s).encode('utf-8')
diff --git a/pypy/interpreter/test/test_error.py b/pypy/interpreter/test/test_error.py
--- a/pypy/interpreter/test/test_error.py
+++ b/pypy/interpreter/test/test_error.py
@@ -135,7 +135,7 @@
w_None = None
def wrap(self, obj):
return [obj]
- newint = newtext = newunicode = newfilename = wrap
+ newint = newtext = newfilename = wrap
def call_function(self, exc, w_errno, w_msg, w_filename=None, *args):
return (exc, w_errno, w_msg, w_filename)
space = FakeSpace()
diff --git a/pypy/interpreter/test/test_fsencode.py b/pypy/interpreter/test/test_fsencode.py
--- a/pypy/interpreter/test/test_fsencode.py
+++ b/pypy/interpreter/test/test_fsencode.py
@@ -70,7 +70,7 @@
strs.append(self.special_char)
for st in strs:
# check roundtrip
- w_st = space.newunicode(st)
+ w_st = space.newtext(st)
w_enc = space.fsencode(w_st)
w_st2 = space.fsdecode(w_enc)
assert space.eq_w(w_st, w_st2)
@@ -81,7 +81,7 @@
def test_null_byte(self):
space = self.space
- w_u = space.newunicode(u'abc\x00def')
+ w_u = space.newtext(u'abc\x00def')
# this can behave in two different ways depending on how
# much initialized the space is: space.fsencode() can raise
# ValueError directly, or return a wrapped bytes with the 0
@@ -94,7 +94,7 @@
if self.special_char:
strs.append(self.special_char)
for st in strs:
- w_st = space.newunicode(st)
+ w_st = space.newtext(st)
w_enc = space.fsencode(w_st)
space.appexec([w_st, w_enc], """(u, s):
import __pypy__
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -87,7 +87,7 @@
return space.call_method(w_string, 'decode',
getfilesystemencoding(space),
space.newtext('surrogateescape'))
- return space.newunicode(uni)
+ return space.newtext(uni)
def fsencode(space, w_uni):
from pypy.module._codecs import interp_codecs
diff --git a/pypy/module/__builtin__/descriptor.py b/pypy/module/__builtin__/descriptor.py
--- a/pypy/module/__builtin__/descriptor.py
+++ b/pypy/module/__builtin__/descriptor.py
@@ -37,7 +37,7 @@
starttype_name = self.w_starttype.getname(space)
else:
starttype_name = u'NULL'
- return space.newunicode(u"<super: <class '%s'>, %s>" % (
+ return space.newtext(u"<super: <class '%s'>, %s>" % (
starttype_name, objtype_name))
def get(self, space, w_obj, w_type=None):
diff --git a/pypy/module/__pypy__/interp_stderrprinter.py b/pypy/module/__pypy__/interp_stderrprinter.py
--- a/pypy/module/__pypy__/interp_stderrprinter.py
+++ b/pypy/module/__pypy__/interp_stderrprinter.py
@@ -17,7 +17,7 @@
def descr_repr(self, space):
addrstring = unicode(self.getaddrstring(space))
- return space.newunicode(u"<StdErrPrinter(fd=%d) object at 0x%s>" %
+ return space.newtext(u"<StdErrPrinter(fd=%d) object at 0x%s>" %
(self.fd, addrstring))
def descr_noop(self, space):
diff --git a/pypy/module/_cffi_backend/cerrno.py b/pypy/module/_cffi_backend/cerrno.py
--- a/pypy/module/_cffi_backend/cerrno.py
+++ b/pypy/module/_cffi_backend/cerrno.py
@@ -27,4 +27,4 @@
if code == -1:
code = GetLastError_alt_saved()
message = FormatErrorW(code)
- return space.newtuple([space.newint(code), space.newunicode(message)])
+ return space.newtuple([space.newint(code), space.newtext(message)])
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -298,7 +298,7 @@
oc = ord(obj[pos])
raw_unicode_escape_helper_unicode(builder, oc)
pos += 1
- return space.newtuple([space.newunicode(builder.build()), w_end])
+ return space.newtuple([space.newtext(builder.build()), w_end])
elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
obj = space.bytes_w(space.getattr(w_exc, space.newtext('object')))
start = space.int_w(space.getattr(w_exc, space.newtext('start')))
@@ -310,7 +310,7 @@
oc = ord(obj[pos])
raw_unicode_escape_helper_unicode(builder, oc)
pos += 1
- return space.newtuple([space.newunicode(builder.build()), w_end])
+ return space.newtuple([space.newtext(builder.build()), w_end])
else:
raise oefmt(space.w_TypeError,
"don't know how to handle %T in error callback", w_exc)
@@ -456,7 +456,7 @@
ch = 0
if ch == 0:
raise OperationError(space.type(w_exc), w_exc)
- return space.newtuple([space.newunicode(unichr(ch)),
+ return space.newtuple([space.newtext(unichr(ch)),
space.newint(start + bytelength)])
else:
raise oefmt(space.w_TypeError,
@@ -495,7 +495,7 @@
if not consumed:
# codec complained about ASCII byte.
raise OperationError(space.type(w_exc), w_exc)
- return space.newtuple([space.newunicode(replace),
+ return space.newtuple([space.newtext(replace),
space.newint(start + consumed)])
else:
raise oefmt(space.w_TypeError,
@@ -746,7 +746,7 @@
string, len(string), errors,
final, state.decode_error_handler,
force_ignore=False)
- return space.newtuple([space.newunicode(result), space.newint(consumed)])
+ return space.newtuple([space.newtext(result), space.newint(consumed)])
# utf-8 functions are not regular, because we have to pass
# "allow_surrogates=False"
@@ -1014,7 +1014,7 @@
result, consumed = runicode.str_decode_raw_unicode_escape(
string, len(string), errors,
final, state.decode_error_handler)
- return space.newtuple([space.newunicode(result), space.newint(consumed)])
+ return space.newtuple([space.newtext(result), space.newint(consumed)])
# ____________________________________________________________
# Unicode-internal
diff --git a/pypy/module/_csv/interp_csv.py b/pypy/module/_csv/interp_csv.py
--- a/pypy/module/_csv/interp_csv.py
+++ b/pypy/module/_csv/interp_csv.py
@@ -156,12 +156,12 @@
def _get_escapechar(space, dialect):
if dialect.escapechar == u'\0':
return space.w_None
- return space.newunicode(dialect.escapechar)
+ return space.newtext(dialect.escapechar)
def _get_quotechar(space, dialect):
if dialect.quotechar == u'\0':
return space.w_None
- return space.newunicode(dialect.quotechar)
+ return space.newtext(dialect.quotechar)
W_Dialect.typedef = TypeDef(
@@ -169,12 +169,12 @@
__new__ = interp2app(W_Dialect___new__),
delimiter = interp_attrproperty('delimiter', W_Dialect,
- wrapfn='newunicode'),
+ wrapfn='newtext'),
doublequote = interp_attrproperty('doublequote', W_Dialect,
wrapfn='newbool'),
escapechar = GetSetProperty(_get_escapechar, cls=W_Dialect),
lineterminator = interp_attrproperty('lineterminator', W_Dialect,
- wrapfn='newunicode'),
+ wrapfn='newtext'),
quotechar = GetSetProperty(_get_quotechar, cls=W_Dialect),
quoting = interp_attrproperty('quoting', W_Dialect,
wrapfn='newint'),
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -31,7 +31,7 @@
msg = u'line %d: %s' % (self.line_num, msg)
w_module = space.getbuiltinmodule('_csv')
w_error = space.getattr(w_module, space.newtext('Error'))
- raise OperationError(w_error, space.newunicode(msg))
+ raise OperationError(w_error, space.newtext(msg))
def add_char(self, field_builder, c):
assert field_builder is not None
@@ -44,9 +44,9 @@
field = field_builder.build()
if self.numeric_field:
self.numeric_field = False
- w_obj = space.call_function(space.w_float, space.newunicode(field))
+ w_obj = space.call_function(space.w_float, space.newtext(field))
else:
- w_obj = space.newunicode(field)
+ w_obj = space.newtext(field)
self.fields_w.append(w_obj)
def next_w(self):
diff --git a/pypy/module/_csv/interp_writer.py b/pypy/module/_csv/interp_writer.py
--- a/pypy/module/_csv/interp_writer.py
+++ b/pypy/module/_csv/interp_writer.py
@@ -115,7 +115,7 @@
rec.append(dialect.lineterminator)
line = rec.build()
- return space.call_function(self.w_filewrite, space.newunicode(line))
+ return space.call_function(self.w_filewrite, space.newtext(line))
def writerows(self, w_seqseq):
"""Construct and write a series of sequences to a csv file.
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -732,7 +732,7 @@
w_bytes = space.call_method(self.w_buffer, "read")
w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True)
check_decoded(space, w_decoded)
- w_result = space.newunicode(self.decoded.get_chars(-1))
+ w_result = space.newtext(self.decoded.get_chars(-1))
w_final = space.add(w_result, w_decoded)
self.snapshot = None
return w_final
@@ -771,7 +771,7 @@
self._check_closed(space)
self._writeflush(space)
limit = convert_size(space, w_limit)
- return space.newunicode(self._readline(space, limit))
+ return space.newtext(self._readline(space, limit))
def _readline(self, space, limit):
# This is a separate function so that readline_w() can be jitted.
diff --git a/pypy/module/_lsprof/interp_lsprof.py b/pypy/module/_lsprof/interp_lsprof.py
--- a/pypy/module/_lsprof/interp_lsprof.py
+++ b/pypy/module/_lsprof/interp_lsprof.py
@@ -253,7 +253,7 @@
s = create_spec_for_object(space, self.w_type)
else:
s = create_spec_for_method(space, self.w_func, self.w_type)
- self.w_string = space.newunicode(s)
+ self.w_string = space.newtext(s)
return self.w_string
W_DelayedBuiltinStr.typedef = TypeDef(
diff --git a/pypy/module/_multiprocessing/interp_win32_py3.py b/pypy/module/_multiprocessing/interp_win32_py3.py
--- a/pypy/module/_multiprocessing/interp_win32_py3.py
+++ b/pypy/module/_multiprocessing/interp_win32_py3.py
@@ -9,7 +9,7 @@
message = rwin32.FormatErrorW(errno)
w_errcode = space.newint(errno)
return OperationError(space.w_WindowsError,
- space.newtuple([w_errcode, space.newunicode(message),
+ space.newtuple([w_errcode, space.newtext(message),
space.w_None, w_errcode]))
@unwrap_spec(handle=int)
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -630,7 +630,7 @@
if _MS_WINDOWS:
@unwrap_spec(code=int)
def FormatError(space, code):
- return space.newunicode(rwin32.FormatErrorW(code))
+ return space.newtext(rwin32.FormatErrorW(code))
@unwrap_spec(hresult=int)
def check_HRESULT(space, hresult):
diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py
--- a/pypy/module/_socket/interp_socket.py
+++ b/pypy/module/_socket/interp_socket.py
@@ -235,7 +235,7 @@
try:
msg = (u"unclosed %s" %
space.unicode_w(space.repr(self)))
- space.warn(space.newunicode(msg), space.w_ResourceWarning)
+ space.warn(space.newtext(msg), space.w_ResourceWarning)
except OperationError as e:
# Spurious errors can appear at shutdown
if e.match(space, space.w_Warning):
@@ -863,9 +863,9 @@
if eintr_retry:
return # only return None if eintr_retry==True
w_exception = space.call_function(w_exception_class, space.newint(e.errno),
- space.newunicode(message))
+ space.newtext(message))
else:
- w_exception = space.call_function(w_exception_class, space.newunicode(message))
+ w_exception = space.call_function(w_exception_class, space.newtext(message))
raise OperationError(w_exception_class, w_exception)
def explicit_socket_error(space, msg):
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -134,7 +134,7 @@
else:
usep = u', '
uflags = u'|'.join([item.decode('latin-1') for item in flag_items])
- return space.newunicode(u're.compile(%s%s%s)' % (u, usep, uflags))
+ return space.newtext(u're.compile(%s%s%s)' % (u, usep, uflags))
def fget_groupindex(self, space):
w_groupindex = self.w_groupindex
@@ -568,7 +568,7 @@
u = space.unicode_w(space.repr(w_s))
if len(u) > 50:
u = u[:50]
- return space.newunicode(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' %
+ return space.newtext(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' %
(start, end, u))
def cannot_copy_w(self):
diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py
--- a/pypy/module/_warnings/interp_warnings.py
+++ b/pypy/module/_warnings/interp_warnings.py
@@ -250,7 +250,7 @@
message = u"%s:%d: %s: %s\n" % (space.unicode_w(w_filename), lineno,
space.unicode_w(w_name),
space.unicode_w(w_text))
- space.call_method(w_stderr, "write", space.newunicode(message))
+ space.call_method(w_stderr, "write", space.newtext(message))
# Print " source_line\n"
if not w_sourceline:
@@ -277,7 +277,7 @@
if c not in u' \t\014':
message = u" %s\n" % (line[i:],)
break
- space.call_method(w_stderr, "write", space.newunicode(message))
+ space.call_method(w_stderr, "write", space.newtext(message))
def do_warn(space, w_message, w_category, stacklevel):
context_w = setup_context(space, stacklevel)
diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py
--- a/pypy/module/_winreg/interp_winreg.py
+++ b/pypy/module/_winreg/interp_winreg.py
@@ -11,7 +11,7 @@
message = rwin32.FormatErrorW(errcode)
w_errcode = space.newint(errcode)
raise OperationError(space.w_WindowsError,
- space.newtuple([w_errcode, space.newunicode(message),
+ space.newtuple([w_errcode, space.newtext(message),
space.w_None, w_errcode]))
class W_HKEY(W_Root):
@@ -33,7 +33,7 @@
return space.newint(self.as_int())
def descr_repr(self, space):
- return space.newunicode(u"<PyHKEY:0x%x>" % (self.as_int(),))
+ return space.newtext(u"<PyHKEY:0x%x>" % (self.as_int(),))
def descr_int(self, space):
return space.newint(self.as_int())
@@ -271,7 +271,7 @@
raiseWindowsError(space, ret, 'RegQueryValue')
length = intmask(bufsize_p[0] - 1) / 2
wide_buf = rffi.cast(rffi.CWCHARP, buf)
- return space.newunicode(rffi.wcharp2unicoden(wide_buf, length))
+ return space.newtext(rffi.wcharp2unicoden(wide_buf, length))
def convert_to_regdata(space, w_value, typ):
'''
@@ -378,7 +378,7 @@
if buf[buflen - 1] == '\x00':
buflen -= 1
s = rffi.wcharp2unicoden(buf, buflen)
- w_s = space.newunicode(s)
+ w_s = space.newtext(s)
return w_s
elif typ == rwinreg.REG_MULTI_SZ:
@@ -396,7 +396,7 @@
if len(s) == 0:
break
s = u''.join(s)
- l.append(space.newunicode(s))
+ l.append(space.newtext(s))
i += 1
return space.newlist(l)
@@ -645,7 +645,7 @@
length = intmask(retDataSize[0])
return space.newtuple([
- space.newunicode(rffi.wcharp2unicode(valuebuf)),
+ space.newtext(rffi.wcharp2unicode(valuebuf)),
convert_from_regdata(space, databuf,
length, retType[0]),
space.newint(intmask(retType[0])),
@@ -678,7 +678,7 @@
lltype.nullptr(rwin32.PFILETIME.TO))
if ret != 0:
raiseWindowsError(space, ret, 'RegEnumKeyEx')
- return space.newunicode(rffi.wcharp2unicode(rffi.cast(rffi.CWCHARP, buf)))
+ return space.newtext(rffi.wcharp2unicode(rffi.cast(rffi.CWCHARP, buf)))
def QueryInfoKey(space, w_hkey):
"""tuple = QueryInfoKey(key) - Returns information about a key.
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -755,7 +755,7 @@
elif self.typecode == "u":
r = space.repr(self.descr_tounicode(space))
s = u"array('u', %s)" % space.unicode_w(r)
- return space.newunicode(s)
+ return space.newtext(s)
else:
r = space.repr(self.descr_tolist(space))
s = "array('%s', %s)" % (self.typecode, space.text_w(r))
@@ -1141,7 +1141,7 @@
raise oefmt(space.w_ValueError,
"array contains a unicode character out of "
"range(0x110000)")
- return space.newunicode(item)
+ return space.newtext(item)
assert 0, "unreachable"
# interface
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1710,7 +1710,7 @@
msg = u"function %s not found in library %s" % (
look_for.decode('utf-8'), space.unicode_w(space.newfilename(path)))
w_path = space.newfilename(path)
- raise_import_error(space, space.newunicode(msg), w_name, w_path)
+ raise_import_error(space, space.newtext(msg), w_name, w_path)
def get_init_name(space, w_name):
name_u = space.unicode_w(w_name)
@@ -1720,7 +1720,7 @@
return 'PyInit_%s' % (basename,)
except UnicodeEncodeError:
basename = space.bytes_w(encode_object(
- space, space.newunicode(basename_u), 'punycode', None))
+ space, space.newtext(basename_u), 'punycode', None))
basename = basename.replace('-', '_')
return 'PyInitU_%s' % (basename,)
diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py
--- a/pypy/module/cpyext/pyerrors.py
+++ b/pypy/module/cpyext/pyerrors.py
@@ -215,12 +215,12 @@
if w_value:
w_error = space.call_function(w_type,
space.newint(errno),
- space.newunicode(msg),
+ space.newtext(msg),
w_value)
else:
w_error = space.call_function(w_type,
space.newint(errno),
- space.newunicode(msg))
+ space.newtext(msg))
raise OperationError(w_type, w_error)
@cpython_api([], rffi.INT_real, error=-1)
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -920,7 +920,7 @@
if decimal >= 0:
ch = unichr(ord('0') + decimal)
result.append(ch)
- return space.newunicode(result.build())
+ return space.newtext(result.build())
@cpython_api([PyObject, PyObject], rffi.INT_real, error=-2)
def PyUnicode_Compare(space, w_left, w_right):
@@ -1064,4 +1064,4 @@
if end > length:
end = length
result = usrc[start:end]
- return space.newunicode(result)
+ return space.newtext(result)
diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py
--- a/pypy/module/exceptions/interp_exceptions.py
+++ b/pypy/module/exceptions/interp_exceptions.py
@@ -155,7 +155,7 @@
else:
args_repr = u"()"
clsname = self.getclass(space).getname(space)
- return space.newunicode(clsname + args_repr)
+ return space.newtext(clsname + args_repr)
def __repr__(self):
"""representation for debugging purposes"""
@@ -599,26 +599,26 @@
# If available, winerror has the priority over errno
if self.w_filename:
if self.w_filename2:
- return space.newunicode(u"[WinError %s] %s: %s -> %s" % (
+ return space.newtext(u"[WinError %s] %s: %s -> %s" % (
winerror, strerror,
space.unicode_w(space.repr(self.w_filename)),
space.unicode_w(space.repr(self.w_filename2))))
- return space.newunicode(u"[WinError %s] %s: %s" % (
+ return space.newtext(u"[WinError %s] %s: %s" % (
winerror, strerror,
space.unicode_w(space.repr(self.w_filename))))
- return space.newunicode(u"[WinError %s] %s" % (
+ return space.newtext(u"[WinError %s] %s" % (
winerror, strerror))
if self.w_filename:
if self.w_filename2:
- return space.newunicode(u"[Errno %s] %s: %s -> %s" % (
+ return space.newtext(u"[Errno %s] %s: %s -> %s" % (
errno, strerror,
space.unicode_w(space.repr(self.w_filename)),
space.unicode_w(space.repr(self.w_filename2))))
- return space.newunicode(u"[Errno %s] %s: %s" % (
+ return space.newtext(u"[Errno %s] %s: %s" % (
errno, strerror,
space.unicode_w(space.repr(self.w_filename))))
if self.w_errno and self.w_strerror:
- return space.newunicode(u"[Errno %s] %s" % (
+ return space.newtext(u"[Errno %s] %s" % (
errno, strerror))
return W_BaseException.descr_str(self, space)
@@ -787,7 +787,7 @@
args_w = [self.args_w[0], w_tuple]
args_repr = space.unicode_w(space.repr(space.newtuple(args_w)))
clsname = self.getclass(space).getname(space)
- return space.newunicode(clsname + args_repr)
+ return space.newtext(clsname + args_repr)
else:
return W_Exception.descr_repr(self, space)
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -838,7 +838,7 @@
def strerror(space, code):
"""Translate an error code to a message string."""
try:
- return space.newunicode(_strerror(code))
+ return space.newtext(_strerror(code))
except ValueError:
raise oefmt(space.w_ValueError, "strerror() argument out of range")
@@ -885,7 +885,7 @@
# started through main() instead of wmain()
rwin32._wgetenv(u"")
for key, value in rwin32._wenviron_items():
- space.setitem(w_env, space.newunicode(key), space.newunicode(value))
+ space.setitem(w_env, space.newtext(key), space.newtext(value))
@unwrap_spec(name=unicode, value=unicode)
def putenv(space, name, value):
@@ -935,7 +935,7 @@
the file descriptor must refer to a directory.
If this functionality is unavailable, using it raises NotImplementedError."""
if space.is_none(w_path):
- w_path = space.newunicode(u".")
+ w_path = space.newtext(u".")
if space.isinstance_w(w_path, space.w_bytes):
# XXX CPython doesn't follow this path either if w_path is,
# for example, a memoryview or another buffer type
@@ -968,7 +968,7 @@
result_w = [None] * len_result
for i in range(len_result):
if _WIN32:
- result_w[i] = space.newunicode(result[i])
+ result_w[i] = space.newtext(result[i])
else:
result_w[i] = space.newfilename(result[i])
return space.newlist(result_w)
@@ -2266,7 +2266,7 @@
space.newtext(e.msg))
except OSError as e:
raise wrap_oserror2(space, e, w_path, eintr_retry=False)
- return space.newunicode(result)
+ return space.newtext(result)
def chflags():
diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py
--- a/pypy/module/posix/interp_scandir.py
+++ b/pypy/module/posix/interp_scandir.py
@@ -14,7 +14,7 @@
def scandir(space, w_path=None):
"scandir(path='.') -> iterator of DirEntry objects for given path"
if space.is_none(w_path):
- w_path = space.newunicode(u".")
+ w_path = space.newtext(u".")
if not _WIN32:
if space.isinstance_w(w_path, space.w_bytes):
@@ -45,7 +45,7 @@
else:
if len(path_prefix) > 0 and path_prefix[-1] not in (u'\\', u'/', u':'):
path_prefix += u'\\'
- w_path_prefix = space.newunicode(path_prefix)
+ w_path_prefix = space.newtext(path_prefix)
if rposix.HAVE_FSTATAT:
dirfd = rposix.c_dirfd(dirp)
else:
@@ -153,12 +153,12 @@
if not scandir_iterator.result_is_bytes:
w_name = self.space.fsdecode(w_name)
else:
- w_name = self.space.newunicode(name)
+ w_name = self.space.newtext(name)
self.w_name = w_name
def descr_repr(self, space):
u = space.unicode_w(space.repr(self.w_name))
- return space.newunicode(u"<DirEntry %s>" % u)
+ return space.newtext(u"<DirEntry %s>" % u)
def fget_name(self, space):
return self.w_name
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -111,7 +111,7 @@
i19 = int_sub(i6, i87)
i23 = unicodegetitem(ConstPtr(ptr92), i19)
- p25 = newunicode(1)
+ p25 = newtext(1)
unicodesetitem(p25, 0, i23)
p97 = call_r(ConstClass(_rpy_unicode_to_decimal_w), p25, descr=<Callr . r EF=5>)
guard_no_exception(descr=...)
diff --git a/pypy/module/select/interp_select.py b/pypy/module/select/interp_select.py
--- a/pypy/module/select/interp_select.py
+++ b/pypy/module/select/interp_select.py
@@ -83,7 +83,7 @@
message = e.get_msg_unicode()
raise OperationError(space.w_OSError,
space.newtuple([space.newint(e.errno),
- space.newunicode(message)]))
+ space.newtext(message)]))
finally:
self.running = False
break
@@ -154,7 +154,7 @@
if err != errno.EINTR:
msg = _c.socket_strerror_unicode(err)
raise OperationError(space.w_OSError, space.newtuple([
- space.newint(err), space.newunicode(msg)]))
+ space.newint(err), space.newtext(msg)]))
# got EINTR, automatic retry
space.getexecutioncontext().checksignals()
if timeout > 0.0:
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py
--- a/pypy/module/time/interp_time.py
+++ b/pypy/module/time/interp_time.py
@@ -459,8 +459,8 @@
_set_module_object(space, "timezone", space.newint(timezone))
_set_module_object(space, 'daylight', space.newint(daylight))
- tzname_w = [space.newunicode(tzname[0].decode('latin-1')),
- space.newunicode(tzname[1].decode('latin-1'))]
+ tzname_w = [space.newtext(tzname[0].decode('latin-1')),
+ space.newtext(tzname[1].decode('latin-1'))]
_set_module_object(space, 'tzname', space.newtuple(tzname_w))
_set_module_object(space, 'altzone', space.newint(altzone))
@@ -556,7 +556,7 @@
# CPython calls PyUnicode_DecodeLocale here should we do the same?
tm_zone = decode_utf8(space, rffi.charp2str(t.c_tm_zone),
allow_surrogates=True)
- extra = [space.newunicode(tm_zone),
+ extra = [space.newtext(tm_zone),
space.newint(rffi.getintfield(t, 'c_tm_gmtoff'))]
w_time_tuple = space.newtuple(time_tuple + extra)
else:
@@ -579,7 +579,7 @@
lltype.free(t_ref, flavor='raw')
if not pbuf:
raise OperationError(space.w_ValueError,
- space.newunicode(_get_error_msg()))
+ space.newtext(_get_error_msg()))
return pbuf
tup_w = space.fixedview(w_tup)
@@ -745,7 +745,7 @@
if not p:
raise OperationError(space.w_ValueError,
- space.newunicode(_get_error_msg()))
+ space.newtext(_get_error_msg()))
return _tm_to_tuple(space, p)
def localtime(space, w_seconds=None):
@@ -763,7 +763,7 @@
if not p:
raise OperationError(space.w_OSError,
- space.newunicode(_get_error_msg()))
+ space.newtext(_get_error_msg()))
return _tm_to_tuple(space, p)
def mktime(space, w_tup):
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -1437,7 +1437,7 @@
typename = space.type(self).getname(space)
w_seq = space.call_function(space.w_list, self)
seq_repr = space.unicode_w(space.repr(w_seq))
- return space.newunicode(u"%s(%s)" % (typename, seq_repr))
+ return space.newtext(u"%s(%s)" % (typename, seq_repr))
def descr_len(self, space):
return space.len(self.w_dict)
diff --git a/pypy/objspace/std/dictproxyobject.py b/pypy/objspace/std/dictproxyobject.py
--- a/pypy/objspace/std/dictproxyobject.py
+++ b/pypy/objspace/std/dictproxyobject.py
@@ -44,7 +44,7 @@
return space.str(self.w_mapping)
def descr_repr(self, space):
- return space.newunicode(u"mappingproxy(%s)" %
+ return space.newtext(u"mappingproxy(%s)" %
(space.unicode_w(space.repr(self.w_mapping)),))
@unwrap_spec(w_default=WrappedDefault(None))
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -371,9 +371,9 @@
m.atom_str(TYPE_STRING, x.co_code)
_marshal_tuple(space, x.co_consts_w, m)
_marshal_tuple(space, x.co_names_w, m) # list of w_unicodes
- co_varnames_w = [space.newunicode(_decode_utf8(space, s)) for s in x.co_varnames]
- co_freevars_w = [space.newunicode(_decode_utf8(space, s)) for s in x.co_freevars]
- co_cellvars_w = [space.newunicode(_decode_utf8(space, s)) for s in x.co_cellvars]
+ co_varnames_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_varnames]
+ co_freevars_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_freevars]
+ co_cellvars_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_cellvars]
_marshal_tuple(space, co_varnames_w, m) # more lists, now of w_unicodes
_marshal_tuple(space, co_freevars_w, m)
_marshal_tuple(space, co_cellvars_w, m)
@@ -453,7 +453,7 @@
@unmarshaller(TYPE_UNICODE)
def unmarshal_unicode(space, u, tc):
uc = _decode_utf8(space, u.get_str())
- return space.newunicode(uc)
+ return space.newtext(uc)
@unmarshaller(TYPE_INTERNED)
def unmarshal_interned(space, u, tc):
@@ -466,7 +466,7 @@
else:
lng = u.get_lng()
s = u.get(lng)
- w_u = u.space.newunicode(s.decode('latin-1'))
+ w_u = u.space.newtext(s.decode('latin-1'))
if interned:
w_u = u.space.new_interned_w_str(w_u)
return w_u
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -219,7 +219,7 @@
if index == -1:
kwarg = name[:i]
if self.is_unicode:
- w_kwarg = space.newunicode(kwarg)
+ w_kwarg = space.newtext(kwarg)
else:
w_kwarg = space.newbytes(kwarg)
w_arg = space.getitem(self.w_kwargs, w_kwarg)
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -382,13 +382,18 @@
return W_MemoryView(view)
def newbytes(self, s):
- assert isinstance(s, str)
+ assert isinstance(s, bytes)
return W_BytesObject(s)
def newbytearray(self, l):
return W_BytearrayObject(l)
+ @specialize.argtype(1)
def newtext(self, s):
+ if isinstance(s, str):
+ s, lgt, chk = str_decode_utf8(s, "string", True, None,
+ allow_surrogates=True)
+ return W_UnicodeObject(s, lgt)
lgt = rutf8.check_utf8(s, True)
return W_UnicodeObject(s, lgt)
@@ -399,7 +404,6 @@
def newutf8(self, utf8s, length):
assert utf8s is not None
- assert isinstance(utf8s, str)
return W_UnicodeObject(utf8s, length)
def newfilename(self, s):
diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py
--- a/pypy/objspace/std/test/test_dictmultiobject.py
+++ b/pypy/objspace/std/test/test_dictmultiobject.py
@@ -1304,10 +1304,6 @@
return obj.decode('ascii')
return obj
- def newunicode(self, u):
- assert isinstance(u, unicode)
- return u
-
def newtext(self, string):
assert isinstance(string, str)
return string.decode('utf-8')
diff --git a/pypy/objspace/std/test/test_stdobjspace.py b/pypy/objspace/std/test/test_stdobjspace.py
--- a/pypy/objspace/std/test/test_stdobjspace.py
+++ b/pypy/objspace/std/test/test_stdobjspace.py
@@ -14,7 +14,7 @@
def test_utf8(self):
assert self.space.isinstance_w(self.space.newtext("abc"), self.space.w_unicode)
- assert self.space.eq_w(self.space.newtext("üöä"), self.space.newunicode(u"üöä"))
+ assert self.space.eq_w(self.space.newtext("üöä"), self.space.newtext(u"üöä"))
def test_str_w_non_str(self):
raises(OperationError,self.space.str_w,self.space.wrap(None))
diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py
--- a/pypy/objspace/std/tupleobject.py
+++ b/pypy/objspace/std/tupleobject.py
@@ -102,11 +102,11 @@
def descr_repr(self, space):
items = self.tolist()
if len(items) == 1:
- return space.newunicode(
+ return space.newtext(
u"(" + space.unicode_w(space.repr(items[0])) + u",)")
tmp = u", ".join([space.unicode_w(space.repr(item))
for item in items])
- return space.newunicode(u"(" + tmp + u")")
+ return space.newtext(u"(" + tmp + u")")
def descr_hash(self, space):
raise NotImplementedError
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -725,7 +725,7 @@
else:
mod = space.unicode_w(w_mod)
if mod is not None and mod != u'builtins':
- return space.newunicode(u"<class '%s.%s'>" % (mod, self.getqualname(space)))
+ return space.newtext(u"<class '%s.%s'>" % (mod, self.getqualname(space)))
else:
return space.newtext("<class '%s'>" % (self.name,))
@@ -846,7 +846,7 @@
def descr_get__name__(space, w_type):
w_type = _check(space, w_type)
- return space.newunicode(w_type.getname(space))
+ return space.newtext(w_type.getname(space))
def descr_set__name__(space, w_type, w_value):
w_type = _check(space, w_type)
@@ -863,7 +863,7 @@
def descr_get__qualname__(space, w_type):
w_type = _check(space, w_type)
- return space.newunicode(w_type.getqualname(space))
+ return space.newtext(w_type.getqualname(space))
def descr_set__qualname__(space, w_type, w_value):
w_type = _check(space, w_type)
@@ -1453,7 +1453,7 @@
cycle.reverse()
names = [cls.getname(space) for cls in cycle]
# Can't use oefmt() here, since names is a list of unicodes
- raise OperationError(space.w_TypeError, space.newunicode(
+ raise OperationError(space.w_TypeError, space.newtext(
u"cycle among base classes: " + u' < '.join(names)))
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -34,7 +34,7 @@
@enforceargs(utf8str=str)
def __init__(self, utf8str, length):
- assert isinstance(utf8str, str)
+ assert isinstance(utf8str, bytes)
assert length >= 0
self._utf8 = utf8str
self._length = length
@@ -283,7 +283,7 @@
if space.is_w(space.type(self), space.w_unicode):
return self
# Subtype -- return genuine unicode string with the same value.
- return space.newunicode(space.unicode_w(self))
+ return space.newtext(space.unicode_w(self))
def descr_hash(self, space):
x = compute_hash(self._utf8)
@@ -350,7 +350,7 @@
arg = __args__.keywords[i].decode('utf-8')
except UnicodeDecodeError:
continue # uh, just skip that
- space.setitem(w_kwds, space.newunicode(arg),
+ space.setitem(w_kwds, space.newtext(arg),
__args__.keywords_w[i])
def descr_format(self, space, __args__):
More information about the pypy-commit mailing list