[pypy-commit] pypy unicode-utf8-py3: disallow tuple input to newtext, and also refactor some unicode/utf8 recoding
mattip
pypy.commits at gmail.com
Sun Dec 2 11:52:11 EST 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95399:5a6b7f57a324
Date: 2018-11-29 22:08 -0800
http://bitbucket.org/pypy/pypy/changeset/5a6b7f57a324/
Log: disallow tuple input to newtext, and also refactor some unicode/utf8 recoding
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -115,7 +115,7 @@
return W_FString(substr, rawmode)
else:
v = unicodehelper.str_decode_utf8(substr, 'strict', True, None)
- return space.newtext(v)
+ return space.newtext(*v)
v = PyString_DecodeEscape(space, substr, 'strict', encoding)
return space.newbytes(v)
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -771,7 +771,7 @@
self._check_closed(space)
self._writeflush(space)
limit = convert_size(space, w_limit)
- return space.newtext(self._readline(space, limit))
+ return space.newtext(*self._readline(space, limit))
def _readline(self, space, limit):
# This is a separate function so that readline_w() can be jitted.
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -1154,7 +1154,7 @@
raise oefmt(space.w_ValueError,
"array contains a unicode character out of "
"range(0x110000)")
- return space.newtext(item)
+ return space.newtext(rutf8.unichr_as_utf8(ord(item)), 1)
assert 0, "unreachable"
# interface
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py
--- a/pypy/module/time/interp_time.py
+++ b/pypy/module/time/interp_time.py
@@ -459,8 +459,8 @@
_set_module_object(space, "timezone", space.newint(timezone))
_set_module_object(space, 'daylight', space.newint(daylight))
- tzname_w = [space.newtext(tzname[0].decode('latin-1')),
- space.newtext(tzname[1].decode('latin-1'))]
+ tzname_w = [space.newtext(tzname[0]),
+ space.newtext(tzname[1])]
_set_module_object(space, 'tzname', space.newtuple(tzname_w))
_set_module_object(space, 'altzone', space.newint(altzone))
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -371,9 +371,9 @@
m.atom_str(TYPE_STRING, x.co_code)
_marshal_tuple(space, x.co_consts_w, m)
_marshal_tuple(space, x.co_names_w, m) # list of w_unicodes
- co_varnames_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_varnames]
- co_freevars_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_freevars]
- co_cellvars_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_cellvars]
+ co_varnames_w = [space.newtext(*_decode_utf8(space, s)) for s in x.co_varnames]
+ co_freevars_w = [space.newtext(*_decode_utf8(space, s)) for s in x.co_freevars]
+ co_cellvars_w = [space.newtext(*_decode_utf8(space, s)) for s in x.co_cellvars]
_marshal_tuple(space, co_varnames_w, m) # more lists, now of w_unicodes
_marshal_tuple(space, co_freevars_w, m)
_marshal_tuple(space, co_cellvars_w, m)
@@ -451,7 +451,7 @@
@unmarshaller(TYPE_UNICODE)
def unmarshal_unicode(space, u, tc):
uc = _decode_utf8(space, u.get_str())
- return space.newtext(uc)
+ return space.newtext(*uc)
@unmarshaller(TYPE_INTERNED)
def unmarshal_interned(space, u, tc):
@@ -464,7 +464,7 @@
else:
lng = u.get_lng()
s = u.get(lng)
- w_u = u.space.newtext(s.decode('latin-1'))
+ w_u = u.space.newtext(s)
if interned:
w_u = u.space.new_interned_w_str(w_u)
return w_u
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -380,16 +380,16 @@
def newbytearray(self, l):
return W_BytearrayObject(l)
+ # XXX TODO - remove this and force all users to call with utf8
@specialize.argtype(1)
- def newtext(self, s, lgt=-1):
+ def newtext(self, s, lgt=-1, unused=-1):
+ # the unused argument can be from something like
+ # newtext(*decode_utf8sp(space, code))
if isinstance(s, unicode):
s, lgt = s.encode('utf8'), len(s)
- elif isinstance(s, str) and lgt < 0:
+ assert isinstance(s, str)
+ if lgt < 0:
lgt = rutf8.codepoints_in_utf8(s)
- elif isinstance(s, tuple):
- # result of decode_utf8
- s, lgt, codepoints = s
- assert isinstance(s, str)
return W_UnicodeObject(s, lgt)
def newtext_or_none(self, s, lgt=-1):
More information about the pypy-commit mailing list