[pypy-commit] pypy unicode-utf8: go a slightly shorter way to working system - use decode a bit everywhere
fijal
pypy.commits at gmail.com
Sat Feb 25 05:32:44 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r90349:6cb0a61d37f2
Date: 2017-02-25 11:32 +0100
http://bitbucket.org/pypy/pypy/changeset/6cb0a61d37f2/
Log: go a slightly shorter way to working system - use decode a bit
everywhere
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -155,7 +155,7 @@
if isinstance(x, str):
return self.newtext(x)
if isinstance(x, unicode):
- return self.newunicode(x)
+ return self.newutf8(x.encode('utf8'), len(x))
if isinstance(x, float):
return W_FloatObject(x)
if isinstance(x, W_Root):
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -22,6 +22,7 @@
assert len(warnings) == 2
def test_listview_unicode(self):
+ py.test.skip("skip for new")
w_str = self.space.wrap(u'abcd')
assert self.space.listview_unicode(w_str) == list(u"abcd")
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -110,10 +110,11 @@
"found", len(self._value))
return space.newint(ord(self._value[0]))
- def _new(self, value, length):
- return W_UnicodeObject(value, length)
+ def _new(self, value):
+ return W_UnicodeObject(value.encode('utf8', len(value)))
def _new_from_list(self, value):
+ xxx
return W_UnicodeObject(u''.join(value))
def _empty(self):
@@ -124,7 +125,8 @@
self._length = self._compute_length()
return self._length
- _val = utf8_w
+ def _val(self, space):
+ return self._utf8.decode('utf8')
@staticmethod
def _use_rstr_ops(space, w_other):
@@ -134,17 +136,10 @@
@staticmethod
def _op_val(space, w_other, strict=None):
- if isinstance(w_other, W_UnicodeObject):
- return w_other._utf8
- if space.isinstance_w(w_other, space.w_bytes):
- return unicode_from_string(space, w_other)._utf8
- if strict:
- raise oefmt(space.w_TypeError,
- "%s arg must be None, unicode or str", strict)
- return unicode_from_encoded_object(
- space, w_other, None, "strict")._utf8
+ return W_UnicodeObject._convert_to_unicode(space, w_other)._utf8.decode('utf8')
- def _convert_to_unicode(self, space, w_other):
+ @staticmethod
+ def _convert_to_unicode(space, w_other):
if isinstance(w_other, W_UnicodeObject):
return w_other
if space.isinstance_w(w_other, space.w_bytes):
@@ -240,7 +235,7 @@
return w_newobj
def descr_repr(self, space):
- chars = self._value
+ chars = self._utf8.decode('utf8')
size = len(chars)
s = _repr_function(chars, size, "strict")
return space.newtext(s)
@@ -254,7 +249,7 @@
def descr_eq(self, space, w_other):
try:
- res = self._val(space) == self._op_val(space, w_other)
+ res = self._utf8 == self._convert_to_unicode(space, w_other)._utf8
except OperationError as e:
if e.match(space, space.w_TypeError):
return space.w_NotImplemented
@@ -270,7 +265,7 @@
def descr_ne(self, space, w_other):
try:
- res = self._val(space) != self._op_val(space, w_other)
+ res = self._utf8 != self._convert_to_unicode(space, w_other)._utf8
except OperationError as e:
if e.match(space, space.w_TypeError):
return space.w_NotImplemented
@@ -286,7 +281,7 @@
def descr_lt(self, space, w_other):
try:
- res = self._val(space) < self._op_val(space, w_other)
+ res = self._utf8 < self._convert_to_unicode(space, w_other)._utf8
except OperationError as e:
if e.match(space, space.w_TypeError):
return space.w_NotImplemented
@@ -295,7 +290,7 @@
def descr_le(self, space, w_other):
try:
- res = self._val(space) <= self._op_val(space, w_other)
+ res = self._utf8 <= self._convert_to_unicode(space, w_other)._utf8
except OperationError as e:
if e.match(space, space.w_TypeError):
return space.w_NotImplemented
@@ -304,7 +299,7 @@
def descr_gt(self, space, w_other):
try:
- res = self._val(space) > self._op_val(space, w_other)
+ res = self._utf8 > self._convert_to_unicode(space, w_other)._utf8
except OperationError as e:
if e.match(space, space.w_TypeError):
return space.w_NotImplemented
@@ -313,7 +308,7 @@
def descr_ge(self, space, w_other):
try:
- res = self._val(space) >= self._op_val(space, w_other)
+ res = self._utf8 >= self._convert_to_unicode(space, w_other)._utf8
except OperationError as e:
if e.match(space, space.w_TypeError):
return space.w_NotImplemented
@@ -339,6 +334,7 @@
return mod_format(space, w_values, self, do_unicode=True)
def descr_translate(self, space, w_table):
+ xxx
selfvalue = self._value
w_sys = space.getbuiltinmodule('sys')
maxunicode = space.int_w(space.getattr(w_sys,
@@ -379,9 +375,10 @@
def descr_join(self, space, w_list):
l = space.listview_unicode(w_list)
if l is not None:
+ xxx
if len(l) == 1:
return space.newunicode(l[0])
- return space.newunicode(self._val(space).join(l))
+ return space.newunicode(self._utf8).join(l)
return self._StringMethods_descr_join(space, w_list)
def _join_return_one(self, space, w_obj):
@@ -471,11 +468,11 @@
if value and i != 0:
sb.append(value)
sb.append(unwrapped[i])
- return self._new(sb.build(), lgt)
+ return W_UnicodeObject(sb.build(), lgt)
@unwrap_spec(keepends=bool)
def descr_splitlines(self, space, keepends=False):
- value = self._val(space)
+ value = self._utf8
length = len(value)
strs_w = []
pos = 0
@@ -501,13 +498,13 @@
@unwrap_spec(width=int)
def descr_zfill(self, space, width):
- selfval = self._val(space)
+ selfval = self._utf8
if len(selfval) == 0:
- return self._new(self._multi_chr(self._chr('0')) * width, width)
+ return W_UnicodeObject(self._multi_chr(self._chr('0')) * width, width)
num_zeros = width - self._len()
if num_zeros <= 0:
# cannot return self, in case it is a subclass of str
- return self._new(selfval, self._len())
+ return W_UnicodeObject(selfval, self._len())
builder = self._builder(num_zeros + len(selfval))
if len(selfval) > 0 and (selfval[0] == '+' or selfval[0] == '-'):
# copy sign to first position
@@ -517,18 +514,18 @@
start = 0
builder.append_multiple_char(self._chr('0'), num_zeros)
builder.append_slice(selfval, start, len(selfval))
- return self._new(builder.build(), width)
+ return W_UnicodeObject(builder.build(), width)
@unwrap_spec(maxsplit=int)
def descr_split(self, space, w_sep=None, maxsplit=-1):
# XXX maybe optimize?
res = []
- value = self._val(space)
+ value = self._utf8
if space.is_none(w_sep):
res = split(value, maxsplit=maxsplit)
return space.newlist([W_UnicodeObject(s, -1) for s in res])
- by = self._op_val(space, w_sep)
+ by = self._convert_to_unicode(space, w_sep)._utf8
if len(by) == 0:
raise oefmt(space.w_ValueError, "empty separator")
res = split(value, by, maxsplit)
@@ -538,18 +535,40 @@
@unwrap_spec(maxsplit=int)
def descr_rsplit(self, space, w_sep=None, maxsplit=-1):
res = []
- value = self._val(space)
+ value = self._utf8
if space.is_none(w_sep):
res = rsplit(value, maxsplit=maxsplit)
return space.newlist([W_UnicodeObject(s, -1) for s in res])
- by = self._op_val(space, w_sep)
+ by = self._convert_to_unicode(space, w_sep)._utf8
if len(by) == 0:
raise oefmt(space.w_ValueError, "empty separator")
res = rsplit(value, by, maxsplit)
return space.newlist([W_UnicodeObject(s, -1) for s in res])
+ @unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
+ def descr_center(self, space, width, w_fillchar):
+ value = self._utf8
+ fillchar = self._convert_to_unicode(space, w_fillchar)._utf8
+ if len(fillchar) != 1:
+ raise oefmt(space.w_TypeError,
+ "center() argument 2 must be a single character")
+
+ d = width - self._len()
+ if d > 0:
+ offset = d//2 + (d & width & 1)
+ fillchar = self._multi_chr(fillchar[0])
+ centered = offset * fillchar + value + (d - offset) * fillchar
+ else:
+ centered = value
+ d = 0
+
+ return W_UnicodeObject(centered, self._len() + d)
+
+ def descr_title(self, space):
+ return
+
def wrapunicode(space, uni):
return W_UnicodeObject(uni)
More information about the pypy-commit
mailing list