[pypy-commit] pypy unicode-utf8: fix and a workaround
fijal
pypy.commits at gmail.com
Mon Feb 27 10:23:24 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r90404:737c72b15c6d
Date: 2017-02-27 16:22 +0100
http://bitbucket.org/pypy/pypy/changeset/737c72b15c6d/
Log: fix and a workaround
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -303,6 +303,13 @@
return self.newlist(list_u)
return W_ListObject.newlist_unicode(self, list_u)
+ def newlist_from_unicode(self, lst):
+ res_w = []
+ for u in lst:
+ assert u is not None
+ res_w.append(self.newutf8(u, -1))
+ return self.newlist(res_w)
+
def newlist_int(self, list_i):
return W_ListObject.newlist_int(self, list_i)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -109,7 +109,7 @@
if self._len() != 1:
raise oefmt(space.w_TypeError,
"ord() expected a character, but string of length %d "
- "found", len(self._value))
+ "found", self._len())
return space.newint(rutf8.codepoint_at_pos(self._utf8, 0))
def _new(self, value):
@@ -126,6 +126,9 @@
self._length = self._compute_length()
return self._length
+ def _compute_length(self):
+ return rutf8.compute_length_utf8(self._utf8)
+
def _val(self, space):
return self._utf8.decode('utf8')
@@ -156,7 +159,7 @@
@specialize.argtype(1)
def _chr(self, char):
assert len(char) == 1
- return char[0]
+ return unichr(ord(char[0]))
def _multi_chr(self, unichar):
return unichar
@@ -513,7 +516,7 @@
def descr_zfill(self, space, width):
selfval = self._utf8
if len(selfval) == 0:
- return W_UnicodeObject(self._chr('0') * width, width)
+ return W_UnicodeObject('0' * width, width)
num_zeros = width - self._len()
if num_zeros <= 0:
# cannot return self, in case it is a subclass of str
@@ -525,7 +528,7 @@
start = 1
else:
start = 0
- builder.append_multiple_char(self._chr('0'), num_zeros)
+ builder.append_multiple_char('0', num_zeros)
builder.append_slice(selfval, start, len(selfval))
return W_UnicodeObject(builder.build(), width)
@@ -536,14 +539,14 @@
value = self._utf8
if space.is_none(w_sep):
res = split(value, maxsplit=maxsplit)
- return space.newlist([W_UnicodeObject(s, -1) for s in res])
+ return space.newlist_from_unicode(res)
by = self.convert_arg_to_w_unicode(space, w_sep)._utf8
if len(by) == 0:
raise oefmt(space.w_ValueError, "empty separator")
res = split(value, by, maxsplit)
- return space.newlist([W_UnicodeObject(s, -1) for s in res])
+ return space.newlist_from_unicode(res)
@unwrap_spec(maxsplit=int)
def descr_rsplit(self, space, w_sep=None, maxsplit=-1):
@@ -551,14 +554,14 @@
value = self._utf8
if space.is_none(w_sep):
res = rsplit(value, maxsplit=maxsplit)
- return space.newlist([W_UnicodeObject(s, -1) for s in res])
+ return space.newlist_from_unicode(res)
by = self.convert_arg_to_w_unicode(space, w_sep)._utf8
if len(by) == 0:
raise oefmt(space.w_ValueError, "empty separator")
res = rsplit(value, by, maxsplit)
- return space.newlist([W_UnicodeObject(s, -1) for s in res])
+ return space.newlist_from_unicode(res)
@unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
def descr_center(self, space, width, w_fillchar):
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -66,6 +66,14 @@
return pos + 1
return pos + ord(runicode._utf8_code_length[chr1 - 0x80])
+def compute_length_utf8(s):
+ pos = 0
+ lgt = 0
+ while pos < len(s):
+ pos = next_codepoint_pos(s, pos)
+ lgt += 1
+ return lgt
+
def codepoint_at_pos(code, pos):
""" Give a codepoint in code at pos - assumes valid utf8, no checking!
"""
diff --git a/rpython/rtyper/rmodel.py b/rpython/rtyper/rmodel.py
--- a/rpython/rtyper/rmodel.py
+++ b/rpython/rtyper/rmodel.py
@@ -359,6 +359,10 @@
def ll_str(self, nothing): raise AssertionError("unreachable code")
impossible_repr = VoidRepr()
+class __extend__(pairtype(Repr, VoidRepr)):
+ def convert_from_to((r_from, r_to), v, llops):
+ return inputconst(lltype.Void, None)
+
class SimplePointerRepr(Repr):
"Convenience Repr for simple ll pointer types with no operation on them."
More information about the pypy-commit mailing list