[pypy-commit] pypy unicode-utf8: Almost all objspace/std tests pass now.
jerith
pypy.commits at gmail.com
Sun Oct 8 13:13:39 EDT 2017
Author: Jeremy Thurgood <firxen at gmail.com>
Branch: unicode-utf8
Changeset: r92657:76983639ace6
Date: 2017-10-08 19:10 +0200
http://bitbucket.org/pypy/pypy/changeset/76983639ace6/
Log: Almost all objspace/std tests pass now.
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -171,34 +171,34 @@
_builder = UnicodeBuilder
def _isupper(self, ch):
- return unicodedb.isupper(ord(ch))
+ return unicodedb.isupper(ch)
def _islower(self, ch):
- return unicodedb.islower(ord(ch))
+ return unicodedb.islower(ch)
def _isnumeric(self, ch):
- return unicodedb.isnumeric(ord(ch))
+ return unicodedb.isnumeric(ch)
def _istitle(self, ch):
- return unicodedb.isupper(ord(ch)) or unicodedb.istitle(ord(ch))
+ return unicodedb.isupper(ch) or unicodedb.istitle(ch)
def _isspace(self, ch):
- return unicodedb.isspace(ord(ch))
+ return unicodedb.isspace(ch)
def _isalpha(self, ch):
- return unicodedb.isalpha(ord(ch))
+ return unicodedb.isalpha(ch)
def _isalnum(self, ch):
- return unicodedb.isalnum(ord(ch))
+ return unicodedb.isalnum(ch)
def _isdigit(self, ch):
- return unicodedb.isdigit(ord(ch))
+ return unicodedb.isdigit(ch)
def _isdecimal(self, ch):
- return unicodedb.isdecimal(ord(ch))
+ return unicodedb.isdecimal(ch)
def _iscased(self, ch):
- return unicodedb.iscased(ord(ch))
+ return unicodedb.iscased(ch)
def _islinebreak(self, s, pos):
return rutf8.islinebreak(s, pos)
@@ -354,6 +354,21 @@
def descr_rmod(self, space, w_values):
return mod_format(space, w_values, self, do_unicode=True)
+ def descr_swapcase(self, space):
+ selfvalue = self._utf8
+ builder = StringBuilder(len(selfvalue))
+ i = 0
+ while i < len(selfvalue):
+ ch = rutf8.codepoint_at_pos(selfvalue, i)
+ i = rutf8.next_codepoint_pos(selfvalue, i)
+ if unicodedb.isupper(ch):
+ rutf8.unichr_as_utf8_append(builder, unicodedb.tolower(ch))
+ elif unicodedb.islower(ch):
+ rutf8.unichr_as_utf8_append(builder, unicodedb.toupper(ch))
+ else:
+ rutf8.unichr_as_utf8_append(builder, ch)
+ return W_UnicodeObject(builder.build(), self._length)
+
def descr_title(self, space):
if len(self._utf8) == 0:
return self
@@ -461,6 +476,28 @@
res = rutf8.check_utf8(self._utf8, force_len=res_index) # can't raise
return space.newint(res)
+ @specialize.arg(2)
+ def _is_generic(self, space, func_name):
+ func = getattr(self, func_name)
+ if self._length == 0:
+ return space.w_False
+ if self._length == 1:
+ return space.newbool(func(rutf8.codepoint_at_pos(self._utf8, 0)))
+ else:
+ return self._is_generic_loop(space, self._utf8, func_name)
+
+ @specialize.arg(3)
+ def _is_generic_loop(self, space, v, func_name):
+ func = getattr(self, func_name)
+ val = self._utf8
+ i = 0
+ while i < len(val):
+ uchar = rutf8.codepoint_at_pos(val, i)
+ i = rutf8.next_codepoint_pos(val, i)
+ if not func(uchar):
+ return space.w_False
+ return space.w_True
+
def descr_encode(self, space, w_encoding=None, w_errors=None):
encoding, errors = _get_encoding_and_errors(space, w_encoding,
w_errors)
@@ -673,6 +710,16 @@
strs_w.append(W_UnicodeObject(value[sol:eol], lgt))
return space.newlist(strs_w)
+ def descr_upper(self, space):
+ value = self._utf8
+ builder = StringBuilder(len(value))
+ i = 0
+ while i < len(value):
+ uchar = rutf8.codepoint_at_pos(value, i)
+ i = rutf8.next_codepoint_pos(value, i)
+ rutf8.unichr_as_utf8_append(builder, unicodedb.toupper(uchar))
+ return W_UnicodeObject(builder.build(), self._length)
+
@unwrap_spec(width=int)
def descr_zfill(self, space, width):
selfval = self._utf8
More information about the pypy-commit mailing list