[pypy-commit] pypy unicode-utf8: implement lower

Thu Oct 26 14:21:59 EDT 2017

Author: fijal
Branch: unicode-utf8
Changeset: r92856:e5017df1fbdd
Date: 2017-10-26 20:21 +0200
http://bitbucket.org/pypy/pypy/changeset/e5017df1fbdd/

Log:	implement lower

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -323,6 +323,16 @@
         assert not u'\u01c5abc'.islower()
         assert not u'\u01c5ABC'.isupper()
 
+    def test_lower_upper(self):
+        assert u'a'.lower() == u'a'
+        assert u'A'.lower() == u'a'
+        assert u'\u0105'.lower() == u'\u0105'
+        assert u'\u0104'.lower() == u'\u0105'
+        assert u'a'.upper() == u'A'
+        assert u'A'.upper() == u'A'
+        assert u'\u0105'.upper() == u'\u0104'
+        assert u'\u0104'.upper() == u'\u0104'
+
     def test_capitalize(self):
         assert u"brown fox".capitalize() == u"Brown fox"
         assert u' hello '.capitalize() == u' hello '
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -182,15 +182,6 @@
     def _islinebreak(self, s, pos):
         return rutf8.islinebreak(s, pos)
 
-    def _upper(self, ch):
-        return unichr(unicodedb.toupper(ord(ch)))
-
-    def _lower(self, ch):
-        return unichr(unicodedb.tolower(ord(ch)))
-
-    def _title(self, ch):
-        return unichr(unicodedb.totitle(ord(ch)))
-
     def _newlist_unwrapped(self, space, lst):
         assert False, "should not be called"
         return space.newlist_unicode(lst)
@@ -510,6 +501,15 @@
         tformat = unicode_template_formatter(space, space.unicode_w(self))
         return tformat.formatter_field_name_split()
 
+    def descr_lower(self, space):
+        builder = StringBuilder(len(self._utf8))
+        pos = 0
+        while pos < len(self._utf8):
+            lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos))
+            rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates?
+            pos = rutf8.next_codepoint_pos(self._utf8, pos)
+        return W_UnicodeObject(builder.build(), self._len())
+
     def descr_isdecimal(self, space):
         return self._is_generic(space, '_isdecimal')