[pypy-commit] pypy unicode-utf8-py3: finish f287dec62c4e for swapcase, capitalize

mattip pypy.commits at gmail.com
Wed Jan 23 09:46:12 EST 2019


Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95699:a7867a23009b
Date: 2019-01-22 23:17 +0200
http://bitbucket.org/pypy/pypy/changeset/a7867a23009b/

Log:	finish f287dec62c4e for swapcase, capitalize

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -939,6 +939,9 @@
 
     def test_swapcase(self):
         assert '\xe4\xc4\xdf'.swapcase() == '\xc4\xe4SS'
+        # capital sigma (U+03A3) after a cased letter becomes final small sigma (U+03C2)
+        assert u'A\u0345\u03a3'.swapcase() == u'a\u0399\u03c2'
+        # but not if the previous codepoint is 0-width
         assert u'\u0345\u03a3'.swapcase() == u'\u0399\u03c3'
 
     def test_call_special_methods(self):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -375,15 +375,20 @@
     def descr_swapcase(self, space):
         value = self._utf8
         builder = rutf8.Utf8StringBuilder(len(value))
+        i = 0
         for ch in rutf8.Utf8StringIterator(value):
             if unicodedb.isupper(ch):
-                codes = unicodedb.tolower_full(ch)
+                if ch == 0x3a3:
+                    codes = [self._handle_capital_sigma(value, i),]
+                else: 
+                    codes = unicodedb.tolower_full(ch)
             elif unicodedb.islower(ch):
                 codes = unicodedb.toupper_full(ch)
             else:
                 codes = [ch,]
             for c in codes:
                 builder.append_code(c)
+            i += 1
         return self.from_utf8builder(builder)
 
     def descr_title(self, space):
@@ -847,21 +852,23 @@
         if self._len() == 0:
             return self._empty()
 
-        builder = rutf8.Utf8StringBuilder(len(self._utf8))
-        it = rutf8.Utf8StringIterator(self._utf8)
+        value = self._utf8
+        builder = rutf8.Utf8StringBuilder(len(value))
+        it = rutf8.Utf8StringIterator(value)
         uchar = it.next()
         codes = unicodedb.toupper_full(uchar)
         # can sometimes give more than one, like for omega-with-Ypogegrammeni, 8179
         for c in codes:
             builder.append_code(c)
+        i = 1
         for ch in it:
-            ch = unicodedb.tolower_full(ch)
-            if it.done():
-                # Special case lower-sigma
-                if ch[-1] == 0x03c3:
-                    ch[-1] = 0x03c2 
-            for c in ch:
+            if ch == 0x3a3:
+                codes = [self._handle_capital_sigma(value, i),]
+            else: 
+                codes = unicodedb.tolower_full(ch)
+            for c in codes:
                 builder.append_code(c)
+            i += 1
         return self.from_utf8builder(builder)
 
     @unwrap_spec(width=int, w_fillchar=WrappedDefault(u' '))


More information about the pypy-commit mailing list