[pypy-commit] pypy py3.6: fix behaviour of Σ in combination with title

cfbolz pypy.commits at gmail.com
Tue Feb 26 10:02:57 EST 2019


Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: py3.6
Changeset: r96167:dca96cba7aee
Date: 2019-02-26 14:54 +0100
http://bitbucket.org/pypy/pypy/changeset/dca96cba7aee/

Log:	fix behaviour of Σ in combination with title

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1279,6 +1279,26 @@
         assert u'A\u03a3\u0345'.lower() == u'a\u03c2\u0345'
         assert u'\u03a3\u0345 '.lower() == u'\u03c3\u0345 '
 
+    def test_title_3a3(self):
+        # Special case for GREEK CAPITAL LETTER SIGMA U+03A3
+        assert u'\u03a3abc'.title() == u'\u03a3abc'
+        assert u'\u03a3'.title() == u'Σ'
+        assert u'\u0345\u03a3'.title() == u'Ισ'
+        assert u'A\u0345\u03a3'.title() == u'Aͅς'
+        assert u'A\u0345\u03a3a'.title() == u'Aͅσa'
+        assert u'A\u0345\u03a3'.title() == u'Aͅς'
+        assert u'A\u03a3\u0345'.title() == u'Aςͅ'
+        assert u'\u03a3\u0345 '.title() == u'Σͅ '
+
+        assert u'ääää \u03a3'.title() == u'Ääää Σ'
+        assert u'ääää \u0345\u03a3'.title() == u'Ääää Ισ'
+        assert u'ääää A\u0345\u03a3'.title() == u'Ääää Aͅς'
+        assert u'ääää A\u0345\u03a3a'.title() == u'Ääää Aͅσa'
+        assert u'ääää A\u0345\u03a3'.title() == u'Ääää Aͅς'
+        assert u'ääää A\u03a3\u0345'.title() == u'Ääää Aςͅ'
+        assert u'ääää \u03a3\u0345 '.title() == u'Ääää Σͅ '
+
+
     def test_unicode_constructor_misc(self):
         x = u'foo'
         x += u'bar'
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -398,10 +398,7 @@
         i = 0
         for ch in rutf8.Utf8StringIterator(value):
             if unicodedb.isupper(ch):
-                if ch == 0x3a3:
-                    codes = [self._handle_capital_sigma(value, i),]
-                else: 
-                    codes = unicodedb.tolower_full(ch)
+                codes = self._lower_char(ch, value, i)
             elif unicodedb.islower(ch):
                 codes = unicodedb.toupper_full(ch)
             else:
@@ -423,18 +420,22 @@
         previous_is_cased = False
         i = 0
         for ch in rutf8.Utf8StringIterator(input):
-            if ch == 0x3a3:
-                codes = [self._handle_capital_sigma(input, i),]
-            elif not previous_is_cased:
+            if previous_is_cased:
+                codes = self._lower_char(ch, value, i)
+            else:
                 codes = unicodedb.totitle_full(ch)
-            else:
-                codes = unicodedb.tolower_full(ch)
             for c in codes:
                 builder.append_code(c)
             previous_is_cased = unicodedb.iscased(ch)
             i += 1
         return self.from_utf8builder(builder)
 
+    def _lower_char(self, ch, value, i):
+        if ch == 0x3a3:
+            return [self._handle_capital_sigma(value, i), ]
+        else:
+            return unicodedb.tolower_full(ch)
+
     def _handle_capital_sigma(self, value, i):
         # U+03A3 is in the Final_Sigma context when, it is found like this:
         #\p{cased} \p{case-ignorable}* U+03A3 not(\p{case-ignorable}* \p{cased})
@@ -598,10 +599,7 @@
         builder = rutf8.Utf8StringBuilder(len(value))
         i = 0
         for ch in rutf8.Utf8StringIterator(value):
-            if ch == 0x3a3:
-                codes = [self._handle_capital_sigma(value, i),]
-            else:
-                codes = unicodedb.tolower_full(ch)
+            codes = self._lower_char(ch, value, i)
             for c in codes:
                 builder.append_code(c)
             i += 1
@@ -889,10 +887,7 @@
             builder.append_code(c)
         i = 1
         for ch in it:
-            if ch == 0x3a3:
-                codes = [self._handle_capital_sigma(value, i),]
-            else: 
-                codes = unicodedb.tolower_full(ch)
+            codes = self._lower_char(ch, value, i)
             for c in codes:
                 builder.append_code(c)
             i += 1


More information about the pypy-commit mailing list