[pypy-commit] pypy py3.6: fix behaviour of Σ in combination with title
cfbolz
pypy.commits at gmail.com
Tue Feb 26 10:02:57 EST 2019
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: py3.6
Changeset: r96167:dca96cba7aee
Date: 2019-02-26 14:54 +0100
http://bitbucket.org/pypy/pypy/changeset/dca96cba7aee/
Log: fix behaviour of Σ in combination with title
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1279,6 +1279,26 @@
assert u'A\u03a3\u0345'.lower() == u'a\u03c2\u0345'
assert u'\u03a3\u0345 '.lower() == u'\u03c3\u0345 '
+ def test_title_3a3(self):
+ # Special case for GREEK CAPITAL LETTER SIGMA U+03A3
+ assert u'\u03a3abc'.title() == u'\u03a3abc'
+ assert u'\u03a3'.title() == u'Σ'
+ assert u'\u0345\u03a3'.title() == u'Ισ'
+ assert u'A\u0345\u03a3'.title() == u'Aͅς'
+ assert u'A\u0345\u03a3a'.title() == u'Aͅσa'
+ assert u'A\u0345\u03a3'.title() == u'Aͅς'
+ assert u'A\u03a3\u0345'.title() == u'Aςͅ'
+ assert u'\u03a3\u0345 '.title() == u'Σͅ '
+
+ assert u'ääää \u03a3'.title() == u'Ääää Σ'
+ assert u'ääää \u0345\u03a3'.title() == u'Ääää Ισ'
+ assert u'ääää A\u0345\u03a3'.title() == u'Ääää Aͅς'
+ assert u'ääää A\u0345\u03a3a'.title() == u'Ääää Aͅσa'
+ assert u'ääää A\u0345\u03a3'.title() == u'Ääää Aͅς'
+ assert u'ääää A\u03a3\u0345'.title() == u'Ääää Aςͅ'
+ assert u'ääää \u03a3\u0345 '.title() == u'Ääää Σͅ '
+
+
def test_unicode_constructor_misc(self):
x = u'foo'
x += u'bar'
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -398,10 +398,7 @@
i = 0
for ch in rutf8.Utf8StringIterator(value):
if unicodedb.isupper(ch):
- if ch == 0x3a3:
- codes = [self._handle_capital_sigma(value, i),]
- else:
- codes = unicodedb.tolower_full(ch)
+ codes = self._lower_char(ch, value, i)
elif unicodedb.islower(ch):
codes = unicodedb.toupper_full(ch)
else:
@@ -423,18 +420,22 @@
previous_is_cased = False
i = 0
for ch in rutf8.Utf8StringIterator(input):
- if ch == 0x3a3:
- codes = [self._handle_capital_sigma(input, i),]
- elif not previous_is_cased:
+ if previous_is_cased:
+ codes = self._lower_char(ch, value, i)
+ else:
codes = unicodedb.totitle_full(ch)
- else:
- codes = unicodedb.tolower_full(ch)
for c in codes:
builder.append_code(c)
previous_is_cased = unicodedb.iscased(ch)
i += 1
return self.from_utf8builder(builder)
+ def _lower_char(self, ch, value, i):
+ if ch == 0x3a3:
+ return [self._handle_capital_sigma(value, i), ]
+ else:
+ return unicodedb.tolower_full(ch)
+
def _handle_capital_sigma(self, value, i):
# U+03A3 is in the Final_Sigma context when it is found like this:
#\p{cased} \p{case-ignorable}* U+03A3 not(\p{case-ignorable}* \p{cased})
@@ -598,10 +599,7 @@
builder = rutf8.Utf8StringBuilder(len(value))
i = 0
for ch in rutf8.Utf8StringIterator(value):
- if ch == 0x3a3:
- codes = [self._handle_capital_sigma(value, i),]
- else:
- codes = unicodedb.tolower_full(ch)
+ codes = self._lower_char(ch, value, i)
for c in codes:
builder.append_code(c)
i += 1
@@ -889,10 +887,7 @@
builder.append_code(c)
i = 1
for ch in it:
- if ch == 0x3a3:
- codes = [self._handle_capital_sigma(value, i),]
- else:
- codes = unicodedb.tolower_full(ch)
+ codes = self._lower_char(ch, value, i)
for c in codes:
builder.append_code(c)
i += 1
More information about the pypy-commit mailing list