[pypy-commit] pypy unicode-utf8-py3: finish f287dec62c4e for swapcase, capitalize
mattip
pypy.commits at gmail.com
Wed Jan 23 09:46:12 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95699:a7867a23009b
Date: 2019-01-22 23:17 +0200
http://bitbucket.org/pypy/pypy/changeset/a7867a23009b/
Log: finish f287dec62c4e for swapcase, capitalize
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -939,6 +939,9 @@
def test_swapcase(self):
assert '\xe4\xc4\xdf'.swapcase() == '\xc4\xe4SS'
+ # capital sigma (U+03A3) at the end of a word lowercases to final small sigma (U+03C2)
+ assert u'A\u0345\u03a3'.swapcase() == u'a\u0399\u03c2'
# but not when no cased letter precedes it (U+0345 is case-ignorable and is skipped)
assert u'\u0345\u03a3'.swapcase() == u'\u0399\u03c3'
def test_call_special_methods(self):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -375,15 +375,20 @@
def descr_swapcase(self, space):
value = self._utf8
builder = rutf8.Utf8StringBuilder(len(value))
+ i = 0
for ch in rutf8.Utf8StringIterator(value):
if unicodedb.isupper(ch):
- codes = unicodedb.tolower_full(ch)
+ if ch == 0x3a3:
+ codes = [self._handle_capital_sigma(value, i),]
+ else:
+ codes = unicodedb.tolower_full(ch)
elif unicodedb.islower(ch):
codes = unicodedb.toupper_full(ch)
else:
codes = [ch,]
for c in codes:
builder.append_code(c)
+ i += 1
return self.from_utf8builder(builder)
def descr_title(self, space):
@@ -847,21 +852,23 @@
if self._len() == 0:
return self._empty()
- builder = rutf8.Utf8StringBuilder(len(self._utf8))
- it = rutf8.Utf8StringIterator(self._utf8)
+ value = self._utf8
+ builder = rutf8.Utf8StringBuilder(len(value))
+ it = rutf8.Utf8StringIterator(value)
uchar = it.next()
codes = unicodedb.toupper_full(uchar)
# can sometimes give more than one, like for omega-with-Ypogegrammeni, 8179
for c in codes:
builder.append_code(c)
+ i = 1
for ch in it:
- ch = unicodedb.tolower_full(ch)
- if it.done():
- # Special case lower-sigma
- if ch[-1] == 0x03c3:
- ch[-1] = 0x03c2
- for c in ch:
+ if ch == 0x3a3:
+ codes = [self._handle_capital_sigma(value, i),]
+ else:
+ codes = unicodedb.tolower_full(ch)
+ for c in codes:
builder.append_code(c)
+ i += 1
return self.from_utf8builder(builder)
@unwrap_spec(width=int, w_fillchar=WrappedDefault(u' '))
More information about the pypy-commit
mailing list