[pypy-commit] pypy default: fix issue #3137: rsplit of unicode strings that end with a non-ascii char was broken
cfbolz
pypy.commits at gmail.com
Tue Dec 31 15:05:02 EST 2019
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch:
Changeset: r98430:742d3ed68d7d
Date: 2019-12-31 21:04 +0100
http://bitbucket.org/pypy/pypy/changeset/742d3ed68d7d/
Log: fix issue #3137: rsplit of unicode strings that end with a non-ascii
char was broken
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -359,6 +359,9 @@
assert u''.rsplit('aaa') == [u'']
assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c']
+ def test_rsplit_bug(self):
+ assert u'Vestur- og Mið'.rsplit() == [u'Vestur-', u'og', u'Mið']
+
def test_split_rsplit_str_unicode(self):
x = 'abc'.split(u'b')
assert x == [u'a', u'c']
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -139,7 +139,7 @@
if by is None:
res = []
- i = len(value) - 1
+ i = _decr(value, len(value), isutf8)
while True:
# starting from the end, find the end of the next word
while i >= 0:
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py
--- a/rpython/rlib/test/test_rstring.py
+++ b/rpython/rlib/test/test_rstring.py
@@ -88,6 +88,7 @@
assert rsplit('baba', 'a', isutf8=1) == ['b', 'b', '']
assert rsplit('b b', isutf8=1) == ['b', 'b']
assert rsplit('b\xe1\x9a\x80b', isutf8=1) == ['b', 'b']
+ assert rsplit('b\xe1\x9a\x80', isutf8=1) == ['b']
def test_string_replace():
def check_replace(value, sub, *args, **kwargs):
More information about the pypy-commit mailing list