[pypy-commit] pypy default: fix issue #3137: rsplit of unicode strings that end with a non-ascii char was broken

cfbolz pypy.commits at gmail.com
Tue Dec 31 15:05:02 EST 2019


Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: 
Changeset: r98430:742d3ed68d7d
Date: 2019-12-31 21:04 +0100
http://bitbucket.org/pypy/pypy/changeset/742d3ed68d7d/

Log:	fix issue #3137: rsplit() without a separator argument was broken for
	unicode strings that end with a non-ASCII character

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -359,6 +359,9 @@
         assert u''.rsplit('aaa') == [u'']
         assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c']
 
+    def test_rsplit_bug(self):
+        assert u'Vestur- og Mið'.rsplit() == [u'Vestur-', u'og', u'Mið']
+
     def test_split_rsplit_str_unicode(self):
         x = 'abc'.split(u'b')
         assert x == [u'a', u'c']
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -139,7 +139,7 @@
     if by is None:
         res = []
 
-        i = len(value) - 1
+        i = _decr(value, len(value), isutf8)
         while True:
             # starting from the end, find the end of the next word
             while i >= 0:
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py
--- a/rpython/rlib/test/test_rstring.py
+++ b/rpython/rlib/test/test_rstring.py
@@ -88,6 +88,7 @@
     assert rsplit('baba', 'a', isutf8=1) == ['b', 'b', '']
     assert rsplit('b b', isutf8=1) == ['b', 'b']
     assert rsplit('b\xe1\x9a\x80b', isutf8=1) == ['b', 'b']
+    assert rsplit('b\xe1\x9a\x80', isutf8=1) == ['b']
 
 def test_string_replace():
     def check_replace(value, sub, *args, **kwargs):


More information about the pypy-commit mailing list