[pypy-commit] pypy default: make a unicode version of replace as well, and expose them both under a specialized interface, "replace"

cfbolz noreply at buildbot.pypy.org
Mon Jun 17 14:35:25 CEST 2013


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: 
Changeset: r64910:16bde61a597c
Date: 2013-06-14 18:41 +0200
http://bitbucket.org/pypy/pypy/changeset/16bde61a597c/

Log:	make a unicode version of replace as well, and expose them both
	under a specialized interface, "replace"

diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -82,58 +82,8 @@
     res.reverse()
     return res
 
-def string_replace(input, sub, by, maxsplit=-1):
-    if maxsplit == 0:
-        return input
 
-    if not sub:
-        upper = len(input)
-        if maxsplit > 0 and maxsplit < upper + 2:
-            upper = maxsplit - 1
-            assert upper >= 0
 
-        try:
-            result_size = ovfcheck(upper * len(by))
-            result_size = ovfcheck(result_size + upper)
-            result_size = ovfcheck(result_size + len(by))
-            remaining_size = len(input) - upper
-            result_size = ovfcheck(result_size + remaining_size)
-        except OverflowError:
-            raise
-        builder = StringBuilder(result_size)
-        for i in range(upper):
-            builder.append(by)
-            builder.append(input[i])
-        builder.append(by)
-        builder.append_slice(input, upper, len(input))
-    else:
-        # First compute the exact result size
-        count = input.count(sub)
-        if count > maxsplit and maxsplit > 0:
-            count = maxsplit
-        diff_len = len(by) - len(sub)
-        try:
-            result_size = ovfcheck(diff_len * count)
-            result_size = ovfcheck(result_size + len(input))
-        except OverflowError:
-            raise
-
-        builder = StringBuilder(result_size)
-        start = 0
-        sublen = len(sub)
-
-        while maxsplit != 0:
-            next = input.find(sub, start)
-            if next < 0:
-                break
-            builder.append_slice(input, start, next)
-            builder.append(by)
-            start = next + sublen
-            maxsplit -= 1   # NB. if it's already < 0, it stays < 0
-
-        builder.append_slice(input, start, len(input))
-
-    return builder.build()
 
 # -------------- public API ---------------------------------
 
@@ -352,3 +302,75 @@
 
     def specialize_call(self, hop):
         hop.exception_cannot_occur()
+
+
+
+def make_replace(func_name, Builder):
+    def replace(input, sub, by, maxsplit=-1):
+        if maxsplit == 0:
+            return input
+
+        if not sub:
+            upper = len(input)
+            if maxsplit > 0 and maxsplit < upper + 2:
+                upper = maxsplit - 1
+                assert upper >= 0
+
+            try:
+                result_size = ovfcheck(upper * len(by))
+                result_size = ovfcheck(result_size + upper)
+                result_size = ovfcheck(result_size + len(by))
+                remaining_size = len(input) - upper
+                result_size = ovfcheck(result_size + remaining_size)
+            except OverflowError:
+                raise
+            builder = Builder(result_size)
+            for i in range(upper):
+                builder.append(by)
+                builder.append(input[i])
+            builder.append(by)
+            builder.append_slice(input, upper, len(input))
+        else:
+            # First compute the exact result size
+            count = input.count(sub)
+            if count > maxsplit and maxsplit > 0:
+                count = maxsplit
+            diff_len = len(by) - len(sub)
+            try:
+                result_size = ovfcheck(diff_len * count)
+                result_size = ovfcheck(result_size + len(input))
+            except OverflowError:
+                raise
+
+            builder = Builder(result_size)
+            start = 0
+            sublen = len(sub)
+
+            while maxsplit != 0:
+                next = input.find(sub, start)
+                if next < 0:
+                    break
+                builder.append_slice(input, start, next)
+                builder.append(by)
+                start = next + sublen
+                maxsplit -= 1   # NB. if it's already < 0, it stays < 0
+
+            builder.append_slice(input, start, len(input))
+
+        return builder.build()
+    replace.func_name = func_name
+    return replace
+
+_string_replace = make_replace("_string_replace", StringBuilder)
+_unicode_replace = make_replace("_unicode_replace", UnicodeBuilder)
+
+@specialize.argtype(0)
+def replace(input, sub, by, maxsplit=-1):
+    if isinstance(input, str):
+        assert isinstance(sub, str)
+        assert isinstance(by, str)
+        return _string_replace(input, sub, by, maxsplit)
+    else:
+        assert isinstance(sub, unicode)
+        assert isinstance(by, unicode)
+        return _unicode_replace(input, sub, by, maxsplit)
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py
--- a/rpython/rlib/test/test_rstring.py
+++ b/rpython/rlib/test/test_rstring.py
@@ -1,7 +1,7 @@
 import sys, py
 
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder, split, rsplit
-from rpython.rlib.rstring import string_replace
+from rpython.rlib.rstring import replace
 from rpython.rtyper.test.tool import BaseRtypingTest, LLRtypeMixin
 
 def test_split():
@@ -48,37 +48,68 @@
     py.test.raises(ValueError, rsplit, u"abc", u'')
 
 def test_string_replace():
-    assert string_replace('one!two!three!', '!', '@', 1) == 'one@two!three!'
-    assert string_replace('one!two!three!', '!', '') == 'onetwothree'
-    assert string_replace('one!two!three!', '!', '@', 2) == 'one@two@three!'
-    assert string_replace('one!two!three!', '!', '@', 3) == 'one@two@three@'
-    assert string_replace('one!two!three!', '!', '@', 4) == 'one@two@three@'
-    assert string_replace('one!two!three!', '!', '@', 0) == 'one!two!three!'
-    assert string_replace('one!two!three!', '!', '@') == 'one@two@three@'
-    assert string_replace('one!two!three!', 'x', '@') == 'one!two!three!'
-    assert string_replace('one!two!three!', 'x', '@', 2) == 'one!two!three!'
-    assert string_replace('abc', '', '-') == '-a-b-c-'
-    assert string_replace('abc', '', '-', 3) == '-a-b-c'
-    assert string_replace('abc', '', '-', 0) == 'abc'
-    assert string_replace('', '', '') == ''
-    assert string_replace('', '', 'a') == 'a'
-    assert string_replace('abc', 'ab', '--', 0) == 'abc'
-    assert string_replace('abc', 'xy', '--') == 'abc'
-    assert string_replace('123', '123', '') == ''
-    assert string_replace('123123', '123', '') == ''
-    assert string_replace('123x123', '123', '') == 'x'
+    assert replace('one!two!three!', '!', '@', 1) == 'one@two!three!'
+    assert replace('one!two!three!', '!', '') == 'onetwothree'
+    assert replace('one!two!three!', '!', '@', 2) == 'one@two@three!'
+    assert replace('one!two!three!', '!', '@', 3) == 'one@two@three@'
+    assert replace('one!two!three!', '!', '@', 4) == 'one@two@three@'
+    assert replace('one!two!three!', '!', '@', 0) == 'one!two!three!'
+    assert replace('one!two!three!', '!', '@') == 'one@two@three@'
+    assert replace('one!two!three!', 'x', '@') == 'one!two!three!'
+    assert replace('one!two!three!', 'x', '@', 2) == 'one!two!three!'
+    assert replace('abc', '', '-') == '-a-b-c-'
+    assert replace('abc', '', '-', 3) == '-a-b-c'
+    assert replace('abc', '', '-', 0) == 'abc'
+    assert replace('', '', '') == ''
+    assert replace('', '', 'a') == 'a'
+    assert replace('abc', 'ab', '--', 0) == 'abc'
+    assert replace('abc', 'xy', '--') == 'abc'
+    assert replace('123', '123', '') == ''
+    assert replace('123123', '123', '') == ''
+    assert replace('123x123', '123', '') == 'x'
 
 def test_string_replace_overflow():
     if sys.maxint > 2**31-1:
         py.test.skip("Wrong platform")
     s = "a" * (2**16)
     with py.test.raises(OverflowError):
-        string_replace(s, "", s)
+        replace(s, "", s)
     with py.test.raises(OverflowError):
-        string_replace(s, "a", s)
+        replace(s, "a", s)
     with py.test.raises(OverflowError):
-        string_replace(s, "a", s, len(s) - 10)
+        replace(s, "a", s, len(s) - 10)
 
+def test_unicode_replace():
+    assert replace(u'one!two!three!', u'!', u'@', 1) == u'one@two!three!'
+    assert replace(u'one!two!three!', u'!', u'') == u'onetwothree'
+    assert replace(u'one!two!three!', u'!', u'@', 2) == u'one@two@three!'
+    assert replace(u'one!two!three!', u'!', u'@', 3) == u'one@two@three@'
+    assert replace(u'one!two!three!', u'!', u'@', 4) == u'one@two@three@'
+    assert replace(u'one!two!three!', u'!', u'@', 0) == u'one!two!three!'
+    assert replace(u'one!two!three!', u'!', u'@') == u'one@two@three@'
+    assert replace(u'one!two!three!', u'x', u'@') == u'one!two!three!'
+    assert replace(u'one!two!three!', u'x', u'@', 2) == u'one!two!three!'
+    assert replace(u'abc', u'', u'-') == u'-a-b-c-'
+    assert replace(u'abc', u'', u'-', 3) == u'-a-b-c'
+    assert replace(u'abc', u'', u'-', 0) == u'abc'
+    assert replace(u'', u'', u'') == u''
+    assert replace(u'', u'', u'a') == u'a'
+    assert replace(u'abc', u'ab', u'--', 0) == u'abc'
+    assert replace(u'abc', u'xy', u'--') == u'abc'
+    assert replace(u'123', u'123', u'') == u''
+    assert replace(u'123123', u'123', u'') == u''
+    assert replace(u'123x123', u'123', u'') == u'x'
+
+def test_unicode_replace_overflow():
+    if sys.maxint > 2**31-1:
+        py.test.skip("Wrong platform")
+    s = u"a" * (2**16)
+    with py.test.raises(OverflowError):
+        replace(s, u"", s)
+    with py.test.raises(OverflowError):
+        replace(s, u"a", s)
+    with py.test.raises(OverflowError):
+        replace(s, u"a", s, len(s) - 10)
 
 def test_string_builder():
     s = StringBuilder()
@@ -102,7 +133,7 @@
 
 
 class TestTranslates(LLRtypeMixin, BaseRtypingTest):
-    def test_split_rsplit_translate(self):
+    def test_split_rsplit(self):
         def fn():
             res = True
             res = res and split('a//b//c//d', '//') == ['a', 'b', 'c', 'd']
@@ -118,3 +149,13 @@
         res = self.interpret(fn, [])
         assert res
 
+    def test_replace(self):
+        def fn():
+            res = True
+            res = res and replace('abc', 'ab', '--', 0) == 'abc'
+            res = res and replace('abc', 'xy', '--') == 'abc'
+            res = res and replace('abc', 'ab', '--', 0) == 'abc'
+            res = res and replace('abc', 'xy', '--') == 'abc'
+            return res
+        res = self.interpret(fn, [])
+        assert res


More information about the pypy-commit mailing list