[pypy-commit] pypy default: make a unicode version of replace as well, and expose them both under a specialized interface, "replace"
cfbolz
noreply at buildbot.pypy.org
Mon Jun 17 14:35:25 CEST 2013
Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch:
Changeset: r64910:16bde61a597c
Date: 2013-06-14 18:41 +0200
http://bitbucket.org/pypy/pypy/changeset/16bde61a597c/
Log: make a unicode version of replace as well, and expose them both
under a specialized interface, "replace"
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -82,58 +82,8 @@
res.reverse()
return res
-def string_replace(input, sub, by, maxsplit=-1):
- if maxsplit == 0:
- return input
- if not sub:
- upper = len(input)
- if maxsplit > 0 and maxsplit < upper + 2:
- upper = maxsplit - 1
- assert upper >= 0
- try:
- result_size = ovfcheck(upper * len(by))
- result_size = ovfcheck(result_size + upper)
- result_size = ovfcheck(result_size + len(by))
- remaining_size = len(input) - upper
- result_size = ovfcheck(result_size + remaining_size)
- except OverflowError:
- raise
- builder = StringBuilder(result_size)
- for i in range(upper):
- builder.append(by)
- builder.append(input[i])
- builder.append(by)
- builder.append_slice(input, upper, len(input))
- else:
- # First compute the exact result size
- count = input.count(sub)
- if count > maxsplit and maxsplit > 0:
- count = maxsplit
- diff_len = len(by) - len(sub)
- try:
- result_size = ovfcheck(diff_len * count)
- result_size = ovfcheck(result_size + len(input))
- except OverflowError:
- raise
-
- builder = StringBuilder(result_size)
- start = 0
- sublen = len(sub)
-
- while maxsplit != 0:
- next = input.find(sub, start)
- if next < 0:
- break
- builder.append_slice(input, start, next)
- builder.append(by)
- start = next + sublen
- maxsplit -= 1 # NB. if it's already < 0, it stays < 0
-
- builder.append_slice(input, start, len(input))
-
- return builder.build()
# -------------- public API ---------------------------------
@@ -352,3 +302,75 @@
def specialize_call(self, hop):
hop.exception_cannot_occur()
+
+
+
+def make_replace(func_name, Builder):
+ def replace(input, sub, by, maxsplit=-1):
+ if maxsplit == 0:
+ return input
+
+ if not sub:
+ upper = len(input)
+ if maxsplit > 0 and maxsplit < upper + 2:
+ upper = maxsplit - 1
+ assert upper >= 0
+
+ try:
+ result_size = ovfcheck(upper * len(by))
+ result_size = ovfcheck(result_size + upper)
+ result_size = ovfcheck(result_size + len(by))
+ remaining_size = len(input) - upper
+ result_size = ovfcheck(result_size + remaining_size)
+ except OverflowError:
+ raise
+ builder = Builder(result_size)
+ for i in range(upper):
+ builder.append(by)
+ builder.append(input[i])
+ builder.append(by)
+ builder.append_slice(input, upper, len(input))
+ else:
+ # First compute the exact result size
+ count = input.count(sub)
+ if count > maxsplit and maxsplit > 0:
+ count = maxsplit
+ diff_len = len(by) - len(sub)
+ try:
+ result_size = ovfcheck(diff_len * count)
+ result_size = ovfcheck(result_size + len(input))
+ except OverflowError:
+ raise
+
+ builder = Builder(result_size)
+ start = 0
+ sublen = len(sub)
+
+ while maxsplit != 0:
+ next = input.find(sub, start)
+ if next < 0:
+ break
+ builder.append_slice(input, start, next)
+ builder.append(by)
+ start = next + sublen
+ maxsplit -= 1 # NB. if it's already < 0, it stays < 0
+
+ builder.append_slice(input, start, len(input))
+
+ return builder.build()
+ replace.func_name = func_name
+ return replace
+
+_string_replace = make_replace("_string_replace", StringBuilder)
+_unicode_replace = make_replace("_unicode_replace", UnicodeBuilder)
+
+@specialize.argtype(0)
+def replace(input, sub, by, maxsplit=-1):
+ if isinstance(input, str):
+ assert isinstance(sub, str)
+ assert isinstance(by, str)
+ return _string_replace(input, sub, by, maxsplit)
+ else:
+ assert isinstance(sub, unicode)
+ assert isinstance(by, unicode)
+ return _unicode_replace(input, sub, by, maxsplit)
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py
--- a/rpython/rlib/test/test_rstring.py
+++ b/rpython/rlib/test/test_rstring.py
@@ -1,7 +1,7 @@
import sys, py
from rpython.rlib.rstring import StringBuilder, UnicodeBuilder, split, rsplit
-from rpython.rlib.rstring import string_replace
+from rpython.rlib.rstring import replace
from rpython.rtyper.test.tool import BaseRtypingTest, LLRtypeMixin
def test_split():
@@ -48,37 +48,68 @@
py.test.raises(ValueError, rsplit, u"abc", u'')
def test_string_replace():
- assert string_replace('one!two!three!', '!', '@', 1) == 'one@two!three!'
- assert string_replace('one!two!three!', '!', '') == 'onetwothree'
- assert string_replace('one!two!three!', '!', '@', 2) == 'one@two@three!'
- assert string_replace('one!two!three!', '!', '@', 3) == 'one@two@three@'
- assert string_replace('one!two!three!', '!', '@', 4) == 'one@two@three@'
- assert string_replace('one!two!three!', '!', '@', 0) == 'one!two!three!'
- assert string_replace('one!two!three!', '!', '@') == 'one@two@three@'
- assert string_replace('one!two!three!', 'x', '@') == 'one!two!three!'
- assert string_replace('one!two!three!', 'x', '@', 2) == 'one!two!three!'
- assert string_replace('abc', '', '-') == '-a-b-c-'
- assert string_replace('abc', '', '-', 3) == '-a-b-c'
- assert string_replace('abc', '', '-', 0) == 'abc'
- assert string_replace('', '', '') == ''
- assert string_replace('', '', 'a') == 'a'
- assert string_replace('abc', 'ab', '--', 0) == 'abc'
- assert string_replace('abc', 'xy', '--') == 'abc'
- assert string_replace('123', '123', '') == ''
- assert string_replace('123123', '123', '') == ''
- assert string_replace('123x123', '123', '') == 'x'
+ assert replace('one!two!three!', '!', '@', 1) == 'one@two!three!'
+ assert replace('one!two!three!', '!', '') == 'onetwothree'
+ assert replace('one!two!three!', '!', '@', 2) == 'one@two@three!'
+ assert replace('one!two!three!', '!', '@', 3) == 'one@two@three@'
+ assert replace('one!two!three!', '!', '@', 4) == 'one@two@three@'
+ assert replace('one!two!three!', '!', '@', 0) == 'one!two!three!'
+ assert replace('one!two!three!', '!', '@') == 'one@two@three@'
+ assert replace('one!two!three!', 'x', '@') == 'one!two!three!'
+ assert replace('one!two!three!', 'x', '@', 2) == 'one!two!three!'
+ assert replace('abc', '', '-') == '-a-b-c-'
+ assert replace('abc', '', '-', 3) == '-a-b-c'
+ assert replace('abc', '', '-', 0) == 'abc'
+ assert replace('', '', '') == ''
+ assert replace('', '', 'a') == 'a'
+ assert replace('abc', 'ab', '--', 0) == 'abc'
+ assert replace('abc', 'xy', '--') == 'abc'
+ assert replace('123', '123', '') == ''
+ assert replace('123123', '123', '') == ''
+ assert replace('123x123', '123', '') == 'x'
def test_string_replace_overflow():
if sys.maxint > 2**31-1:
py.test.skip("Wrong platform")
s = "a" * (2**16)
with py.test.raises(OverflowError):
- string_replace(s, "", s)
+ replace(s, "", s)
with py.test.raises(OverflowError):
- string_replace(s, "a", s)
+ replace(s, "a", s)
with py.test.raises(OverflowError):
- string_replace(s, "a", s, len(s) - 10)
+ replace(s, "a", s, len(s) - 10)
+def test_unicode_replace():
+ assert replace(u'one!two!three!', u'!', u'@', 1) == u'one@two!three!'
+ assert replace(u'one!two!three!', u'!', u'') == u'onetwothree'
+ assert replace(u'one!two!three!', u'!', u'@', 2) == u'one@two@three!'
+ assert replace(u'one!two!three!', u'!', u'@', 3) == u'one@two@three@'
+ assert replace(u'one!two!three!', u'!', u'@', 4) == u'one@two@three@'
+ assert replace(u'one!two!three!', u'!', u'@', 0) == u'one!two!three!'
+ assert replace(u'one!two!three!', u'!', u'@') == u'one@two@three@'
+ assert replace(u'one!two!three!', u'x', u'@') == u'one!two!three!'
+ assert replace(u'one!two!three!', u'x', u'@', 2) == u'one!two!three!'
+ assert replace(u'abc', u'', u'-') == u'-a-b-c-'
+ assert replace(u'abc', u'', u'-', 3) == u'-a-b-c'
+ assert replace(u'abc', u'', u'-', 0) == u'abc'
+ assert replace(u'', u'', u'') == u''
+ assert replace(u'', u'', u'a') == u'a'
+ assert replace(u'abc', u'ab', u'--', 0) == u'abc'
+ assert replace(u'abc', u'xy', u'--') == u'abc'
+ assert replace(u'123', u'123', u'') == u''
+ assert replace(u'123123', u'123', u'') == u''
+ assert replace(u'123x123', u'123', u'') == u'x'
+
+def test_unicode_replace_overflow():
+ if sys.maxint > 2**31-1:
+ py.test.skip("Wrong platform")
+ s = u"a" * (2**16)
+ with py.test.raises(OverflowError):
+ replace(s, u"", s)
+ with py.test.raises(OverflowError):
+ replace(s, u"a", s)
+ with py.test.raises(OverflowError):
+ replace(s, u"a", s, len(s) - 10)
def test_string_builder():
s = StringBuilder()
@@ -102,7 +133,7 @@
class TestTranslates(LLRtypeMixin, BaseRtypingTest):
- def test_split_rsplit_translate(self):
+ def test_split_rsplit(self):
def fn():
res = True
res = res and split('a//b//c//d', '//') == ['a', 'b', 'c', 'd']
@@ -118,3 +149,13 @@
res = self.interpret(fn, [])
assert res
+ def test_replace(self):
+ def fn():
+ res = True
+ res = res and replace('abc', 'ab', '--', 0) == 'abc'
+ res = res and replace('abc', 'xy', '--') == 'abc'
+ res = res and replace('abc', 'ab', '--', 0) == 'abc'
+ res = res and replace('abc', 'xy', '--') == 'abc'
+ return res
+ res = self.interpret(fn, [])
+ assert res
More information about the pypy-commit
mailing list