[pypy-commit] pypy default: merge
cfbolz
noreply at buildbot.pypy.org
Mon Jun 17 14:35:34 CEST 2013
Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch:
Changeset: r64917:53b7cc76daf1
Date: 2013-06-17 14:33 +0200
http://bitbucket.org/pypy/pypy/changeset/53b7cc76daf1/
Log: merge
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -904,6 +904,9 @@
def newlist_str(self, list_s):
return self.newlist([self.wrap(s) for s in list_s])
+ def newlist_unicode(self, list_u):
+ return self.newlist([self.wrap(u) for u in list_u])
+
def newlist_hint(self, sizehint):
from pypy.objspace.std.listobject import make_empty_list_with_size
return make_empty_list_with_size(self, sizehint)
diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py
--- a/pypy/objspace/std/listobject.py
+++ b/pypy/objspace/std/listobject.py
@@ -166,6 +166,12 @@
storage = strategy.erase(list_s)
return W_ListObject.from_storage_and_strategy(space, storage, strategy)
+ @staticmethod
+ def newlist_unicode(space, list_u):
+ strategy = space.fromcache(UnicodeListStrategy)
+ storage = strategy.erase(list_u)
+ return W_ListObject.from_storage_and_strategy(space, storage, strategy)
+
def __repr__(self):
""" representation for debugging purposes """
return "%s(%s, %s)" % (self.__class__.__name__, self.strategy,
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -294,6 +294,9 @@
def newlist_str(self, list_s):
return W_ListObject.newlist_str(self, list_s)
+ def newlist_unicode(self, list_u):
+ return W_ListObject.newlist_unicode(self, list_u)
+
def newdict(self, module=False, instance=False, kwargs=False,
strdict=False):
return W_DictMultiObject.allocate_and_init_instance(
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -11,12 +11,13 @@
from pypy.objspace.std.register_all import register_all
from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
from pypy.objspace.std.stringtype import (
- joined2, sliced, stringendswith, stringstartswith, wrapchar, wrapstr)
+ joined2, sliced, wrapchar, wrapstr)
from rpython.rlib import jit
from rpython.rlib.objectmodel import (
compute_hash, compute_unique_id, specialize)
from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rlib.rstring import StringBuilder, split
+from rpython.rlib.rstring import (StringBuilder, split, rsplit, replace,
+ endswith, startswith)
class W_AbstractStringObject(W_Object):
@@ -287,31 +288,12 @@
bylen = len(by)
if bylen == 0:
raise OperationError(space.w_ValueError, space.wrap("empty separator"))
-
- if bylen == 1 and maxsplit < 0:
- res = []
- start = 0
- # fast path: uses str.rfind(character) and str.count(character)
- by = by[0] # annotator hack: string -> char
- count = value.count(by)
- res = [None] * (count + 1)
- end = len(value)
- while count >= 0:
- assert end >= 0
- prev = value.rfind(by, 0, end)
- start = prev + 1
- assert start >= 0
- res[count] = value[start:end]
- count -= 1
- end = prev
- else:
- res = split(value, by, maxsplit)
-
+ res = split(value, by, maxsplit)
return space.newlist_str(res)
def str_rsplit__String_None_ANY(space, w_self, w_none, w_maxsplit=-1):
maxsplit = space.int_w(w_maxsplit)
- res_w = []
+ res = []
value = w_self._value
i = len(value)-1
while True:
@@ -336,43 +318,21 @@
# the word is value[j+1:i+1]
j1 = j + 1
assert j1 >= 0
- res_w.append(sliced(space, value, j1, i+1, w_self))
+ res.append(value[j1:i+1])
# continue to look from the character before the space before the word
i = j - 1
- res_w.reverse()
- return space.newlist(res_w)
+ res.reverse()
+ return space.newlist_str(res)
-def make_rsplit_with_delim(funcname, sliced):
- from rpython.tool.sourcetools import func_with_new_name
-
- def fn(space, w_self, w_by, w_maxsplit=-1):
- maxsplit = space.int_w(w_maxsplit)
- res_w = []
- value = w_self._value
- end = len(value)
- by = w_by._value
- bylen = len(by)
- if bylen == 0:
- raise OperationError(space.w_ValueError, space.wrap("empty separator"))
-
- while maxsplit != 0:
- next = value.rfind(by, 0, end)
- if next < 0:
- break
- res_w.append(sliced(space, value, next+bylen, end, w_self))
- end = next
- maxsplit -= 1 # NB. if it's already < 0, it stays < 0
-
- res_w.append(sliced(space, value, 0, end, w_self))
- res_w.reverse()
- return space.newlist(res_w)
-
- return func_with_new_name(fn, funcname)
-
-str_rsplit__String_String_ANY = make_rsplit_with_delim('str_rsplit__String_String_ANY',
- sliced)
+def str_rsplit__String_String_ANY(space, w_self, w_by, w_maxsplit=-1):
+ maxsplit = space.int_w(w_maxsplit)
+ value = w_self._value
+ by = w_by._value
+ if not by:
+ raise OperationError(space.w_ValueError, space.wrap("empty separator"))
+ return space.newlist_str(rsplit(value, by, maxsplit))
def str_join__String_ANY(space, w_self, w_list):
l = space.listview_str(w_list)
@@ -524,75 +484,30 @@
return space.wrap(res)
-def _string_replace(space, input, sub, by, maxsplit):
- if maxsplit == 0:
- return space.wrap(input)
-
- if not sub:
- upper = len(input)
- if maxsplit > 0 and maxsplit < upper + 2:
- upper = maxsplit - 1
- assert upper >= 0
-
- try:
- result_size = ovfcheck(upper * len(by))
- result_size = ovfcheck(result_size + upper)
- result_size = ovfcheck(result_size + len(by))
- remaining_size = len(input) - upper
- result_size = ovfcheck(result_size + remaining_size)
- except OverflowError:
- raise OperationError(space.w_OverflowError,
- space.wrap("replace string is too long")
- )
- builder = StringBuilder(result_size)
- for i in range(upper):
- builder.append(by)
- builder.append(input[i])
- builder.append(by)
- builder.append_slice(input, upper, len(input))
- else:
- # First compute the exact result size
- count = input.count(sub)
- if count > maxsplit and maxsplit > 0:
- count = maxsplit
- diff_len = len(by) - len(sub)
- try:
- result_size = ovfcheck(diff_len * count)
- result_size = ovfcheck(result_size + len(input))
- except OverflowError:
- raise OperationError(space.w_OverflowError,
- space.wrap("replace string is too long")
- )
-
- builder = StringBuilder(result_size)
- start = 0
- sublen = len(sub)
-
- while maxsplit != 0:
- next = input.find(sub, start)
- if next < 0:
- break
- builder.append_slice(input, start, next)
- builder.append(by)
- start = next + sublen
- maxsplit -= 1 # NB. if it's already < 0, it stays < 0
-
- builder.append_slice(input, start, len(input))
-
- return space.wrap(builder.build())
-
def str_replace__String_ANY_ANY_ANY(space, w_self, w_sub, w_by, w_maxsplit):
- return _string_replace(space, w_self._value, space.buffer_w(w_sub).as_str(),
- space.buffer_w(w_by).as_str(),
- space.int_w(w_maxsplit))
+ sub = space.buffer_w(w_sub).as_str()
+ by = space.buffer_w(w_by).as_str()
+ maxsplit = space.int_w(w_maxsplit)
+ try:
+ res = replace(w_self._value, sub, by, maxsplit)
+ except OverflowError:
+ raise OperationError(space.w_OverflowError,
+ space.wrap("replace string is too long")
+ )
+ return space.wrap(res)
def str_replace__String_String_String_ANY(space, w_self, w_sub, w_by, w_maxsplit=-1):
- input = w_self._value
sub = w_sub._value
by = w_by._value
maxsplit = space.int_w(w_maxsplit)
- return _string_replace(space, input, sub, by, maxsplit)
+ try:
+ res = replace(w_self._value, sub, by, maxsplit)
+ except OverflowError:
+ raise OperationError(space.w_OverflowError,
+ space.wrap("replace string is too long")
+ )
+ return space.wrap(res)
def _strip(space, w_self, w_chars, left, right):
"internal function called by str_xstrip methods"
@@ -679,7 +594,7 @@
def str_endswith__String_String_ANY_ANY(space, w_self, w_suffix, w_start, w_end):
(u_self, start, end) = _convert_idx_params(space, w_self, w_start,
w_end, True)
- return space.newbool(stringendswith(u_self, w_suffix._value, start, end))
+ return space.newbool(endswith(u_self, w_suffix._value, start, end))
def str_endswith__String_ANY_ANY_ANY(space, w_self, w_suffixes, w_start, w_end):
if not space.isinstance_w(w_suffixes, space.w_tuple):
@@ -692,14 +607,14 @@
return space.call_method(w_u, "endswith", w_suffixes, w_start,
w_end)
suffix = space.str_w(w_suffix)
- if stringendswith(u_self, suffix, start, end):
+ if endswith(u_self, suffix, start, end):
return space.w_True
return space.w_False
def str_startswith__String_String_ANY_ANY(space, w_self, w_prefix, w_start, w_end):
(u_self, start, end) = _convert_idx_params(space, w_self, w_start,
w_end, True)
- return space.newbool(stringstartswith(u_self, w_prefix._value, start, end))
+ return space.newbool(startswith(u_self, w_prefix._value, start, end))
def str_startswith__String_ANY_ANY_ANY(space, w_self, w_prefixes, w_start, w_end):
if not space.isinstance_w(w_prefixes, space.w_tuple):
@@ -712,7 +627,7 @@
return space.call_method(w_u, "startswith", w_prefixes, w_start,
w_end)
prefix = space.str_w(w_prefix)
- if stringstartswith(u_self, prefix, start, end):
+ if startswith(u_self, prefix, start, end):
return space.w_True
return space.w_False
@@ -768,26 +683,7 @@
def str_splitlines__String_ANY(space, w_self, w_keepends):
u_keepends = space.int_w(w_keepends) # truth value, but type checked
data = w_self._value
- selflen = len(data)
- strs_w = []
- i = j = 0
- while i < selflen:
- # Find a line and append it
- while i < selflen and data[i] != '\n' and data[i] != '\r':
- i += 1
- # Skip the line break reading CRLF as one line break
- eol = i
- i += 1
- if i < selflen and data[i-1] == '\r' and data[i] == '\n':
- i += 1
- if u_keepends:
- eol = i
- strs_w.append(sliced(space, data, j, eol, w_self))
- j = i
-
- if j < selflen:
- strs_w.append(sliced(space, data, j, len(data), w_self))
- return space.newlist(strs_w)
+ return space.newlist_str(data.splitlines(u_keepends))
def str_zfill__String_ANY(space, w_self, w_width):
input = w_self._value
diff --git a/pypy/objspace/std/stringtype.py b/pypy/objspace/std/stringtype.py
--- a/pypy/objspace/std/stringtype.py
+++ b/pypy/objspace/std/stringtype.py
@@ -295,28 +295,3 @@
str_typedef.registermethods(globals())
-# ____________________________________________________________
-
-# Helpers for several string implementations
-
-@specialize.argtype(0)
-@jit.elidable
-def stringendswith(u_self, suffix, start, end):
- begin = end - len(suffix)
- if begin < start:
- return False
- for i in range(len(suffix)):
- if u_self[begin+i] != suffix[i]:
- return False
- return True
-
-@specialize.argtype(0)
-@jit.elidable
-def stringstartswith(u_self, prefix, start, end):
- stop = start + len(prefix)
- if stop > end:
- return False
- for i in range(len(prefix)):
- if u_self[start+i] != prefix[i]:
- return False
- return True
diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py
--- a/pypy/objspace/std/test/test_liststrategies.py
+++ b/pypy/objspace/std/test/test_liststrategies.py
@@ -555,10 +555,30 @@
try:
w_l = space.call_method(w_s, "split")
w_l2 = space.call_method(w_s, "split", space.wrap(" "))
+ w_l3 = space.call_method(w_s, "rsplit")
+ w_l4 = space.call_method(w_s, "rsplit", space.wrap(" "))
finally:
del space.newlist
assert space.listview_str(w_l) == ["a", "b", "c"]
assert space.listview_str(w_l2) == ["a", "b", "c"]
+ assert space.listview_str(w_l3) == ["a", "b", "c"]
+ assert space.listview_str(w_l4) == ["a", "b", "c"]
+
+ def test_unicode_uses_newlist_unicode(self):
+ space = self.space
+ w_u = space.wrap(u"a b c")
+ space.newlist = None
+ try:
+ w_l = space.call_method(w_u, "split")
+ w_l2 = space.call_method(w_u, "split", space.wrap(" "))
+ w_l3 = space.call_method(w_u, "rsplit")
+ w_l4 = space.call_method(w_u, "rsplit", space.wrap(" "))
+ finally:
+ del space.newlist
+ assert space.listview_unicode(w_l) == [u"a", u"b", u"c"]
+ assert space.listview_unicode(w_l2) == [u"a", u"b", u"c"]
+ assert space.listview_unicode(w_l3) == [u"a", u"b", u"c"]
+ assert space.listview_unicode(w_l4) == [u"a", u"b", u"c"]
def test_pop_without_argument_is_fast(self):
space = self.space
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -8,15 +8,14 @@
from pypy.objspace.std.multimethod import FailedToImplement
from pypy.objspace.std.noneobject import W_NoneObject
from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
-from pypy.objspace.std.stringobject import (
- W_StringObject, make_rsplit_with_delim)
-from pypy.objspace.std.stringtype import stringendswith, stringstartswith
+from pypy.objspace.std.stringobject import W_StringObject
from pypy.objspace.std.register_all import register_all
from rpython.rlib import jit
from rpython.rlib.rarithmetic import ovfcheck
from rpython.rlib.objectmodel import (
compute_hash, compute_unique_id, specialize)
-from rpython.rlib.rstring import UnicodeBuilder
+from rpython.rlib.rstring import (UnicodeBuilder, split, rsplit, replace,
+ startswith, endswith)
from rpython.rlib.runicode import make_unicode_escape_function
from rpython.tool.sourcetools import func_with_new_name
@@ -490,14 +489,14 @@
def unicode_endswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
self, start, end = _convert_idx_params(space, w_self,
w_start, w_end, True)
- return space.newbool(stringendswith(self, w_substr._value, start, end))
+ return space.newbool(endswith(self, w_substr._value, start, end))
def unicode_startswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
self, start, end = _convert_idx_params(space, w_self, w_start, w_end, True)
# XXX this stuff can be waaay better for ootypebased backends if
# we re-use more of our rpython machinery (ie implement startswith
# with additional parameters as rpython)
- return space.newbool(stringstartswith(self, w_substr._value, start, end))
+ return space.newbool(startswith(self, w_substr._value, start, end))
def unicode_startswith__Unicode_ANY_ANY_ANY(space, w_unistr, w_prefixes,
w_start, w_end):
@@ -507,7 +506,7 @@
w_start, w_end, True)
for w_prefix in space.fixedview(w_prefixes):
prefix = space.unicode_w(w_prefix)
- if stringstartswith(unistr, prefix, start, end):
+ if startswith(unistr, prefix, start, end):
return space.w_True
return space.w_False
@@ -519,7 +518,7 @@
w_start, w_end, True)
for w_suffix in space.fixedview(w_suffixes):
suffix = space.unicode_w(w_suffix)
- if stringendswith(unistr, suffix, start, end):
+ if endswith(unistr, suffix, start, end):
return space.w_True
return space.w_False
@@ -608,17 +607,17 @@
if (self[pos] == u'\r' and pos + 1 < end and
self[pos + 1] == u'\n'):
# Count CRLF as one linebreak
- lines.append(W_UnicodeObject(self[start:pos + keepends * 2]))
+ lines.append(self[start:pos + keepends * 2])
pos += 1
else:
- lines.append(W_UnicodeObject(self[start:pos + keepends]))
+ lines.append(self[start:pos + keepends])
pos += 1
start = pos
else:
pos += 1
if not unicodedb.islinebreak(ord(self[end - 1])):
- lines.append(W_UnicodeObject(self[start:]))
- return space.newlist(lines)
+ lines.append(self[start:])
+ return space.newlist_unicode(lines)
def unicode_find__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
@@ -650,7 +649,7 @@
def unicode_split__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
maxsplit = space.int_w(w_maxsplit)
- res_w = []
+ res = []
value = w_self._value
length = len(value)
i = 0
@@ -673,12 +672,12 @@
maxsplit -= 1 # NB. if it's already < 0, it stays < 0
# the word is value[i:j]
- res_w.append(W_UnicodeObject(value[i:j]))
+ res.append(value[i:j])
# continue to look from the character following the space after the word
i = j + 1
- return space.newlist(res_w)
+ return space.newlist_unicode(res)
def unicode_split__Unicode_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
self = w_self._value
@@ -688,13 +687,13 @@
if delim_len == 0:
raise OperationError(space.w_ValueError,
space.wrap('empty separator'))
- parts = _split_with(self, delim, maxsplit)
- return space.newlist([W_UnicodeObject(part) for part in parts])
+ parts = split(self, delim, maxsplit)
+ return space.newlist_unicode(parts)
def unicode_rsplit__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
maxsplit = space.int_w(w_maxsplit)
- res_w = []
+ res = []
value = w_self._value
i = len(value)-1
while True:
@@ -719,59 +718,32 @@
# the word is value[j+1:i+1]
j1 = j + 1
assert j1 >= 0
- res_w.append(W_UnicodeObject(value[j1:i+1]))
+ res.append(value[j1:i+1])
# continue to look from the character before the space before the word
i = j - 1
- res_w.reverse()
- return space.newlist(res_w)
+ res.reverse()
+ return space.newlist_unicode(res)
-def sliced(space, s, start, stop, orig_obj):
- assert start >= 0
- assert stop >= 0
- if start == 0 and stop == len(s) and space.is_w(space.type(orig_obj), space.w_unicode):
- return orig_obj
- return space.wrap( s[start:stop])
-
-unicode_rsplit__Unicode_Unicode_ANY = make_rsplit_with_delim('unicode_rsplit__Unicode_Unicode_ANY',
- sliced)
-
-def _split_into_chars(self, maxsplit):
- if maxsplit == 0:
- return [self]
- index = 0
- end = len(self)
- parts = [u'']
- maxsplit -= 1
- while maxsplit != 0:
- if index >= end:
- break
- parts.append(self[index])
- index += 1
- maxsplit -= 1
- parts.append(self[index:])
- return parts
-
-def _split_with(self, with_, maxsplit=-1):
- parts = []
- start = 0
- end = len(self)
- length = len(with_)
- while maxsplit != 0:
- index = self.find(with_, start, end)
- if index < 0:
- break
- parts.append(self[start:index])
- start = index + length
- maxsplit -= 1
- parts.append(self[start:])
- return parts
+def unicode_rsplit__Unicode_Unicode_ANY(space, w_self, w_by, w_maxsplit=-1):
+ maxsplit = space.int_w(w_maxsplit)
+ value = w_self._value
+ by = w_by._value
+ if not by:
+ raise OperationError(space.w_ValueError, space.wrap("empty separator"))
+ return space.newlist_unicode(rsplit(value, by, maxsplit))
def unicode_replace__Unicode_Unicode_Unicode_ANY(space, w_self, w_old,
w_new, w_maxsplit):
- return _unicode_replace(space, w_self, w_old._value, w_new._value,
- w_maxsplit)
+ maxsplit = space.int_w(w_maxsplit)
+ try:
+ return W_UnicodeObject(
+ replace(w_self._value, w_old._value, w_new._value, maxsplit))
+ except OverflowError:
+ raise OperationError(
+ space.w_OverflowError,
+ space.wrap("replace string is too long"))
def unicode_replace__Unicode_ANY_ANY_ANY(space, w_self, w_old, w_new,
w_maxsplit):
@@ -783,27 +755,14 @@
new = unicode(space.bufferstr_w(w_new))
else:
new = space.unicode_w(w_new)
- return _unicode_replace(space, w_self, old, new, w_maxsplit)
-
-def _unicode_replace(space, w_self, old, new, w_maxsplit):
- if len(old):
- parts = _split_with(w_self._value, old, space.int_w(w_maxsplit))
- else:
- self = w_self._value
- maxsplit = space.int_w(w_maxsplit)
- parts = _split_into_chars(self, maxsplit)
-
+ maxsplit = space.int_w(w_maxsplit)
try:
- one = ovfcheck(len(parts) * len(new))
- ovfcheck(one + len(w_self._value))
+ return W_UnicodeObject(replace(w_self._value, old, new, maxsplit))
except OverflowError:
raise OperationError(
space.w_OverflowError,
space.wrap("replace string is too long"))
- return W_UnicodeObject(new.join(parts))
-
-
def unicode_encode__Unicode_ANY_ANY(space, w_unistr,
w_encoding=None,
w_errors=None):
@@ -848,7 +807,7 @@
def unicode_expandtabs__Unicode_ANY(space, w_self, w_tabsize):
self = w_self._value
tabsize = space.int_w(w_tabsize)
- parts = _split_with(self, u'\t')
+ parts = self.split(u'\t')
result = [parts[0]]
prevsize = 0
for ch in parts[0]:
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -1,25 +1,50 @@
""" String builder interface and string functions
"""
+import sys
from rpython.annotator.model import (SomeObject, SomeString, s_None, SomeChar,
SomeInteger, SomeUnicodeCodePoint, SomeUnicodeString, SomePtr, SomePBC)
-from rpython.rlib.objectmodel import newlist_hint
+from rpython.rlib.objectmodel import newlist_hint, specialize
from rpython.rlib.rarithmetic import ovfcheck
from rpython.rtyper.extregistry import ExtRegistryEntry
from rpython.tool.pairtype import pairtype
+from rpython.rlib import jit
# -------------- public API for string functions -----------------------
+
+@specialize.argtype(0)
def split(value, by, maxsplit=-1):
+ if isinstance(value, str):
+ assert isinstance(by, str)
+ else:
+ assert isinstance(by, unicode)
bylen = len(by)
if bylen == 0:
raise ValueError("empty separator")
+ start = 0
+ if bylen == 1:
+ # fast path: uses str.rfind(character) and str.count(character)
+ by = by[0] # annotator hack: string -> char
+ count = value.count(by)
+ if 0 <= maxsplit < count:
+ count = maxsplit
+ res = newlist_hint(count + 1)
+ while count > 0:
+ next = value.find(by, start)
+ assert next >= 0 # cannot fail due to the value.count above
+ res.append(value[start:next])
+ start = next + bylen
+ count -= 1
+ res.append(value[start:len(value)])
+ return res
+
if maxsplit > 0:
res = newlist_hint(min(maxsplit + 1, len(value)))
else:
res = []
- start = 0
+
while maxsplit != 0:
next = value.find(by, start)
if next < 0:
@@ -32,7 +57,12 @@
return res
+@specialize.argtype(0)
def rsplit(value, by, maxsplit=-1):
+ if isinstance(value, str):
+ assert isinstance(by, str)
+ else:
+ assert isinstance(by, unicode)
if maxsplit > 0:
res = newlist_hint(min(maxsplit + 1, len(value)))
else:
@@ -54,6 +84,109 @@
res.reverse()
return res
+
+@specialize.argtype(0)
+def replace(input, sub, by, maxsplit=-1):
+ if isinstance(input, str):
+ assert isinstance(sub, str)
+ assert isinstance(by, str)
+ Builder = StringBuilder
+ else:
+ assert isinstance(sub, unicode)
+ assert isinstance(by, unicode)
+ Builder = UnicodeBuilder
+ if maxsplit == 0:
+ return input
+
+ if not sub:
+ upper = len(input)
+ if maxsplit > 0 and maxsplit < upper + 2:
+ upper = maxsplit - 1
+ assert upper >= 0
+
+ try:
+ result_size = ovfcheck(upper * len(by))
+ result_size = ovfcheck(result_size + upper)
+ result_size = ovfcheck(result_size + len(by))
+ remaining_size = len(input) - upper
+ result_size = ovfcheck(result_size + remaining_size)
+ except OverflowError:
+ raise
+ builder = Builder(result_size)
+ for i in range(upper):
+ builder.append(by)
+ builder.append(input[i])
+ builder.append(by)
+ builder.append_slice(input, upper, len(input))
+ else:
+ # First compute the exact result size
+ count = input.count(sub)
+ if count > maxsplit and maxsplit > 0:
+ count = maxsplit
+ diff_len = len(by) - len(sub)
+ try:
+ result_size = ovfcheck(diff_len * count)
+ result_size = ovfcheck(result_size + len(input))
+ except OverflowError:
+ raise
+
+ builder = Builder(result_size)
+ start = 0
+ sublen = len(sub)
+
+ while maxsplit != 0:
+ next = input.find(sub, start)
+ if next < 0:
+ break
+ builder.append_slice(input, start, next)
+ builder.append(by)
+ start = next + sublen
+ maxsplit -= 1 # NB. if it's already < 0, it stays < 0
+
+ builder.append_slice(input, start, len(input))
+
+ return builder.build()
+
+def _normalize_start_end(length, start, end):
+ if start < 0:
+ start += length
+ if start < 0:
+ start = 0
+ if end < 0:
+ end += length
+ if end < 0:
+ end = 0
+ elif end > length:
+ end = length
+ return start, end
+
+@specialize.argtype(0)
+@jit.elidable
+def startswith(u_self, prefix, start=0, end=sys.maxint):
+ length = len(u_self)
+ start, end = _normalize_start_end(length, start, end)
+ stop = start + len(prefix)
+ if stop > end:
+ return False
+ for i in range(len(prefix)):
+ if u_self[start+i] != prefix[i]:
+ return False
+ return True
+
+@specialize.argtype(0)
+@jit.elidable
+def endswith(u_self, suffix, start=0, end=sys.maxint):
+ length = len(u_self)
+ start, end = _normalize_start_end(length, start, end)
+ begin = end - len(suffix)
+ if begin < start:
+ return False
+ for i in range(len(suffix)):
+ if u_self[begin+i] != suffix[i]:
+ return False
+ return True
+
+
# -------------- public API ---------------------------------
INIT_SIZE = 100 # XXX tweak
@@ -271,3 +404,5 @@
def specialize_call(self, hop):
hop.exception_cannot_occur()
+
+
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py
--- a/rpython/rlib/test/test_rstring.py
+++ b/rpython/rlib/test/test_rstring.py
@@ -1,6 +1,8 @@
import sys, py
from rpython.rlib.rstring import StringBuilder, UnicodeBuilder, split, rsplit
+from rpython.rlib.rstring import replace, startswith, endswith
+from rpython.rtyper.test.tool import BaseRtypingTest, LLRtypeMixin
def test_split():
assert split("", 'x') == ['']
@@ -10,9 +12,21 @@
assert split('a|b|c|d', '|') == ['a', 'b', 'c', 'd']
assert split('a|b|c|d', '|', 2) == ['a', 'b', 'c|d']
assert split('a//b//c//d', '//') == ['a', 'b', 'c', 'd']
+ assert split('a//b//c//d', '//', 2) == ['a', 'b', 'c//d']
assert split('endcase test', 'test') == ['endcase ', '']
py.test.raises(ValueError, split, 'abc', '')
+def test_split_unicode():
+ assert split(u"", u'x') == [u'']
+ assert split(u"a", u"a", 1) == [u'', u'']
+ assert split(u" ", u" ", 1) == [u'', u'']
+ assert split(u"aa", u"a", 2) == [u'', u'', u'']
+ assert split(u'a|b|c|d', u'|') == [u'a', u'b', u'c', u'd']
+ assert split(u'a|b|c|d', u'|', 2) == [u'a', u'b', u'c|d']
+ assert split(u'a//b//c//d', u'//') == [u'a', u'b', u'c', u'd']
+ assert split(u'endcase test', u'test') == [u'endcase ', u'']
+ py.test.raises(ValueError, split, u'abc', u'')
+
def test_rsplit():
assert rsplit("a", "a", 1) == ['', '']
assert rsplit(" ", " ", 1) == ['', '']
@@ -23,6 +37,111 @@
assert rsplit('endcase test', 'test') == ['endcase ', '']
py.test.raises(ValueError, rsplit, "abc", '')
+def test_rsplit_unicode():
+ assert rsplit(u"a", u"a", 1) == [u'', u'']
+ assert rsplit(u" ", u" ", 1) == [u'', u'']
+ assert rsplit(u"aa", u"a", 2) == [u'', u'', u'']
+ assert rsplit(u'a|b|c|d', u'|') == [u'a', u'b', u'c', u'd']
+ assert rsplit(u'a|b|c|d', u'|', 2) == [u'a|b', u'c', u'd']
+ assert rsplit(u'a//b//c//d', u'//') == [u'a', u'b', u'c', u'd']
+ assert rsplit(u'endcase test', u'test') == [u'endcase ', u'']
+ py.test.raises(ValueError, rsplit, u"abc", u'')
+
+def test_string_replace():
+ assert replace('one!two!three!', '!', '@', 1) == 'one@two!three!'
+ assert replace('one!two!three!', '!', '') == 'onetwothree'
+ assert replace('one!two!three!', '!', '@', 2) == 'one@two@three!'
+ assert replace('one!two!three!', '!', '@', 3) == 'one@two@three@'
+ assert replace('one!two!three!', '!', '@', 4) == 'one@two@three@'
+ assert replace('one!two!three!', '!', '@', 0) == 'one!two!three!'
+ assert replace('one!two!three!', '!', '@') == 'one@two@three@'
+ assert replace('one!two!three!', 'x', '@') == 'one!two!three!'
+ assert replace('one!two!three!', 'x', '@', 2) == 'one!two!three!'
+ assert replace('abc', '', '-') == '-a-b-c-'
+ assert replace('abc', '', '-', 3) == '-a-b-c'
+ assert replace('abc', '', '-', 0) == 'abc'
+ assert replace('', '', '') == ''
+ assert replace('', '', 'a') == 'a'
+ assert replace('abc', 'ab', '--', 0) == 'abc'
+ assert replace('abc', 'xy', '--') == 'abc'
+ assert replace('123', '123', '') == ''
+ assert replace('123123', '123', '') == ''
+ assert replace('123x123', '123', '') == 'x'
+
+def test_string_replace_overflow():
+ if sys.maxint > 2**31-1:
+ py.test.skip("Wrong platform")
+ s = "a" * (2**16)
+ with py.test.raises(OverflowError):
+ replace(s, "", s)
+ with py.test.raises(OverflowError):
+ replace(s, "a", s)
+ with py.test.raises(OverflowError):
+ replace(s, "a", s, len(s) - 10)
+
+def test_unicode_replace():
+ assert replace(u'one!two!three!', u'!', u'@', 1) == u'one@two!three!'
+ assert replace(u'one!two!three!', u'!', u'') == u'onetwothree'
+ assert replace(u'one!two!three!', u'!', u'@', 2) == u'one@two@three!'
+ assert replace(u'one!two!three!', u'!', u'@', 3) == u'one@two@three@'
+ assert replace(u'one!two!three!', u'!', u'@', 4) == u'one@two@three@'
+ assert replace(u'one!two!three!', u'!', u'@', 0) == u'one!two!three!'
+ assert replace(u'one!two!three!', u'!', u'@') == u'one@two@three@'
+ assert replace(u'one!two!three!', u'x', u'@') == u'one!two!three!'
+ assert replace(u'one!two!three!', u'x', u'@', 2) == u'one!two!three!'
+ assert replace(u'abc', u'', u'-') == u'-a-b-c-'
+ assert replace(u'abc', u'', u'-', 3) == u'-a-b-c'
+ assert replace(u'abc', u'', u'-', 0) == u'abc'
+ assert replace(u'', u'', u'') == u''
+ assert replace(u'', u'', u'a') == u'a'
+ assert replace(u'abc', u'ab', u'--', 0) == u'abc'
+ assert replace(u'abc', u'xy', u'--') == u'abc'
+ assert replace(u'123', u'123', u'') == u''
+ assert replace(u'123123', u'123', u'') == u''
+ assert replace(u'123x123', u'123', u'') == u'x'
+
+def test_unicode_replace_overflow():
+ if sys.maxint > 2**31-1:
+ py.test.skip("Wrong platform")
+ s = u"a" * (2**16)
+ with py.test.raises(OverflowError):
+ replace(s, u"", s)
+ with py.test.raises(OverflowError):
+ replace(s, u"a", s)
+ with py.test.raises(OverflowError):
+ replace(s, u"a", s, len(s) - 10)
+
+def test_startswith():
+ assert startswith('ab', 'ab') is True
+ assert startswith('ab', 'a') is True
+ assert startswith('ab', '') is True
+ assert startswith('x', 'a') is False
+ assert startswith('x', 'x') is True
+ assert startswith('', '') is True
+ assert startswith('', 'a') is False
+ assert startswith('x', 'xx') is False
+ assert startswith('y', 'xx') is False
+ assert startswith('ab', 'a', 0) is True
+ assert startswith('ab', 'a', 1) is False
+ assert startswith('ab', 'b', 1) is True
+ assert startswith('abc', 'bc', 1, 2) is False
+ assert startswith('abc', 'c', -1, 4) is True
+
+def test_endswith():
+ assert endswith('ab', 'ab') is True
+ assert endswith('ab', 'b') is True
+ assert endswith('ab', '') is True
+ assert endswith('x', 'a') is False
+ assert endswith('x', 'x') is True
+ assert endswith('', '') is True
+ assert endswith('', 'a') is False
+ assert endswith('x', 'xx') is False
+ assert endswith('y', 'xx') is False
+ assert endswith('abc', 'ab', 0, 2) is True
+ assert endswith('abc', 'bc', 1) is True
+ assert endswith('abc', 'bc', 2) is False
+ assert endswith('abc', 'b', -3, -1) is True
+
def test_string_builder():
s = StringBuilder()
s.append("a")
@@ -42,4 +161,32 @@
s.append_multiple_char(u'd', 4)
assert s.build() == 'aabcbdddd'
assert isinstance(s.build(), unicode)
-
+
+
+class TestTranslates(LLRtypeMixin, BaseRtypingTest):
+ def test_split_rsplit(self):
+ def fn():
+ res = True
+ res = res and split('a//b//c//d', '//') == ['a', 'b', 'c', 'd']
+ res = res and split('a//b//c//d', '//', 2) == ['a', 'b', 'c//d']
+ res = res and split(u'a//b//c//d', u'//') == [u'a', u'b', u'c', u'd']
+ res = res and split(u'endcase test', u'test') == [u'endcase ', u'']
+ res = res and rsplit('a|b|c|d', '|', 2) == ['a|b', 'c', 'd']
+ res = res and rsplit('a//b//c//d', '//') == ['a', 'b', 'c', 'd']
+ res = res and rsplit(u'a|b|c|d', u'|') == [u'a', u'b', u'c', u'd']
+ res = res and rsplit(u'a|b|c|d', u'|', 2) == [u'a|b', u'c', u'd']
+ res = res and rsplit(u'a//b//c//d', u'//') == [u'a', u'b', u'c', u'd']
+ return res
+ res = self.interpret(fn, [])
+ assert res
+
+ def test_replace(self):
+ def fn():
+ res = True
+ res = res and replace('abc', 'ab', '--', 0) == 'abc'
+ res = res and replace('abc', 'xy', '--') == 'abc'
+ res = res and replace('abc', 'ab', '--', 0) == 'abc'
+ res = res and replace('abc', 'xy', '--') == 'abc'
+ return res
+ res = self.interpret(fn, [])
+ assert res
More information about the pypy-commit
mailing list