[pypy-commit] pypy default: Speed up str.split('char') a little bit. Unsure why CPython is so much faster
arigo
noreply at buildbot.pypy.org
Sat May 28 16:59:57 CEST 2011
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r44578:a9ccee57412b
Date: 2011-05-28 15:13 +0000
http://bitbucket.org/pypy/pypy/changeset/a9ccee57412b/
Log: Speed up str.split('char') a little bit. Unsure why CPython is so
much faster at doing this on a big string (benchmark: a 60MB string
containing 3 million occurrences of the separator character).
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -252,15 +252,30 @@
res_w = []
start = 0
- while maxsplit != 0:
- next = value.find(by, start)
- if next < 0:
- break
- res_w.append(sliced(space, value, start, next, w_self))
- start = next + bylen
- maxsplit -= 1 # NB. if it's already < 0, it stays < 0
+ if bylen == 1 and maxsplit < 0:
+ # fast path: uses str.rfind(character) and str.count(character)
+ by = by[0] # annotator hack: string -> char
+ count = value.count(by)
+ res_w = [None] * (count + 1)
+ end = len(value)
+ while count >= 0:
+ assert end >= 0
+ prev = value.rfind(by, 0, end)
+ start = prev + 1
+ assert start >= 0
+ res_w[count] = sliced(space, value, start, end, w_self)
+ count -= 1
+ end = prev
+ else:
+ while maxsplit != 0:
+ next = value.find(by, start)
+ if next < 0:
+ break
+ res_w.append(sliced(space, value, start, next, w_self))
+ start = next + bylen
+ maxsplit -= 1 # NB. if it's already < 0, it stays < 0
+ res_w.append(sliced(space, value, start, len(value), w_self))
- res_w.append(sliced(space, value, start, len(value), w_self))
return space.newlist(res_w)
def str_rsplit__String_None_ANY(space, w_self, w_none, w_maxsplit=-1):
More information about the pypy-commit
mailing list