naive string question
Steven Taschuk
staschuk at telusplanet.net
Wed Mar 5 02:24:51 EST 2003
Quoth Létezõ:
> Your problem can be solved with an impressive list comprehension:
>
> b= [(i,p) for i,c,p in zip(range(1,len(a)),a[1:],a[:-1]) if c!=p]
>
> The above solution is more pythonical, but could be slower. I did not
> measured their speed.
Once more unto the benchmark, dear friends, and stop the gaps with
our poor-performing functions!
2.43 1.000 original: Kurowski's original version.
2.55 1.049 listcomp: Viktor's impressive list comprehension.
2.03 0.835 listcomp2: A more literal translation of the original.
1.84 0.757 optimized: An optimized version of the original.
4.86 2.000 regex: For the regular expression addict.
4.47 1.840 reducer: An abomination.
(Processor seconds on my machine to run each function 10,000 times
on Kurowski's sample string; ratios of those times; name and brief
description of the implementation. On my machine, the times vary
by up to a few hundredths of a second on successive runs.)
The last two entries are for amusement only.
Code:
#!/usr/bin/env python
from __future__ import division
import re
import time
def original(a):
"""Kurowski's original version."""
b = []
for i in range(1, len(a)):
if a[i] != a[i-1]:
b.append((i, a[i-1]))
return b
def listcomp(a):
"""Viktor's impressive list comprehension."""
return [(i,p) for i,c,p in zip(range(1,len(a)),a[1:],a[:-1]) if c!=p]
def listcomp2(a):
"""A more literal translation of the original."""
return [(i, a[i-1]) for i in range(1, len(a)) if a[i] != a[i-1]]
def optimized(a):
"""An optimized version of the original.
Optimizations include moving a name lookup out of the loop,
remembering the value currently being repeated instead of fetching
a[i-1], and iterating over the string instead of indexing into it.
(It turns out we can also get away with iterating over a instead
of a[1:], and thereby avoid making a whole new string. For the
test string, this is slightly faster despite the extra run of the
loop; this should hold except for tiny strings.)
"""
b = []
if a:
append = b.append
prev = a[0]
i = 0
for cur in a:
if cur != prev:
append((i, prev))
prev = cur
i += 1
return b
_pat = re.compile(r'(.)\1*')
def regex(a):
"""For the regular expression addict."""
return [(match.end(), a[match.start()])
for match in _pat.finditer(a)][:-1]
def reducer(a):
"""An abomination."""
b = []
reduce(lambda x,y: x[1] != y[1] and b.append(x) or y,
zip(range(1, len(a)+1), a), (0, a and a[0]))
return b
def test(f):
for input, expected in [
# based on the behaviour of the original
('', []),
('a', []),
('aa', []),
('ab', [(1, 'a')]),
('abb', [(1, 'a')]),
('abbc', [(1, 'a'), (3, 'b')]),
('aaa-ab-a-bbbaa', [(3, 'a'), (4, '-'), (5, 'a'),
(6, 'b'), (7, '-'), (8, 'a'), (9, '-'), (12, 'b')]),
]:
actual = f(input)
if actual != expected:
assert 0, '%s(%r) == %r, not %r' % (f.__name__,
input, actual, expected)
basis = None
for f in (original, listcomp, listcomp2, optimized, regex, reducer):
test(f)
start = time.clock()
for _ in xrange(10000):
result = f('aaa-ab-a-bbbaa')
end = time.clock()
elapsed = end - start
if basis is None:
basis = elapsed
print '%.2f %.3f %s' % (elapsed, elapsed/basis,
f.__name__ + ': ' + f.__doc__.splitlines()[0])
--
Steven Taschuk staschuk at telusplanet.net
"Its force is immeasurable. Even Computer cannot determine it."
-- _Space: 1999_ episode "Black Sun"
More information about the Python-list
mailing list