[Python-checkins] r77240 - sandbox/trunk/stringbench/stringbench.py
antoine.pitrou
python-checkins at python.org
Sat Jan 2 21:41:22 CET 2010
Author: antoine.pitrou
Date: Sat Jan 2 21:41:22 2010
New Revision: 77240
Log:
stringbench additions and fixes by Florent Xicluna (#7462)
Modified:
sandbox/trunk/stringbench/stringbench.py
Modified: sandbox/trunk/stringbench/stringbench.py
==============================================================================
--- sandbox/trunk/stringbench/stringbench.py (original)
+++ sandbox/trunk/stringbench/stringbench.py Sat Jan 2 21:41:22 2010
@@ -9,6 +9,9 @@
import datetime
import optparse
+VERSION = '2.0'
+
+print 'stringbench v%s' % VERSION
print sys.version
print datetime.datetime.now()
@@ -32,7 +35,6 @@
_RANGE_1000 = range(1000)
-_RANGE_1000 = range(1000)
_RANGE_100 = range(100)
_RANGE_10 = range(10)
@@ -116,12 +118,8 @@
#### same tests as 'in' but use 'find'
-# XXX: TODO: Add rfind
-
-
-
@bench('("A"*1000).find("A")', "early match, single character", 1000)
-def find_quick_match_single_character(STR):
+def find_test_quick_match_single_character(STR):
s1 = STR("A" * 1000)
s2 = STR("A")
s1_find = s1.find
@@ -153,6 +151,14 @@
for x in _RANGE_1000:
s1_find(s2)
+@bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
+def find_test_no_match_two_character_bis(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("CA")
+ s1_find = s1.find
+ for x in _RANGE_1000:
+ s1_find(s2)
+
@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
def find_test_slow_match_two_characters(STR):
s1 = STR("AB" * 300+"C")
@@ -161,7 +167,15 @@
for x in _RANGE_1000:
s1_find(s2)
-@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s)',
+@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
+def find_test_slow_match_two_characters_bis(STR):
+ s1 = STR("AB" * 300+"CA")
+ s2 = STR("CA")
+ s1_find = s1.find
+ for x in _RANGE_1000:
+ s1_find(s2)
+
+@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
"late match, 100 characters", 100)
def find_test_slow_match_100_characters(STR):
m = STR("ABC"*33)
@@ -171,10 +185,99 @@
for x in _RANGE_100:
s1_find(s2)
+@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
+ "late match, 100 characters", 100)
+def find_test_slow_match_100_characters_bis(STR):
+ m = STR("ABC"*33)
+ s1 = (m+"D")*500 + "E"+m
+ s2 = "E"+m
+ s1_find = s1.find
+ for x in _RANGE_100:
+ s1_find(s2)
+
+
+#### Same tests for 'rfind'
+
+@bench('("A"*1000).rfind("A")', "early match, single character", 1000)
+def rfind_test_quick_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("A")
+ s1_rfind = s1.rfind
+ for x in _RANGE_1000:
+ s1_rfind(s2)
+
+@bench('("A"*1000).rfind("B")', "no match, single character", 1000)
+def rfind_test_no_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("B")
+ s1_rfind = s1.rfind
+ for x in _RANGE_1000:
+ s1_rfind(s2)
+
+
+@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
+def rfind_test_quick_match_two_characters(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("AB")
+ s1_rfind = s1.rfind
+ for x in _RANGE_1000:
+ s1_rfind(s2)
+
+@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
+def rfind_test_no_match_two_character(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("BC")
+ s1_rfind = s1.rfind
+ for x in _RANGE_1000:
+ s1_rfind(s2)
+
+@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
+def rfind_test_no_match_two_character_bis(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("CA")
+ s1_rfind = s1.rfind
+ for x in _RANGE_1000:
+ s1_rfind(s2)
+
+@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
+def rfind_test_slow_match_two_characters(STR):
+ s1 = STR("C" + "AB" * 300)
+ s2 = STR("CA")
+ s1_rfind = s1.rfind
+ for x in _RANGE_1000:
+ s1_rfind(s2)
+
+@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
+def rfind_test_slow_match_two_characters_bis(STR):
+ s1 = STR("BC" + "AB" * 300)
+ s2 = STR("BC")
+ s1_rfind = s1.rfind
+ for x in _RANGE_1000:
+ s1_rfind(s2)
+
+@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
+ "late match, 100 characters", 100)
+def rfind_test_slow_match_100_characters(STR):
+ m = STR("ABC"*33)
+ s1 = "E"+m + ("D"+m)*500
+ s2 = "E"+m
+ s1_rfind = s1.rfind
+ for x in _RANGE_100:
+ s1_rfind(s2)
+
+@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
+ "late match, 100 characters", 100)
+def rfind_test_slow_match_100_characters_bis(STR):
+ m = STR("ABC"*33)
+ s1 = m+"E" + ("D"+m)*500
+ s2 = m+"E"
+ s1_rfind = s1.rfind
+ for x in _RANGE_100:
+ s1_rfind(s2)
+
+
#### Now with index.
# Skip the ones which fail because that would include exception overhead.
-# Add rindex tests.
-
@bench('("A"*1000).index("A")', "early match, single character", 1000)
def index_test_quick_match_single_character(STR):
@@ -184,7 +287,6 @@
for x in _RANGE_1000:
s1_index(s2)
-
@bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
def index_test_quick_match_two_characters(STR):
s1 = STR("AB" * 1000)
@@ -201,7 +303,7 @@
for x in _RANGE_1000:
s1_index(s2)
-@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s)',
+@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
"late match, 100 characters", 100)
def index_test_slow_match_100_characters(STR):
m = STR("ABC"*33)
@@ -211,6 +313,260 @@
for x in _RANGE_100:
s1_index(s2)
+
+#### Same for rindex
+
+@bench('("A"*1000).rindex("A")', "early match, single character", 1000)
+def rindex_test_quick_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("A")
+ s1_rindex = s1.rindex
+ for x in _RANGE_1000:
+ s1_rindex(s2)
+
+@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
+def rindex_test_quick_match_two_characters(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("AB")
+ s1_rindex = s1.rindex
+ for x in _RANGE_1000:
+ s1_rindex(s2)
+
+@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
+def rindex_test_slow_match_two_characters(STR):
+ s1 = STR("C" + "AB" * 300)
+ s2 = STR("CA")
+ s1_rindex = s1.rindex
+ for x in _RANGE_1000:
+ s1_rindex(s2)
+
+@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)',
+ "late match, 100 characters", 100)
+def rindex_test_slow_match_100_characters(STR):
+ m = STR("ABC"*33)
+ s1 = "E" + m + ("D"+m)*500
+ s2 = "E" + m
+ s1_rindex = s1.rindex
+ for x in _RANGE_100:
+ s1_rindex(s2)
+
+
+#### Same for partition
+
+@bench('("A"*1000).partition("A")', "early match, single character", 1000)
+def partition_test_quick_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("A")
+ s1_partition = s1.partition
+ for x in _RANGE_1000:
+ s1_partition(s2)
+
+@bench('("A"*1000).partition("B")', "no match, single character", 1000)
+def partition_test_no_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("B")
+ s1_partition = s1.partition
+ for x in _RANGE_1000:
+ s1_partition(s2)
+
+
+@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
+def partition_test_quick_match_two_characters(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("AB")
+ s1_partition = s1.partition
+ for x in _RANGE_1000:
+ s1_partition(s2)
+
+@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
+def partition_test_no_match_two_character(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("BC")
+ s1_partition = s1.partition
+ for x in _RANGE_1000:
+ s1_partition(s2)
+
+@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
+def partition_test_slow_match_two_characters(STR):
+ s1 = STR("AB" * 300+"C")
+ s2 = STR("BC")
+ s1_partition = s1.partition
+ for x in _RANGE_1000:
+ s1_partition(s2)
+
+@bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")',
+ "late match, 100 characters", 100)
+def partition_test_slow_match_100_characters(STR):
+ m = STR("ABC"*33)
+ s1 = (m+"D")*500 + m+"E"
+ s2 = m+"E"
+ s1_partition = s1.partition
+ for x in _RANGE_100:
+ s1_partition(s2)
+
+
+#### Same for rpartition
+
+@bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
+def rpartition_test_quick_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("A")
+ s1_rpartition = s1.rpartition
+ for x in _RANGE_1000:
+ s1_rpartition(s2)
+
+@bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
+def rpartition_test_no_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("B")
+ s1_rpartition = s1.rpartition
+ for x in _RANGE_1000:
+ s1_rpartition(s2)
+
+
+@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
+def rpartition_test_quick_match_two_characters(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("AB")
+ s1_rpartition = s1.rpartition
+ for x in _RANGE_1000:
+ s1_rpartition(s2)
+
+@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
+def rpartition_test_no_match_two_character(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("BC")
+ s1_rpartition = s1.rpartition
+ for x in _RANGE_1000:
+ s1_rpartition(s2)
+
+@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
+def rpartition_test_slow_match_two_characters(STR):
+ s1 = STR("C" + "AB" * 300)
+ s2 = STR("CA")
+ s1_rpartition = s1.rpartition
+ for x in _RANGE_1000:
+ s1_rpartition(s2)
+
+@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)',
+ "late match, 100 characters", 100)
+def rpartition_test_slow_match_100_characters(STR):
+ m = STR("ABC"*33)
+ s1 = "E" + m + ("D"+m)*500
+ s2 = "E" + m
+ s1_rpartition = s1.rpartition
+ for x in _RANGE_100:
+ s1_rpartition(s2)
+
+
+#### Same for split(s, 1)
+
+@bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
+def split_test_quick_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("A")
+ s1_split = s1.split
+ for x in _RANGE_1000:
+ s1_split(s2, 1)
+
+@bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
+def split_test_no_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("B")
+ s1_split = s1.split
+ for x in _RANGE_1000:
+ s1_split(s2, 1)
+
+
+@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
+def split_test_quick_match_two_characters(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("AB")
+ s1_split = s1.split
+ for x in _RANGE_1000:
+ s1_split(s2, 1)
+
+@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
+def split_test_no_match_two_character(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("BC")
+ s1_split = s1.split
+ for x in _RANGE_1000:
+ s1_split(s2, 1)
+
+@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
+def split_test_slow_match_two_characters(STR):
+ s1 = STR("AB" * 300+"C")
+ s2 = STR("BC")
+ s1_split = s1.split
+ for x in _RANGE_1000:
+ s1_split(s2, 1)
+
+@bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)',
+ "late match, 100 characters", 100)
+def split_test_slow_match_100_characters(STR):
+ m = STR("ABC"*33)
+ s1 = (m+"D")*500 + m+"E"
+ s2 = m+"E"
+ s1_split = s1.split
+ for x in _RANGE_100:
+ s1_split(s2, 1)
+
+
+#### Same for rsplit(s, 1)
+
+@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
+def rsplit_test_quick_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("A")
+ s1_rsplit = s1.rsplit
+ for x in _RANGE_1000:
+ s1_rsplit(s2, 1)
+
+@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
+def rsplit_test_no_match_single_character(STR):
+ s1 = STR("A" * 1000)
+ s2 = STR("B")
+ s1_rsplit = s1.rsplit
+ for x in _RANGE_1000:
+ s1_rsplit(s2, 1)
+
+
+@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
+def rsplit_test_quick_match_two_characters(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("AB")
+ s1_rsplit = s1.rsplit
+ for x in _RANGE_1000:
+ s1_rsplit(s2, 1)
+
+@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
+def rsplit_test_no_match_two_character(STR):
+ s1 = STR("AB" * 1000)
+ s2 = STR("BC")
+ s1_rsplit = s1.rsplit
+ for x in _RANGE_1000:
+ s1_rsplit(s2, 1)
+
+@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
+def rsplit_test_slow_match_two_characters(STR):
+ s1 = STR("C" + "AB" * 300)
+ s2 = STR("CA")
+ s1_rsplit = s1.rsplit
+ for x in _RANGE_1000:
+ s1_rsplit(s2, 1)
+
+@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)',
+ "late match, 100 characters", 100)
+def rsplit_test_slow_match_100_characters(STR):
+ m = STR("ABC"*33)
+ s1 = "E" + m + ("D"+m)*500
+ s2 = "E" + m
+ s1_rsplit = s1.rsplit
+ for x in _RANGE_100:
+ s1_rsplit(s2, 1)
+
+
#### Benchmark the operator-based methods
@bench('"A"*10', "repeat 1 character 10 times", 1000)
@@ -382,6 +738,24 @@
for x in _RANGE_1000:
s_rsplit(N, 1)
+@bench('("Here are some words. "*2).partition(" ")',
+ "split 1 whitespace", 1000)
+def whitespace_partition(STR):
+ sep = STR(" ")
+ s = STR("Here are some words. "*2)
+ s_partition = s.partition
+ for x in _RANGE_1000:
+ s_partition(sep)
+
+@bench('("Here are some words. "*2).rpartition(" ")',
+ "split 1 whitespace", 1000)
+def whitespace_rpartition(STR):
+ sep = STR(" ")
+ s = STR("Here are some words. "*2)
+ s_rpartition = s.rpartition
+ for x in _RANGE_1000:
+ s_rpartition(sep)
+
human_text = """\
Python is a dynamic object-oriented programming language that can be
used for many kinds of software development. It offers strong support
@@ -539,31 +913,35 @@
@bench('GFF3_example.split("\\t")', "tab split", 1000)
def tab_split_no_limit(STR):
+ sep = STR("\t")
s = STR(GFF3_example)
s_split = s.split
for x in _RANGE_1000:
- s_split("\t")
+ s_split(sep)
@bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
def tab_split_limit(STR):
+ sep = STR("\t")
s = STR(GFF3_example)
s_split = s.split
for x in _RANGE_1000:
- s_split("\t", 8)
+ s_split(sep, 8)
@bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
def tab_rsplit_no_limit(STR):
+ sep = STR("\t")
s = STR(GFF3_example)
s_rsplit = s.rsplit
for x in _RANGE_1000:
- s_rsplit("\t")
+ s_rsplit(sep)
@bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
def tab_rsplit_limit(STR):
+ sep = STR("\t")
s = STR(GFF3_example)
s_rsplit = s.rsplit
for x in _RANGE_1000:
- s_rsplit("\t", 8)
+ s_rsplit(sep, 8)
#### Count characters
@@ -996,6 +1374,8 @@
for title, group in itertools.groupby(bench_functions,
operator.itemgetter(0)):
+ # Flush buffer before each group
+ sys.stdout.flush()
print "="*10, title
for (_, k, v) in group:
if hasattr(v, "is_bench"):
@@ -1024,7 +1404,7 @@
print "That was zippy!"
else:
try:
- ratio = str_time/uni_time
+ ratio = str_total/uni_total
except ZeroDivisionError:
ratio = 0.0
print "%.2f\t%.2f\t%.1f\t%s" % (
More information about the Python-checkins
mailing list