[Python-checkins] r77240 - sandbox/trunk/stringbench/stringbench.py

Sat Jan 2 21:41:22 CET 2010

Author: antoine.pitrou
Date: Sat Jan  2 21:41:22 2010
New Revision: 77240

Log:
stringbench additions and fixes by Florent Xicluna (#7462)



Modified:
   sandbox/trunk/stringbench/stringbench.py

Modified: sandbox/trunk/stringbench/stringbench.py
==============================================================================

--- sandbox/trunk/stringbench/stringbench.py	(original)
+++ sandbox/trunk/stringbench/stringbench.py	Sat Jan  2 21:41:22 2010
@@ -9,6 +9,9 @@
 import datetime
 import optparse
 
+VERSION = '2.0'
+
+print 'stringbench v%s' % VERSION
 print sys.version
 print datetime.datetime.now()
 
@@ -32,7 +35,6 @@
 
 
 _RANGE_1000 = range(1000)
-_RANGE_1000 = range(1000)
 _RANGE_100 = range(100)
 _RANGE_10 = range(10)
 
@@ -116,12 +118,8 @@
 
 #### same tests as 'in' but use 'find'
 
-# XXX: TODO: Add rfind
-
-
-
 @bench('("A"*1000).find("A")', "early match, single character", 1000)
-def find_quick_match_single_character(STR):
+def find_test_quick_match_single_character(STR):
     s1 = STR("A" * 1000)
     s2 = STR("A")
     s1_find = s1.find
@@ -153,6 +151,14 @@
     for x in _RANGE_1000:
         s1_find(s2)
 
+@bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
+def find_test_no_match_two_character_bis(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("CA")
+    s1_find = s1.find
+    for x in _RANGE_1000:
+        s1_find(s2)
+
 @bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
 def find_test_slow_match_two_characters(STR):
     s1 = STR("AB" * 300+"C")
@@ -161,7 +167,15 @@
     for x in _RANGE_1000:
         s1_find(s2)
 
-@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s)',
+@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
+def find_test_slow_match_two_characters_bis(STR):
+    s1 = STR("AB" * 300+"CA")
+    s2 = STR("CA")
+    s1_find = s1.find
+    for x in _RANGE_1000:
+        s1_find(s2)
+
+@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
        "late match, 100 characters", 100)
 def find_test_slow_match_100_characters(STR):
     m = STR("ABC"*33)
@@ -171,10 +185,99 @@
     for x in _RANGE_100:
         s1_find(s2)
 
+@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
+       "late match, 100 characters", 100)
+def find_test_slow_match_100_characters_bis(STR):
+    m = STR("ABC"*33)
+    s1 = (m+"D")*500 + "E"+m
+    s2 = "E"+m
+    s1_find = s1.find
+    for x in _RANGE_100:
+        s1_find(s2)
+
+
+#### Same tests for 'rfind'
+
+@bench('("A"*1000).rfind("A")', "early match, single character", 1000)
+def rfind_test_quick_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("A")
+    s1_rfind = s1.rfind
+    for x in _RANGE_1000:
+        s1_rfind(s2)
+
+@bench('("A"*1000).rfind("B")', "no match, single character", 1000)
+def rfind_test_no_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("B")
+    s1_rfind = s1.rfind
+    for x in _RANGE_1000:
+        s1_rfind(s2)
+
+
+@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
+def rfind_test_quick_match_two_characters(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("AB")
+    s1_rfind = s1.rfind
+    for x in _RANGE_1000:
+        s1_rfind(s2)
+
+@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
+def rfind_test_no_match_two_character(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("BC")
+    s1_rfind = s1.rfind
+    for x in _RANGE_1000:
+        s1_rfind(s2)
+
+@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
+def rfind_test_no_match_two_character_bis(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("CA")
+    s1_rfind = s1.rfind
+    for x in _RANGE_1000:
+        s1_rfind(s2)
+
+@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
+def rfind_test_slow_match_two_characters(STR):
+    s1 = STR("C" + "AB" * 300)
+    s2 = STR("CA")
+    s1_rfind = s1.rfind
+    for x in _RANGE_1000:
+        s1_rfind(s2)
+
+@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
+def rfind_test_slow_match_two_characters_bis(STR):
+    s1 = STR("BC" + "AB" * 300)
+    s2 = STR("BC")
+    s1_rfind = s1.rfind
+    for x in _RANGE_1000:
+        s1_rfind(s2)
+
+@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
+       "late match, 100 characters", 100)
+def rfind_test_slow_match_100_characters(STR):
+    m = STR("ABC"*33)
+    s1 = "E"+m + ("D"+m)*500
+    s2 = "E"+m
+    s1_rfind = s1.rfind
+    for x in _RANGE_100:
+        s1_rfind(s2)
+
+@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
+       "late match, 100 characters", 100)
+def rfind_test_slow_match_100_characters_bis(STR):
+    m = STR("ABC"*33)
+    s1 = m+"E" + ("D"+m)*500
+    s2 = m+"E"
+    s1_rfind = s1.rfind
+    for x in _RANGE_100:
+        s1_rfind(s2)
+
+
 #### Now with index.
 # Skip the ones which fail because that would include exception overhead.
-# Add rindex tests.
-
 
 @bench('("A"*1000).index("A")', "early match, single character", 1000)
 def index_test_quick_match_single_character(STR):
@@ -184,7 +287,6 @@
     for x in _RANGE_1000:
         s1_index(s2)
 
-
 @bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
 def index_test_quick_match_two_characters(STR):
     s1 = STR("AB" * 1000)
@@ -201,7 +303,7 @@
     for x in _RANGE_1000:
         s1_index(s2)
 
-@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s)',
+@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
        "late match, 100 characters", 100)
 def index_test_slow_match_100_characters(STR):
     m = STR("ABC"*33)
@@ -211,6 +313,260 @@
     for x in _RANGE_100:
         s1_index(s2)
 
+
+#### Same for rindex
+
+@bench('("A"*1000).rindex("A")', "early match, single character", 1000)
+def rindex_test_quick_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("A")
+    s1_rindex = s1.rindex
+    for x in _RANGE_1000:
+        s1_rindex(s2)
+
+@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
+def rindex_test_quick_match_two_characters(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("AB")
+    s1_rindex = s1.rindex
+    for x in _RANGE_1000:
+        s1_rindex(s2)
+
+@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
+def rindex_test_slow_match_two_characters(STR):
+    s1 = STR("C" + "AB" * 300)
+    s2 = STR("CA")
+    s1_rindex = s1.rindex
+    for x in _RANGE_1000:
+        s1_rindex(s2)
+
+@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)',
+       "late match, 100 characters", 100)
+def rindex_test_slow_match_100_characters(STR):
+    m = STR("ABC"*33)
+    s1 = "E" + m + ("D"+m)*500
+    s2 = "E" + m
+    s1_rindex = s1.rindex
+    for x in _RANGE_100:
+        s1_rindex(s2)
+
+
+#### Same for partition
+
+@bench('("A"*1000).partition("A")', "early match, single character", 1000)
+def partition_test_quick_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("A")
+    s1_partition = s1.partition
+    for x in _RANGE_1000:
+        s1_partition(s2)
+
+@bench('("A"*1000).partition("B")', "no match, single character", 1000)
+def partition_test_no_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("B")
+    s1_partition = s1.partition
+    for x in _RANGE_1000:
+        s1_partition(s2)
+
+
+@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
+def partition_test_quick_match_two_characters(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("AB")
+    s1_partition = s1.partition
+    for x in _RANGE_1000:
+        s1_partition(s2)
+
+@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
+def partition_test_no_match_two_character(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("BC")
+    s1_partition = s1.partition
+    for x in _RANGE_1000:
+        s1_partition(s2)
+
+@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
+def partition_test_slow_match_two_characters(STR):
+    s1 = STR("AB" * 300+"C")
+    s2 = STR("BC")
+    s1_partition = s1.partition
+    for x in _RANGE_1000:
+        s1_partition(s2)
+
+@bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")',
+       "late match, 100 characters", 100)
+def partition_test_slow_match_100_characters(STR):
+    m = STR("ABC"*33)
+    s1 = (m+"D")*500 + m+"E"
+    s2 = m+"E"
+    s1_partition = s1.partition
+    for x in _RANGE_100:
+        s1_partition(s2)
+
+
+#### Same for rpartition
+
+@bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
+def rpartition_test_quick_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("A")
+    s1_rpartition = s1.rpartition
+    for x in _RANGE_1000:
+        s1_rpartition(s2)
+
+@bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
+def rpartition_test_no_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("B")
+    s1_rpartition = s1.rpartition
+    for x in _RANGE_1000:
+        s1_rpartition(s2)
+
+
+@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
+def rpartition_test_quick_match_two_characters(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("AB")
+    s1_rpartition = s1.rpartition
+    for x in _RANGE_1000:
+        s1_rpartition(s2)
+
+@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
+def rpartition_test_no_match_two_character(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("BC")
+    s1_rpartition = s1.rpartition
+    for x in _RANGE_1000:
+        s1_rpartition(s2)
+
+@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
+def rpartition_test_slow_match_two_characters(STR):
+    s1 = STR("C" + "AB" * 300)
+    s2 = STR("CA")
+    s1_rpartition = s1.rpartition
+    for x in _RANGE_1000:
+        s1_rpartition(s2)
+
+@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)',
+       "late match, 100 characters", 100)
+def rpartition_test_slow_match_100_characters(STR):
+    m = STR("ABC"*33)
+    s1 = "E" + m + ("D"+m)*500
+    s2 = "E" + m
+    s1_rpartition = s1.rpartition
+    for x in _RANGE_100:
+        s1_rpartition(s2)
+
+
+#### Same for split(s, 1)
+
+@bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
+def split_test_quick_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("A")
+    s1_split = s1.split
+    for x in _RANGE_1000:
+        s1_split(s2, 1)
+
+@bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
+def split_test_no_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("B")
+    s1_split = s1.split
+    for x in _RANGE_1000:
+        s1_split(s2, 1)
+
+
+@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
+def split_test_quick_match_two_characters(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("AB")
+    s1_split = s1.split
+    for x in _RANGE_1000:
+        s1_split(s2, 1)
+
+@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
+def split_test_no_match_two_character(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("BC")
+    s1_split = s1.split
+    for x in _RANGE_1000:
+        s1_split(s2, 1)
+
+@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
+def split_test_slow_match_two_characters(STR):
+    s1 = STR("AB" * 300+"C")
+    s2 = STR("BC")
+    s1_split = s1.split
+    for x in _RANGE_1000:
+        s1_split(s2, 1)
+
+@bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)',
+       "late match, 100 characters", 100)
+def split_test_slow_match_100_characters(STR):
+    m = STR("ABC"*33)
+    s1 = (m+"D")*500 + m+"E"
+    s2 = m+"E"
+    s1_split = s1.split
+    for x in _RANGE_100:
+        s1_split(s2, 1)
+
+
+#### Same for rsplit(s, 1)
+
+@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
+def rsplit_test_quick_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("A")
+    s1_rsplit = s1.rsplit
+    for x in _RANGE_1000:
+        s1_rsplit(s2, 1)
+
+@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
+def rsplit_test_no_match_single_character(STR):
+    s1 = STR("A" * 1000)
+    s2 = STR("B")
+    s1_rsplit = s1.rsplit
+    for x in _RANGE_1000:
+        s1_rsplit(s2, 1)
+
+
+@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
+def rsplit_test_quick_match_two_characters(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("AB")
+    s1_rsplit = s1.rsplit
+    for x in _RANGE_1000:
+        s1_rsplit(s2, 1)
+
+@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
+def rsplit_test_no_match_two_character(STR):
+    s1 = STR("AB" * 1000)
+    s2 = STR("BC")
+    s1_rsplit = s1.rsplit
+    for x in _RANGE_1000:
+        s1_rsplit(s2, 1)
+
+@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
+def rsplit_test_slow_match_two_characters(STR):
+    s1 = STR("C" + "AB" * 300)
+    s2 = STR("CA")
+    s1_rsplit = s1.rsplit
+    for x in _RANGE_1000:
+        s1_rsplit(s2, 1)
+
+@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)',
+       "late match, 100 characters", 100)
+def rsplit_test_slow_match_100_characters(STR):
+    m = STR("ABC"*33)
+    s1 = "E" + m + ("D"+m)*500
+    s2 = "E" + m
+    s1_rsplit = s1.rsplit
+    for x in _RANGE_100:
+        s1_rsplit(s2, 1)
+
+
 #### Benchmark the operator-based methods
 
 @bench('"A"*10', "repeat 1 character 10 times", 1000)
@@ -382,6 +738,24 @@
     for x in _RANGE_1000:
         s_rsplit(N, 1)
 
+@bench('("Here are some words. "*2).partition(" ")',
+       "split 1 whitespace", 1000)
+def whitespace_partition(STR):
+    sep = STR(" ")
+    s = STR("Here are some words. "*2)
+    s_partition = s.partition
+    for x in _RANGE_1000:
+        s_partition(sep)
+
+@bench('("Here are some words. "*2).rpartition(" ")',
+       "split 1 whitespace", 1000)
+def whitespace_rpartition(STR):
+    sep = STR(" ")
+    s = STR("Here are some words. "*2)
+    s_rpartition = s.rpartition
+    for x in _RANGE_1000:
+        s_rpartition(sep)
+
 human_text = """\
 Python is a dynamic object-oriented programming language that can be
 used for many kinds of software development. It offers strong support
@@ -539,31 +913,35 @@
 
 @bench('GFF3_example.split("\\t")', "tab split", 1000)
 def tab_split_no_limit(STR):
+    sep = STR("\t")
     s = STR(GFF3_example)
     s_split = s.split
     for x in _RANGE_1000:
-        s_split("\t")
+        s_split(sep)
 
 @bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
 def tab_split_limit(STR):
+    sep = STR("\t")
     s = STR(GFF3_example)
     s_split = s.split
     for x in _RANGE_1000:
-        s_split("\t", 8)
+        s_split(sep, 8)
 
 @bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
 def tab_rsplit_no_limit(STR):
+    sep = STR("\t")
     s = STR(GFF3_example)
     s_rsplit = s.rsplit
     for x in _RANGE_1000:
-        s_rsplit("\t")
+        s_rsplit(sep)
 
 @bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
 def tab_rsplit_limit(STR):
+    sep = STR("\t")
     s = STR(GFF3_example)
     s_rsplit = s.rsplit
     for x in _RANGE_1000:
-        s_rsplit("\t", 8)
+        s_rsplit(sep, 8)
 
 #### Count characters
 
@@ -996,6 +1374,8 @@
 
     for title, group in itertools.groupby(bench_functions,
                                       operator.itemgetter(0)):
+        # Flush buffer before each group
+        sys.stdout.flush()
         print "="*10, title
         for (_, k, v) in group:
             if hasattr(v, "is_bench"):
@@ -1024,7 +1404,7 @@
         print "That was zippy!"
     else:
         try:
-            ratio = str_time/uni_time
+            ratio = str_total/uni_total
         except ZeroDivisionError:
             ratio = 0.0
         print "%.2f\t%.2f\t%.1f\t%s" % (