[Python-checkins] r46437 - sandbox/trunk/stringbench/stringbench.py

Sat May 27 12:37:46 CEST 2006

Author: andrew.dalke
Date: Sat May 27 12:37:45 2006
New Revision: 46437

Modified:
   sandbox/trunk/stringbench/stringbench.py
Log:
As I mentioned in a previous submit, the total numbers are meaningless.
Well, not quite meaningless, but not directly interpretable.  I did not
weight the different timings so the results are highly biased for
operations on long strings or with long repeats.

I've rescaled some of the tests to bring things into closer agreement
with what I think is important.  Things are now less meaningless, but
still have high bogosity.


Modified: sandbox/trunk/stringbench/stringbench.py
==============================================================================

--- sandbox/trunk/stringbench/stringbench.py	(original)
+++ sandbox/trunk/stringbench/stringbench.py	Sat May 27 12:37:45 2006
@@ -32,6 +32,7 @@
 
 
 _RANGE_1000 = range(1000)
+_RANGE_1000 = range(1000)
 _RANGE_100 = range(100)
 _RANGE_10 = range(10)
 
@@ -271,21 +272,21 @@
 #### Benchmark join
 
 @bench('"A".join("")',
-       "join empty string, with 1 character sep", 1000)
+       "join empty string, with 1 character sep", 100)
 def join_empty_single(STR):
     sep = STR("A")
     s2 = STR("")
     sep_join = sep.join
-    for x in _RANGE_1000:
+    for x in _RANGE_100:
         sep_join(s2)
 
 @bench('"ABCDE".join("")',
-       "join empty string, with 5 character sep", 1000)
+       "join empty string, with 5 character sep", 100)
 def join_empty_5(STR):
     sep = STR("ABCDE")
     s2 = STR("")
     sep_join = sep.join
-    for x in _RANGE_1000:
+    for x in _RANGE_100:
         sep_join(s2)
 
 @bench('"A".join("ABC..Z")',
@@ -324,20 +325,20 @@
     for x in _RANGE_1000:
         sep_join(s2)
 
-@bench('"A".join(["Bob"]*1000))',
-       "join list of 1000 words, with 1 character sep", 1000)
-def join_1000_words_single(STR):
+@bench('"A".join(["Bob"]*100))',
+       "join list of 100 words, with 1 character sep", 1000)
+def join_100_words_single(STR):
     sep = STR("A")
-    s2 = [STR("Bob")]*1000
+    s2 = [STR("Bob")]*100
     sep_join = sep.join
     for x in _RANGE_1000:
         sep_join(s2)
 
-@bench('"ABCDE".join(["Bob"]*1000))',
-       "join list of 1000 words, with 5 character sep", 1000)
-def join_1000_words_5(STR):
+@bench('"ABCDE".join(["Bob"]*100))',
+       "join list of 100 words, with 5 character sep", 1000)
+def join_100_words_5(STR):
     sep = STR("ABCDE")
-    s2 = [STR("Bob")]*1000
+    s2 = [STR("Bob")]*100
     sep_join = sep.join
     for x in _RANGE_1000:
         sep_join(s2)
@@ -390,7 +391,7 @@
 
 Python is distributed under an OSI-approved open source license that
 makes it free to use, even for commercial products.
-"""*50
+"""*25
 human_text_unicode = unicode(human_text)
 def _get_human_text(STR):
     if STR is unicode:
@@ -399,18 +400,18 @@
         return human_text
     raise AssertionError
 
-@bench('human_text.split()', "split whitespace (huge)", 100)
+@bench('human_text.split()', "split whitespace (huge)", 10)
 def whitespace_split_huge(STR):
     s = _get_human_text(STR)
     s_split = s.split
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         s_split()
 
-@bench('human_text.rsplit()', "split whitespace (huge)", 100)
+@bench('human_text.rsplit()', "split whitespace (huge)", 10)
 def whitespace_rsplit_huge(STR):
     s = _get_human_text(STR)
     s_rsplit = s.rsplit
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         s_rsplit()
 
 
@@ -466,25 +467,25 @@
     raise AssertionError
         
 
-@bench('"...text...".split("\\n")', "split 2000 newlines", 100)
+@bench('"...text...".split("\\n")', "split 2000 newlines", 10)
 def newlines_split_2000(STR):
     s = _get_2000_lines(STR)
     s_split = s.split
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         s_split("\n")
         
-@bench('"...text...".rsplit("\\n")', "split 2000 newlines", 100)
+@bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10)
 def newlines_rsplit_2000(STR):
     s = _get_2000_lines(STR)
     s_rsplit = s.rsplit
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         s_rsplit("\n")
         
-@bench('"...text...".splitlines()', "split 2000 newlines", 100)
+@bench('"...text...".splitlines()', "split 2000 newlines", 10)
 def newlines_splitlines_2000(STR):
     s = _get_2000_lines(STR)
     s_splitlines = s.splitlines
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         s_splitlines()
 
 
@@ -508,19 +509,19 @@
 
 ## split dna text on "ACTAT" characters
 @bench('dna.split("ACTAT")',
-       "split on multicharacter separator (dna)", 100)
+       "split on multicharacter separator (dna)", 10)
 def split_multichar_sep_dna(STR):
     s = _get_dna(STR)
     s_split = s.split
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         s_split("ACTAT")
         
 @bench('dna.rsplit("ACTAT")',
-       "split on multicharacter separator (dna)", 100)
+       "split on multicharacter separator (dna)", 10)
 def rsplit_multichar_sep_dna(STR):
     s = _get_dna(STR)
     s_rsplit = s.rsplit
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         s_rsplit("ACTAT")
 
 
@@ -562,11 +563,11 @@
 #### Count characters
 
 @bench('...text.with.2000.newlines.count("\\n")',
-       "count newlines", 100)
+       "count newlines", 10)
 def count_newlines(STR):
     s = _get_2000_lines(STR)
     s_count = s.count
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         s_count("\n")
 
 # Orchid sequences concatenated, from Biopython
@@ -623,7 +624,7 @@
 GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC
 """
 _dna = "".join(_dna.splitlines())
-_dna = _dna * 50
+_dna = _dna * 25
 _dna_unicode = unicode(_dna)
 
 def _get_dna(STR):
@@ -633,11 +634,11 @@
         return _dna
     raise AssertionError
 
-@bench('dna.count("AACT")', "count AACT substrings in DNA example", 100)
+@bench('dna.count("AACT")', "count AACT substrings in DNA example", 10)
 def count_aact(STR):
     seq = _get_dna(STR)
     seq_count = seq.count
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         seq_count("AACT")
 
 ##### startswith and endswith
@@ -800,35 +801,35 @@
 
 @uses_re
 @bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
-       'replace single character, big string', 100)
+       'replace single character, big string', 10)
 def replace_single_character_big_re(STR):
     s = _get_2000_lines(STR)
     pat = re.compile(STR("\n"))
     to_str = STR(" ")
     pat_sub = pat.sub
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         pat_sub(to_str, s)
 
 
 @bench('dna.replace("ATC", "ATT")',
-       'replace multiple characters, dna', 100)
+       'replace multiple characters, dna', 10)
 def replace_multiple_characters_dna(STR):
     seq = _get_dna(STR)
     from_str = STR("ATC")
     to_str = STR("ATT")
     seq_replace = seq.replace
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         seq_replace(from_str, to_str)
 
 # This changes the total number of character
 @bench('"...text.with.2000.newlines.',
-       'replace multiple characters, big string', 100)
+       'replace multiple characters, big string', 10)
 def replace_multiple_character_big(STR):
     s = _get_2000_lines(STR)
     from_str = STR("\n")
     to_str = STR("\r\n")
     s_replace = s.replace
-    for x in _RANGE_100:
+    for x in _RANGE_10:
         s_replace(from_str, to_str)
 
 # This increases the character count
@@ -855,7 +856,7 @@
         s_replace(from_str, to_str)
 
 
-big_s = "A" + ("Z"*1024*1024)
+big_s = "A" + ("Z"*128*1024)
 big_s_unicode = unicode(big_s)
 def _get_big_s(STR):
     if STR is unicode: return big_s_unicode
@@ -864,7 +865,7 @@
 
 # The older replace implementation counted all matches in
 # the string even when it only neeed to make one replacement.
-@bench('("A" + ("Z"*1024*1024)).replace("A", "BB", 1)',
+@bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)',
        'quick replace single character match', 10)
 def quick_replace_single_match(STR):
     s = _get_big_s(STR)
@@ -874,7 +875,7 @@
     for x in _RANGE_10:
         s_replace(from_str, to_str, 1)
 
-@bench('("A" + ("Z"*1024*1024)).replace("AZZ", "BBZZ", 1)',
+@bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)',
        'quick replace multiple character match', 10)
 def quick_replace_multiple_match(STR):
     s = _get_big_s(STR)
@@ -919,35 +920,35 @@
 
 #### Upper- and lower- case conversion
 
-@bench('"Where in the world is Carmen San Deigo?".lower()',
+@bench('("Where in the world is Carmen San Deigo?"*10).lower()',
        "case conversion -- rare", 1000)
 def lower_conversion_rare(STR):
-    s = STR("Where in the world is Carmen San Deigo?")
+    s = STR("Where in the world is Carmen San Deigo?"*10)
     s_lower = s.lower
     for x in _RANGE_1000:
         s_lower()
 
-@bench('"WHERE IN THE WORLD IS CARMEN SAN DEIGO?".lower()',
+@bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()',
        "case conversion -- dense", 1000)
 def lower_conversion_dense(STR):
-    s = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?")
+    s = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10)
     s_lower = s.lower
     for x in _RANGE_1000:
         s_lower()
 
 
-@bench('"wHERE IN THE WORLD IS cARMEN sAN dEIGO?".upper()',
+@bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()',
        "case conversion -- rare", 1000)
 def upper_conversion_rare(STR):
-    s = STR("Where in the world is Carmen San Deigo?")
+    s = STR("Where in the world is Carmen San Deigo?"*10)
     s_upper = s.upper
     for x in _RANGE_1000:
         s_upper()
 
-@bench('"where in the world is carmen san deigo?".upper()',
+@bench('("where in the world is carmen san deigo?"*10).upper()',
        "case conversion -- dense", 1000)
 def upper_conversion_dense(STR):
-    s = STR("where in the world is carmen san deigo?")
+    s = STR("where in the world is carmen san deigo?"*10)
     s_upper = s.upper
     for x in _RANGE_1000:
         s_upper()