Refactor a buffered class...

Michael Spencer mahs at telcopartners.com
Thu Sep 7 00:52:12 EDT 2006


George Sakkis wrote:
> Michael Spencer wrote:
> 
>> Here's a small update to the generator that allows optional handling of the head
>> and the tail:
>>
>> def chunker(s, chunk_size=3, sentry=".", keep_first = False, keep_last = False):
>>      buffer=[]
...
> 
> And here's a (probably) more efficient version, using a deque as a
> buffer:
> 

Perhaps the deque-based solution is more efficient under some conditions, but 
it's significantly slower for all the cases I tested:

Here are some typical results:

Using George's deque buffer:
 >>> time_chunkers(chunkerGS, groups=1000, words_per_group=1000, chunk_size=300)
'get_chunks(...)  30 iterations, 16.70msec per call'
 >>> time_chunkers(chunkerGS, groups=1000, words_per_group=1000, chunk_size=30)
'get_chunks(...)  35 iterations, 14.56msec per call'
 >>> time_chunkers(chunkerGS, groups=1000, words_per_group=1000, chunk_size=3)
'get_chunks(...)  35 iterations, 14.41msec per call'

Using the list buffer:
 >>> time_chunkers(chunker, groups=1000, words_per_group=1000, chunk_size=300)
'get_chunks(...)  85 iterations, 5.88msec per call'
 >>> time_chunkers(chunker, groups=1000, words_per_group=1000, chunk_size=30)
'get_chunks(...)  85 iterations, 5.89msec per call'
 >>> time_chunkers(chunker, groups=1000, words_per_group=1000, chunk_size=3)
'get_chunks(...)  83 iterations, 6.03msec per call'
 >>>

Test functions follow:

def make_seq(groups = 1000, words_per_group = 3, word_length = 76, sentry = "."):
    """Build a flat list of test input for a chunker.

    The list consists of `groups` identical runs; each run is
    `words_per_group` copies of a word made of `word_length` "W"
    characters, followed by a single `sentry` item.

    Example (output wrapped for readability):

        make_seq(groups=2, words_per_group=3, word_length=2, sentry="%")
        -> ['WW', 'WW', 'WW', '%',
            'WW', 'WW', 'WW', '%']
    """
    filler = "W" * word_length
    # One run: the repeated filler word, closed off by the sentry.
    single_run = [filler] * words_per_group
    single_run.append(sentry)
    return single_run * groups

def time_chunkers(chunk_func, groups = 1000, words_per_group=10, chunk_size=3):
    """Test harness for chunker functions.

    Builds a test sequence with make_seq(groups, words_per_group), then
    times how long it takes to collect chunk_func(seq, chunk_size) into a
    list.

    chunk_func      -- callable invoked as chunk_func(sequence, chunk_size);
                       its result is consumed with list()
    groups          -- number of sentry-terminated groups in the sequence
    words_per_group -- number of words placed before each sentry
    chunk_size      -- passed to chunk_func as its second positional argument

    Returns the report string produced by timefunc.
    """
    # Both sizing parameters have to reach the code under test; if they are
    # dropped, every run silently measures the defaults of make_seq and
    # chunk_func regardless of what the caller asked for.
    seq = make_seq(groups, words_per_group)
    def get_chunks(chunk_func, seq, chunk_size):
        # list() makes sure an iterator/generator result is fully consumed
        # inside the timed call.
        return list(chunk_func(seq, chunk_size))
    return timefunc(get_chunks, chunk_func, seq, chunk_size)

def _get_timer():
    """Return a zero-argument callable giving the current time in seconds.

    The callable is meant for measuring intervals: only the difference
    between two readings is meaningful.

    time.perf_counter is used when the interpreter provides it.  The
    original platform-based choice (time.clock on win32, time.time
    elsewhere) is kept only as a fallback for interpreters that predate
    perf_counter, because time.clock was removed in Python 3.8.
    """
    import sys
    import time
    perf_counter = getattr(time, "perf_counter", None)
    if perf_counter is not None:
        return perf_counter
    # Legacy interpreters only: time.clock still exists here.
    if sys.platform == "win32":
        return time.clock
    return time.time

def timefunc(func, *args, **kwds):
    """Call func(*args, **kwds) repeatedly and report the mean time per call.

    The call is repeated until the accumulated time spent inside func
    reaches 0.5 seconds, so func always runs at least once.  Only the time
    between the two timer readings around each call is accumulated.

    Returns a string of the form
        "<name>(...)  <count> iterations, <time><unit> per call"
    where <unit> is "usec" when more than 1000 iterations completed and
    "msec" otherwise.  <name> is func.__name__, or repr(func) for callables
    that have no __name__ (for example functools.partial objects).
    """
    timer = _get_timer()
    count, totaltime = 0, 0
    while totaltime < 0.5:
        t1 = timer()
        func(*args, **kwds)  # return value is irrelevant to the timing
        t2 = timer()
        totaltime += (t2-t1)
        count += 1
    if count > 1000:
        unit = "usec"
        timeper = totaltime * 1000000 / count
    else:
        unit = "msec"
        timeper = totaltime * 1000 / count
    # Not every callable carries __name__; without this fallback the whole
    # timing run would be thrown away by an AttributeError at the very end.
    try:
        name = func.__name__
    except AttributeError:
        name = repr(func)
    return "%s(...)  %s iterations, %.2f%s per call" % \
                (name, count, timeper, unit)








More information about the Python-list mailing list