Break up list into groups

Ron Adam rrr at ronadam.com
Tue Jul 17 14:26:20 EDT 2007



Matimus wrote:
> I did some more experimenting and came up with the code below. It
> shows several methods. When run, the script tests the robustness of
> each method (roughly), and profiles it using timeit. The results from
> running on my laptop are shown below the code.

Try this one...

def groupdata(data, fn):
     start = 0
     for n in range(1, len(data)):
         if fn(data[n]):
             yield data[start:n]
             start = n
     yield data[start:]

print list(groupdata(l, lambda x: x & 0x80))


Cheers ;-)
   Ron



> <code>
> seqs = [# Original:
>         [0xF0, 1, 2, 3, 0xF0, 4, 5, 6, 0xF1, 7, 8, 0xF2, 9, 10, 11,
> 12, 13,
>          0xF0, 14, 0xF1, 15],
>         # Single entry:
>         [0xF0, 1, 2, 3],
>         # empty
>         [],
>         # No values with 0x80 set
>         [1, 2, 3, 14, 15],
>         # Does not start with a value that has 0x80 set
>         [1, 2, 3, 14, 15, 0xF0, 1, 2, 3]]
> 
> expected = [# Original:
>             [[0xF0, 1, 2, 3], [0xF0, 4, 5, 6], [0xF1, 7, 8], [0xF2, 9,
> 10, 11, 12, 13],
>              [0xF0, 14], [0xF1, 15]],
>             # Single entry:
>             [[0xF0, 1, 2, 3]],
>             # empty
>             [],
>             # No values with 0x80 set
>             [],
>             # Does not start with a value that has 0x80 set
>             [[0xF0, 1, 2, 3]]]
> 
> def gengroups0(seq):
>     group = None
>     for val in seq:
>         if val & 0x80:
>             if group: yield group
>             group = []
>         try:
>             group.append(val)
>         except AttributeError:
>             pass
>     if group: yield group
> 
> def getgroups0(seq):
>     groups = []
>     group = None
>     for val in seq:
>         if val & 0x80:
>             if group:
>                 groups.append(group)
>             group = []
>         try:
>             group.append(val)
>         except AttributeError:
>             pass
>     if group:
>         groups.append(group)
>     return groups
> 
> def gengroups1(seq):
>     idxs = [i for i,v in enumerate(seq) if v&0x80]
>     for i,j in zip(idxs,idxs[1:]+[None]):
>         yield seq[i:j]
> 
> def getgroups1(seq):
>     idxs = [i for i,v in enumerate(seq) if v&0x80]
>     return [seq[i:j] for i,j in zip(idxs,idxs[1:]+[None])]
> 
> # Similar to the partition method on strings
> def partition(seq,f=None):
>     if f is None:
>         f = lambda x:x
>     for i,v in enumerate(seq):
>         if f(v):
>             return seq[:i],[seq[i]],seq[i+1:]
>     return seq,[],[]
> 
> def rpartition(seq, f=None):
>     if f is None:
>         f = lambda x:x
>     for i,v in zip(range(len(seq)-1,-1,-1),seq[::-1]):
>         if f(v):
>             return seq[:i],[seq[i]],seq[i+1:]
>     return ([],[],seq)
> 
> def gengroups2(seq):
>     while seq:
>         seq, key, val = rpartition(seq, lambda x: x&0x80)
>         if key and val: yield key+val
> 
> def getgroups2(seq):
>     groups = []
>     while seq:
>         seq, key, val = rpartition(seq, lambda x: x&0x80)
>         if key and val:
>             groups.append(key+val)
>     return groups
> 
> def getgroups3(seq):
>    groups = []
>    for i in seq:
>      if 0x80 & i:
>        groups.append([i])
>      else:
>        groups[-1].append(i)
>    return [x for x in groups if x]
> 
> seq = seqs[0]
> if __name__ == "__main__":
>     from timeit import Timer
>     import __main__
>     for i in range(4):
>         fname = "getgroups"+str(i)
>         f = getattr(__main__,fname)
>         print fname
>         for i,(s,e) in enumerate(zip(seqs,expected)):
>             print "test %d:"%i,
>             try:
>                 if f(s) == e:
>                     print "pass"
>                 else:
>                     print "fail"
>             except:
>                 print "error"
> 
>         t = Timer(fname+'(seq)',
>                    'from __main__ import seq,'+fname)
>         print "%.2f usec/pass" % (1000000 * t.timeit(number=100000)/
> 100000)
>         print
> </code>
> 
> Output from running the above:
> getgroups0
> test 0: pass
> test 1: pass
> test 2: pass
> test 3: pass
> test 4: pass
> 14.85 usec/pass
> 
> getgroups1
> test 0: pass
> test 1: pass
> test 2: pass
> test 3: pass
> test 4: pass
> 13.81 usec/pass
> 
> getgroups2
> test 0: fail
> test 1: pass
> test 2: pass
> test 3: pass
> test 4: pass
> 56.38 usec/pass
> 
> getgroups3
> test 0: pass
> test 1: pass
> test 2: pass
> test 3: error
> test 4: error
> 16.23 usec/pass
> 
> `getgropus2' fails test 0 because it produces a reversed list. That
> can easily be fixed by re-reversing the output before returning. But,
> since it is by far the slowest method, I didn't bother.
> 
> `getgroups3' is a method I got from another post in this thread, just
> for comparison.
> 
>>From my benchmarks it looks like getgroups1 is the winner. I didn't
> scour the thread to test all the methods however.
> 



More information about the Python-list mailing list