Break up list into groups
George Sakkis
george.sakkis at gmail.com
Wed Jul 18 20:35:46 EDT 2007
On Jul 17, 1:48 pm, Matimus <mccre... at gmail.com> wrote:
> I did some more experimenting and came up with the code below. It
> shows several methods. When run, the script tests the robustness of
> each method (roughly), and profiles it using timeit. The results from
> running on my laptop are shown below the code.
>
> <code>
> seqs = [# Original:
> [0xF0, 1, 2, 3, 0xF0, 4, 5, 6, 0xF1, 7, 8, 0xF2, 9, 10, 11,
> 12, 13,
> 0xF0, 14, 0xF1, 15],
> # Single entry:
> [0xF0, 1, 2, 3],
> # empty
> [],
> # No values with 0x80 set
> [1, 2, 3, 14, 15],
> # Does not start with a value that has 0x80 set
> [1, 2, 3, 14, 15, 0xF0, 1, 2, 3]]
>
> expected = [# Original:
> [[0xF0, 1, 2, 3], [0xF0, 4, 5, 6], [0xF1, 7, 8], [0xF2, 9,
> 10, 11, 12, 13],
> [0xF0, 14], [0xF1, 15]],
> # Single entry:
> [[0xF0, 1, 2, 3]],
> # empty
> [],
> # No values with 0x80 set
> [],
> # Does not start with a value that has 0x80 set
> [[0xF0, 1, 2, 3]]]
>
> def gengroups0(seq):
> group = None
> for val in seq:
> if val & 0x80:
> if group: yield group
> group = []
> try:
> group.append(val)
> except AttributeError:
> pass
> if group: yield group
>
> def getgroups0(seq):
> groups = []
> group = None
> for val in seq:
> if val & 0x80:
> if group:
> groups.append(group)
> group = []
> try:
> group.append(val)
> except AttributeError:
> pass
> if group:
> groups.append(group)
> return groups
>
> def gengroups1(seq):
> idxs = [i for i,v in enumerate(seq) if v&0x80]
> for i,j in zip(idxs,idxs[1:]+[None]):
> yield seq[i:j]
>
> def getgroups1(seq):
> idxs = [i for i,v in enumerate(seq) if v&0x80]
> return [seq[i:j] for i,j in zip(idxs,idxs[1:]+[None])]
>
> # Similar to the partition method on strings
> def partition(seq,f=None):
> if f is None:
> f = lambda x:x
> for i,v in enumerate(seq):
> if f(v):
> return seq[:i],[seq[i]],seq[i+1:]
> return seq,[],[]
>
> def rpartition(seq, f=None):
> if f is None:
> f = lambda x:x
> for i,v in zip(range(len(seq)-1,-1,-1),seq[::-1]):
> if f(v):
> return seq[:i],[seq[i]],seq[i+1:]
> return ([],[],seq)
>
> def gengroups2(seq):
> while seq:
> seq, key, val = rpartition(seq, lambda x: x&0x80)
> if key and val: yield key+val
>
> def getgroups2(seq):
> groups = []
> while seq:
> seq, key, val = rpartition(seq, lambda x: x&0x80)
> if key and val:
> groups.append(key+val)
> return groups
>
> def getgroups3(seq):
> groups = []
> for i in seq:
> if 0x80 & i:
> groups.append([i])
> else:
> groups[-1].append(i)
> return [x for x in groups if x]
>
> seq = seqs[0]
> if __name__ == "__main__":
> from timeit import Timer
> import __main__
> for i in range(4):
> fname = "getgroups"+str(i)
> f = getattr(__main__,fname)
> print fname
> for i,(s,e) in enumerate(zip(seqs,expected)):
> print "test %d:"%i,
> try:
> if f(s) == e:
> print "pass"
> else:
> print "fail"
> except:
> print "error"
>
> t = Timer(fname+'(seq)',
> 'from __main__ import seq,'+fname)
> print "%.2f usec/pass" % (1000000 * t.timeit(number=100000)/
> 100000)
> print
> </code>
>
> Output from running the above:
> getgroups0
> test 0: pass
> test 1: pass
> test 2: pass
> test 3: pass
> test 4: pass
> 14.85 usec/pass
>
> getgroups1
> test 0: pass
> test 1: pass
> test 2: pass
> test 3: pass
> test 4: pass
> 13.81 usec/pass
>
> getgroups2
> test 0: fail
> test 1: pass
> test 2: pass
> test 3: pass
> test 4: pass
> 56.38 usec/pass
>
> getgroups3
> test 0: pass
> test 1: pass
> test 2: pass
> test 3: error
> test 4: error
> 16.23 usec/pass
>
> `getgropus2' fails test 0 because it produces a reversed list. That
> can easily be fixed by re-reversing the output before returning. But,
> since it is by far the slowest method, I didn't bother.
>
> `getgroups3' is a method I got from another post in this thread, just
> for comparison.
>
> >From my benchmarks it looks like getgroups1 is the winner. I didn't
>
> scour the thread to test all the methods however.
Here's a small improvement of getgroups1, both in time and memory:
from itertools import islice
def getgroups4(seq):
idxs = [i for i,v in enumerate(seq) if v&0x80]
idxs.append(None)
return [seq[i:j] for i,j in izip(idxs, islice(idxs,1,None))]
George
More information about the Python-list
mailing list