Break up list into groups

Matimus mccredie at gmail.com
Tue Jul 17 13:48:53 EDT 2007


I did some more experimenting and came up with the code below. It
shows several methods. When run, the script tests the robustness of
each method (roughly), and profiles it using timeit. The results from
running on my laptop are shown below the code.


<code>
seqs = [# Original:
        [0xF0, 1, 2, 3, 0xF0, 4, 5, 6, 0xF1, 7, 8, 0xF2, 9, 10, 11,
12, 13,
         0xF0, 14, 0xF1, 15],
        # Single entry:
        [0xF0, 1, 2, 3],
        # empty
        [],
        # No values with 0x80 set
        [1, 2, 3, 14, 15],
        # Does not start with a value that has 0x80 set
        [1, 2, 3, 14, 15, 0xF0, 1, 2, 3]]

expected = [# Original:
            [[0xF0, 1, 2, 3], [0xF0, 4, 5, 6], [0xF1, 7, 8], [0xF2, 9,
10, 11, 12, 13],
             [0xF0, 14], [0xF1, 15]],
            # Single entry:
            [[0xF0, 1, 2, 3]],
            # empty
            [],
            # No values with 0x80 set
            [],
            # Does not start with a value that has 0x80 set
            [[0xF0, 1, 2, 3]]]

def gengroups0(seq):
    group = None
    for val in seq:
        if val & 0x80:
            if group: yield group
            group = []
        try:
            group.append(val)
        except AttributeError:
            pass
    if group: yield group

def getgroups0(seq):
    groups = []
    group = None
    for val in seq:
        if val & 0x80:
            if group:
                groups.append(group)
            group = []
        try:
            group.append(val)
        except AttributeError:
            pass
    if group:
        groups.append(group)
    return groups

def gengroups1(seq):
    idxs = [i for i,v in enumerate(seq) if v&0x80]
    for i,j in zip(idxs,idxs[1:]+[None]):
        yield seq[i:j]

def getgroups1(seq):
    idxs = [i for i,v in enumerate(seq) if v&0x80]
    return [seq[i:j] for i,j in zip(idxs,idxs[1:]+[None])]

# Similar to the partition method on strings
def partition(seq,f=None):
    if f is None:
        f = lambda x:x
    for i,v in enumerate(seq):
        if f(v):
            return seq[:i],[seq[i]],seq[i+1:]
    return seq,[],[]

def rpartition(seq, f=None):
    if f is None:
        f = lambda x:x
    for i,v in zip(range(len(seq)-1,-1,-1),seq[::-1]):
        if f(v):
            return seq[:i],[seq[i]],seq[i+1:]
    return ([],[],seq)

def gengroups2(seq):
    while seq:
        seq, key, val = rpartition(seq, lambda x: x&0x80)
        if key and val: yield key+val

def getgroups2(seq):
    groups = []
    while seq:
        seq, key, val = rpartition(seq, lambda x: x&0x80)
        if key and val:
            groups.append(key+val)
    return groups

def getgroups3(seq):
   groups = []
   for i in seq:
     if 0x80 & i:
       groups.append([i])
     else:
       groups[-1].append(i)
   return [x for x in groups if x]

seq = seqs[0]
if __name__ == "__main__":
    from timeit import Timer
    import __main__
    for i in range(4):
        fname = "getgroups"+str(i)
        f = getattr(__main__,fname)
        print fname
        for i,(s,e) in enumerate(zip(seqs,expected)):
            print "test %d:"%i,
            try:
                if f(s) == e:
                    print "pass"
                else:
                    print "fail"
            except:
                print "error"

        t = Timer(fname+'(seq)',
                   'from __main__ import seq,'+fname)
        print "%.2f usec/pass" % (1000000 * t.timeit(number=100000)/
100000)
        print
</code>

Output from running the above:
getgroups0
test 0: pass
test 1: pass
test 2: pass
test 3: pass
test 4: pass
14.85 usec/pass

getgroups1
test 0: pass
test 1: pass
test 2: pass
test 3: pass
test 4: pass
13.81 usec/pass

getgroups2
test 0: fail
test 1: pass
test 2: pass
test 3: pass
test 4: pass
56.38 usec/pass

getgroups3
test 0: pass
test 1: pass
test 2: pass
test 3: error
test 4: error
16.23 usec/pass

`getgropus2' fails test 0 because it produces a reversed list. That
can easily be fixed by re-reversing the output before returning. But,
since it is by far the slowest method, I didn't bother.

`getgroups3' is a method I got from another post in this thread, just
for comparison.

>From my benchmarks it looks like getgroups1 is the winner. I didn't
scour the thread to test all the methods however.




More information about the Python-list mailing list