how to find the longst element list of lists

Peter Otten __peter__ at web.de
Tue Jan 9 07:23:11 EST 2007


Steven D'Aprano wrote:

> On Mon, 08 Jan 2007 13:55:40 +0100, Peter Otten wrote:
> 
>>> The precise results depend on the version of Python you're running, the
>>> amount of memory you have, other processes running, and the details of
>>> what's in the list you are trying to sort. But as my test shows, sort
>>> has some overhead that makes it a trivial amount slower for sufficiently
>>> small lists, but for everything else you're highly unlikely to beat it.
>> 
>> Try again with tN.timeit(1) and a second list that is random.shuffle()d
>> and copied to L before each measurement. list.sort() treats already
>> sorted lists specially.
> 
> Or, simply shuffle the list itself. Why copy it?

To feed the same data to every algorithm. This is an act of fairness, though
with negligable impact on the benchmark results, I suspect :-)
 
> In my tests, sorting still wins, and by roughly the same factor.
 
> One optimization that might shift the balance would be to remove the
> list copying in the non-sort code (list_of_lists[1:]). That's going to be
> very time consuming for big lists.
 
With the tweak that you suggest above the loop wins for 100 items on my
machine...

         N       loop      iloop        max       sort
         1   0.000008   0.000019   0.000012   0.000010
        10   0.000008   0.000008   0.000009   0.000013
       100   0.000095   0.000037   0.000028   0.000088
      1000   0.000374   0.000341   0.001304   0.001204
      5000   0.001930   0.001719   0.001212   0.007062

if you can trust the timings for small values of N. 
The script to generate this table has become somewhat baroque :-)

import random
import timeit

timers = []
def bench(f):
    t = timeit.Timer("getlongest(L)", "from __main__ import %s as
getlongest, L, items; L[:] = items" % f.__name__)
    t.name = f.__name__.split("_")[-1]
    t.function = f
    timers.append(t)
    return f

@bench
def getlongest_loop(lists):
    longest_list = lists[0]
    longest_length = len(longest_list)
    for a_list in lists[1:]:
        a_length = len(a_list)
        if a_length > longest_length:
            longest_list, longest_length = a_list, a_length
    return longest_list

@bench
def getlongest_iloop(lists):
    lists = iter(lists)
    longest_list = lists.next()
    longest_length = len(longest_list)
    for a_list in lists:
        a_length = len(a_list)
        if a_length > longest_length:
            longest_list, longest_length = a_list, a_length
    return longest_list

@bench
def getlongest_max(lists):
    return max(lists, key=len)

@bench
def getlongest_sort(lists):
    lists.sort(key=len)
    return lists[-1]

def make_list_of_lists(length):
    lol = [[None]*i for i in xrange(length)]
    random.shuffle(lol)
    return lol

def measure(N):
    print "%10d" % N,
    for t in timers:
        print "%10.6f" % t.timeit(1),
    print

if __name__ == "__main__":
    import sys
    if "--test" in sys.argv:
        L = make_list_of_lists(100)
        expected = [None]*99
        for t in timers:
            assert t.function(L[:]) == expected
        raise SystemExit
    L = []
    print "N".rjust(10),
    print " ".join(t.name.rjust(10) for t in timers)
    for N in [1, 10, 100, 1000, 5000]:
        items = make_list_of_lists(N)
        measure(N)

Peter



More information about the Python-list mailing list