list conversion question

Andrew Dalke adalke at mindspring.com
Sat Sep 4 16:27:32 EDT 2004


Peter Otten wrote:
> Yet another option, not benchmarked, perhaps clearer:

>>>>hist = [0, 1, 0, 5, 43]
>>>>indices = range(len(hist))
>>>>indices.sort(key=hist.__getitem__)
>>>>indices
> 
> [0, 2, 1, 3, 4]

Looks the fastest to me.  Here's my results, with
the benchmark below.  In this list:
   sort0 = Paul McGuire's solution with a lambda
   sort1 = my solution with the pair flipped so the
             default sort works
   sort2 = my solution using Python 2.4's "keys"
   sort3 = Jp Calderone's variant of that with itemgetter
   sort4 = your (Petter Otten's) solution

   list of size 0
sort0 0.0932559967041
sort1 0.0773079395294
sort2 0.176142930984
sort3 0.23094701767
sort4 0.089989900589
   list of size 10
sort0 0.866734981537
sort1 0.524667978287
sort2 0.553129911423
sort3 0.529242992401
sort4 0.265748023987
   list of size 100
sort0 18.9233298302
sort1 5.04968500137
sort2 5.11950612068
sort3 4.75884985924
sort4 3.0580329895
   list of size 1000
sort0 262.217221022
sort1 70.5779349804
sort2 69.9501719475
sort3 54.6528921127
sort4 39.5281660557

Here's my benchmark code


import random, operator

def make_random_list(n):
     # Ensure there will be duplicates
     choose_from = range(n//3+1)
     return [random.choice(choose_from) for i in range(n)]

def do_sort0(hist):
     values = [ i for i in enumerate(hist)]
     values.sort(lambda a,b: cmp(b[1],a[1]))
     return [ a for a,b in values ]

def do_sort1(hist):
     pairs = [(value, offset) for (offset, value) in enumerate(hist)]
     pairs.sort()
     return [offset for (value, offset) in pairs]

def do_sort2(hist):
     return [pair[0] for pair in sorted(enumerate(hist),
                                 key=lambda pair: pair[1])]

def do_sort3(hist):
     return map(operator.itemgetter(0), sorted(enumerate(hist),
                                        key=operator.itemgetter(1)))

def do_sort4(hist):
     indices = range(len(hist))
     indices.sort(key=hist.__getitem__)
     return indices

data = make_random_list(100)
assert (do_sort0(data) == do_sort1(data) == do_sort2(data) ==
         do_sort3(data) == do_sort4(data))

import timeit

for size in [0, 10, 100, 1000]:
     print "  list of size", size
     for i in range(5):
       name = "sort%d" % i
       t = timeit.Timer(
         setup= ("import __main__\nhist=__main__.make_random_list(%d)" % 
size),
         stmt = "__main__.do_%s(hist)" % name)
       print name, t.timeit(10000)

					Andrew
					dalke at dalkescientific.com



More information about the Python-list mailing list