[Tutor] Re: Comparing lists - timings

Wed, 10 Jul 2002 12:02:31 +0545

This is a multi-part message in MIME format.
--------------080802070107090108030902
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit

Derrick 'dman' Hudson wrote:
[snip]
> # build your lists first, however you do that
> l1 = [data]
> l2 = [data]
> 
> # put all the elements of the first list into the dictionary as keys
> d1 = {}
> for item in l1 :
>     d1[item] = None
> 
> for item in l2 :
>     if d1.has_key( item ) :
>         print item , "is in both lists"
>     else :
>         print item , "is NOT in both lists"
> 
[snip]

Derrick, you are just great! Nice algo!

Here's the timings on my system:
dict is Derricks algo and list is the old everybody's algo
smaller is faster

with dups
size    dict    list    results match
    100 0.001959 0.005347 1
   1000 0.022661 0.087229 1
  10000 0.223140 0.908544 1
  50000 1.114087 4.359394 1

with out dups
size    dict    list    results match
    100 0.001510 0.008115 1
   1000 0.016246 0.796433 1
   5000 0.080216 20.643879 1

and the test program is attached.

Ashish

--------------080802070107090108030902
Content-Type: text/plain;
 name="perf.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="perf.py"

import time
import random

def generateList1(listSize):
    " list can have repeated numbers"
    l = []
    while len(l) < listSize:
        l.append(random.randint(1,100))
    return l

def generateList2(listSize):
    " list has only unique numbers"
    l = []
    while len(l) < listSize:
        i = random.randint(1, 1000000L)
        if i not in l:
            l.append(i)
    return l

def duplicateDict(list1, list2):
    # while not exactly same, i believe it is similar and
    # basically follows the algo given
    d1 = {}
    result = []
    for item in list1:
        d1[item] = None
    for item in list2:
        if d1.has_key(item):
            result.append(item) # present in both list
    return result

def duplicateList(list1, list2):
    # the number of time a number appears in the resulting
    # list depends on the number of times it occurs in the
    # list we iterate over so to be same as duplicateDict
    # we iterate over list2
    return [i for i in list2 if i in list1]

def timeIt(sizes, listFunction):
    print 'size    dict    list   results match'
    for s in sizes:
        list1 = listFunction(s)
        list2 = listFunction(s)

        startTime = time.time()
        dups1 = duplicateDict(list1, list2)
        endTime = time.time()
        dictTime = endTime - startTime

        startTime = time.time()
        dups2 = duplicateList(list1, list2)
        endTime = time.time()
        listTime = endTime - startTime

        print '%6d %f %f %d' % (s, dictTime, listTime, dups1 == dups2)

print 'with dups'
timeIt([100, 1000, 10000, 50000], generateList1)
print
print 'with out dups'
timeIt([100, 1000, 5000], generateList2)

--------------080802070107090108030902--