[Tutor] Re: Comparing lists - timings
Ashish
asis@graffiti.net
Wed, 10 Jul 2002 12:02:31 +0545
This is a multi-part message in MIME format.
--------------080802070107090108030902
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit
Derrick 'dman' Hudson wrote:
[snip]
> # build your lists first, however you do that
> l1 = [data]
> l2 = [data]
>
> # put all the elements of the first list into the dictionary as keys
> d1 = {}
> for item in l1 :
> d1[item] = None
>
> for item in l2 :
> if d1.has_key( item ) :
> print item , "is in both lists"
> else :
> print item , "is NOT in both lists"
>
[snip]
Derrick, you are just great! Nice algo!
Here's the timings on my system:
dict is Derricks algo and list is the old everybody's algo
smaller is faster
with dups
size dict list results match
100 0.001959 0.005347 1
1000 0.022661 0.087229 1
10000 0.223140 0.908544 1
50000 1.114087 4.359394 1
with out dups
size dict list results match
100 0.001510 0.008115 1
1000 0.016246 0.796433 1
5000 0.080216 20.643879 1
and the test program is attached.
Ashish
--------------080802070107090108030902
Content-Type: text/plain;
name="perf.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="perf.py"
import time
import random
def generateList1(listSize):
" list can have repeated numbers"
l = []
while len(l) < listSize:
l.append(random.randint(1,100))
return l
def generateList2(listSize):
" list has only unique numbers"
l = []
while len(l) < listSize:
i = random.randint(1, 1000000L)
if i not in l:
l.append(i)
return l
def duplicateDict(list1, list2):
# while not exactly same, i believe it is similar and
# basically follows the algo given
d1 = {}
result = []
for item in list1:
d1[item] = None
for item in list2:
if d1.has_key(item):
result.append(item) # present in both list
return result
def duplicateList(list1, list2):
# the number of time a number appears in the resulting
# list depends on the number of times it occurs in the
# list we iterate over so to be same as duplicateDict
# we iterate over list2
return [i for i in list2 if i in list1]
def timeIt(sizes, listFunction):
print 'size dict list results match'
for s in sizes:
list1 = listFunction(s)
list2 = listFunction(s)
startTime = time.time()
dups1 = duplicateDict(list1, list2)
endTime = time.time()
dictTime = endTime - startTime
startTime = time.time()
dups2 = duplicateList(list1, list2)
endTime = time.time()
listTime = endTime - startTime
print '%6d %f %f %d' % (s, dictTime, listTime, dups1 == dups2)
print 'with dups'
timeIt([100, 1000, 10000, 50000], generateList1)
print
print 'with out dups'
timeIt([100, 1000, 5000], generateList2)
--------------080802070107090108030902--