Best way to handle large lists?

durumdara durumdara at gmail.com
Wed Oct 4 04:06:46 EDT 2006


Hi!
> Thanks Jeremy. I am in the process of converting my stuff to use sets! I
> wouldn't have thought it would have made that big a deal! I guess it is
> live and learn.
>   
If your records are simple but the data set is large, you could try 
dbhash. The data lives on disk, so you don't run out of memory...
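A dbhash file behaves like a dict whose keys and values must both be 
strings. A minimal sketch ('cache.dbh' is just an example filename):

import dbhash

# dbhash gives a dict-like object backed by a file on disk;
# keys and values must both be strings. 'c' creates the file
# if it does not already exist.
db = dbhash.open('cache.dbh', 'c')
db['some key'] = 'some value'
print db.has_key('some key')    # True, without loading everything into RAM
db.close()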

dd


import dbhash
import time
import random
import gc

itemcount = 250000

# Build a disk-based hash with itemcount string keys ('c' creates the
# file if it does not exist; 'w' would fail on the first run).
db = dbhash.open('test.dbh', 'c')
for i in range(itemcount):
    db[str(i)] = str(i)

# Pick 1000 distinct random keys to look up, kept both as a list
# (for the dbhash test) and as a set (for the set test).
littlelist = []
littleset = set()

while len(littlelist) < 1000:
    x = str(random.randint(0, itemcount - 1))
    if x not in littlelist:
        littlelist.append(x)
        littleset.add(x)

def DBHash():
    # Time 1000 membership tests against the on-disk hash.
    gc.collect()
    hk = db.has_key
    st = time.time()
    newlist = []
    for val in littlelist:
        if hk(val):
            newlist.append(val)
    et = time.time()
    print "Size", len(newlist)
    newlist.sort()
    print "Hash", hash(str(newlist))
    print "Time", "%.4f" % (et - st)
    print

def Set():
    # Build the same key space as an in-memory set, then time the
    # intersection with the 1000-element lookup set.
    gc.collect()
    largeset = set()
    for i in range(itemcount):
        largeset.add(str(i))

    st = time.time()
    newset = largeset.intersection(littleset)
    newsetlist = []
    while newset:
        newsetlist.append(newset.pop())
    et = time.time()
    print "Size", len(newsetlist)
    newsetlist.sort()
    print "Hash", hash(str(newsetlist))
    print "Time", "%.4f" % (et - st)

DBHash()
Set()
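
If your records are more than plain strings, shelve (which pickles its 
values on top of a dbm-style file) works the same way. A minimal sketch; 
the filename and record layout here are made up for illustration:

import shelve

# shelve pickles arbitrary Python values on top of a dbm-style file,
# so values need not be strings (keys still must be).
sh = shelve.open('records.db', 'c')
sh['42'] = {'name': 'example', 'count': 7}
print sh['42']['count']    # 7, read back from disk
sh.close()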
