Best way to handle large lists?
durumdara
durumdara at gmail.com
Wed Oct 4 04:06:46 EDT 2006
Hi !
> Thanks Jeremy. I am in the process of converting my stuff to use sets! I
> wouldn't have thought it would have made that big a deal! I guess it is
> live and learn.
>
If you have simple records with a large amount of data, you can try
dbhash. With this approach you don't run out of memory...
dd
import dbhash
import time
import random
import gc
import sys
# Benchmark setup: populate an on-disk dbhash table with itemcount
# string keys, then draw 1000 distinct random keys to look up, kept
# in both a list and a set so both benchmarks use the same sample.
itemcount = 250000

db = dbhash.open('test.dbh', 'w')
for i in range(itemcount):
    db[str(i)] = str(i)

littlelist = []
littleset = set()
while len(littlelist) < 1000:
    x = str(random.randint(0, itemcount - 1))
    # Membership test against the set is O(1); the original tested the
    # list, O(n) per draw. List and set stay in sync, so the result of
    # the check is identical.
    if x not in littleset:
        littlelist.append(x)
        littleset.add(x)
def DBHash():
gc.collect()
hk = db.has_key
st = time.time()
newlist = []
for val in littlelist:
if hk(val):
newlist.append(val)
et = time.time()
print "Size", len(newlist)
newlist.sort()
print "Hash", hash(str(newlist))
print "Time", "%04f"%(et-st)
print
def Set():
gc.collect()
largeset = set()
for i in range(itemcount):
largeset.add(str(i))
st = time.time()
newset = largeset.intersection(littleset)
newsetlist = []
while newset:
newsetlist.append(newset.pop())
et = time.time()
print "Size", len(newsetlist)
newsetlist.sort()
print "Hash", hash(str(newsetlist))
print "Time", "%04f"%(et-st)
# Run both timings so the dbhash and pure-set approaches can be compared.
DBHash()
Set()
More information about the Python-list
mailing list