some kind of LFU dict...
Larry Bates
lbates at syscononline.com
Fri Jan 28 19:45:14 EST 2005
Sounds like you want a database (e.g. on disk storage
of keys and values that get accessed via the key).
Take a look at one of the databases for storing your
key/value pairs.
Larry Bates
Joh wrote:
> Hello,
>
> (first i'm sorry for my bad english...)
>
> for a program, i need to have some kind of dictionary which will
> contain a huge amount of keys and values. i have tried 'to do it
> simple' using a normal dict but when available memory was exhausted,
> my computer started to be really slow as it swap-ed.
>
> so i wondered if i can not use some kind of cache, i googled and found
> information on LRU, LFU, and LFU interested me : keep only most
> frequently used items inside dict (and memory) and writing others on
> disk for a possible future reuse.
>
> here is a silly attempt to do it, it still missed some features but
> before to continue i would like to have your opinions. i'm interested
> in any propositions or refactorisations :) and btw i may have missed
> others already/tuned/better solutions...
>
> best.
>
> ---
>
> import fileinput, pickle, random, shutil, sys
>
> class sLFU:
> TMP = """%s.bak"""
> """ a simple kind of Least Frequently Used container"""
> def __init__(self, size, ratio, filename = None):
> """ filename = a temporary file which will could be deleted"""
> self.d = {} # container
> self.size, self.ratio = size, ratio
> self.freqs, self.fmin = {}, 0
> self.filename = filename
> if self.filename: open(self.filename, 'w')
> def __setitem__(self, obj, v):
> self.d[obj] = v
> self.freqs[obj] = self.freqs.get(obj, self.fmin) + 1
> if len(self.d) > self.size:
> """ lfu = { (size / ratio) least frequently used
> objects... }"""
> freqs = [(f, obj) for (obj, f) in self.freqs.items()]
> # maybe should i use a generator () like
> # ((f, obj) for (obj, f) in self.freqs.items())
> freqs.sort()
> lfu = {}
> # and here enumerate(sorted(freqs))
> for i, (f, obj) in enumerate(freqs):
> if i > self.size / self.ratio:
> break
> lfu[obj] = self.d[obj]
> """ next minimal frequency will be max frequency of the
> lfu
> (maybe it would be better to substract this from others in
> self.freqs)"""
> self.fmin = f
> """ and now delete theses objects..."""
> for obj in lfu:
> del self.freqs[obj]
> del self.d[obj]
> if self.filename:
> """ now must save lfu to disk...
> as i do not managed to do otherwise, copy file to a
> tmp
> file and read it line by line, updating when
> necessary...
> there must be plenty rooms for improvement here :("""
> shutil.copy(self.filename, self.TMP % self.filename)
> fhs = open(self.TMP % self.filename)
> fh = open(self.filename, 'w')
> """ flag to ignore 'value' line"""
> ignoreNext = False
> for i, line in enumerate(fhs):
> """ first copy old lfu which were already set,
> updating
> them if necessary..."""
> if ignoreNext:
> ignoreNext = False
> continue
> line = line.rstrip()
> if i % 2 == 0:
> obj = self.loads(line)
> if obj not in lfu and obj in self.d:
> """ obj available from memory, do not to
> keep it on disk..."""
> ignoreNext = True
> continue
> elif obj in lfu:
> """ obj was available from memory, but as
> a lfu,
> was removed and must be updated on disk"""
> fh.write("%s\n" % line)
> fh.write("%s\n" % self.dumps(lfu[obj]))
> del lfu[obj]
> ignoreNext = True
> continue
> """ default behaviour : copy line..."""
> fh.write("%s\n" % line)
> """ from now lfu should contain only unseen lfu
> objects,
> add them to file..."""
> fh = open(self.filename, 'a')
> for obj in lfu:
> fh.write("%s\n" % self.dumps(obj))
> fh.write("%s\n" % self.dumps(lfu[obj]))
> def __getitem__(self, obj):
> if obj in self.d:
> """ from memory :)"""
> return self.d[obj]
> if self.filename:
> """ if a filename was provided, search inside file if
> such an obj is present, then return it ; else raise
> IndexErrror."""
> fh = open(self.filename)
> found = False
> for i, line in enumerate(fh):
> line = line.rstrip()
> if found:
> v = self.loads(line)
> self.d[obj] = v
> self.freqs[obj] = self.fmin + 1
> return v
> if obj == self.loads(line):
> found = True
> raise KeyError(obj)
> """ maybe class methods would have been better here,
> actually i would have liked static + class...
> totally arbitrary format, haved choosed to use pickle,
> odd line contain 'obj'
> (next) even line contain 'value'
> """
> def dumps(self, s):
> return pickle.dumps(s).replace("\n", "\t")
> staticmethod(dumps)
> def loads(self, s):
> return pickle.loads(s.replace("\t", "\n"))
> staticmethod(loads)
>
>
>
> lfu = sLFU(2, 2, 'slfu.txt')
> lfu[1]
> 8
> = {'1':
> 1fd2
> 'a'}
> lfu[2] = []
> lfu[3] = '3'
> lfu[2] = [1,]
> lfu[2] = [1,2]
> lfu[4] = 4
> lfu[5] = '5'
>
> print "#d", len(lfu.d)
> print lfu.d
> print "".join(open(lfu.filename).readlines())
More information about the Python-list
mailing list