some kind of LFU dict...

Larry Bates lbates at syscononline.com
Fri Jan 28 19:45:14 EST 2005


Sounds like you want a database (e.g. on disk storage
of keys and values that get accessed via the key).

Take a look at one of the databases for storing your
key/value pairs.

Larry Bates


Joh wrote:
> Hello,
> 
> (first i'm sorry for my bad english...)
> 
> for a program, i need to have some kind of dictionary which will
> contain a huge amount of keys and values. i have tried 'to do it
> simple' using a normal dict but when available memory was exhausted,
> my computer started to be really slow as it swap-ed.
> 
> so i wondered if i can not use some kind of cache, i googled and found
> information on LRU, LFU, and LFU interested me : keep only most
> frequently used items inside dict (and memory) and writing others on
> disk for a possible future reuse.
> 
> here is a silly attempt to do it, it still missed some features but
> before to continue i would like to have your opinions. i'm interested
> in any propositions or refactorisations :) and btw i may have missed
> others already/tuned/better solutions...
> 
> best.
> 
> ---
> 
> import fileinput, pickle, random, shutil, sys
> 
> class sLFU:
>     TMP = """%s.bak"""
>     """ a simple kind of Least Frequently Used container"""
>     def __init__(self, size, ratio, filename = None):
>         """ filename = a temporary file which will could be deleted"""
>         self.d = {} #   container
>         self.size, self.ratio = size, ratio
>         self.freqs, self.fmin = {}, 0
>         self.filename = filename
>         if self.filename: open(self.filename, 'w')
>     def __setitem__(self, obj, v):
>         self.d[obj] = v
>         self.freqs[obj] = self.freqs.get(obj, self.fmin) + 1
>         if len(self.d) > self.size:
>             """ lfu = { (size / ratio) least frequently used
> objects... }"""
>             freqs = [(f, obj) for (obj, f) in self.freqs.items()]
>             # maybe should i use a generator () like
>             # ((f, obj) for (obj, f) in self.freqs.items())
>             freqs.sort()
>             lfu = {}
>             # and here enumerate(sorted(freqs))
>             for i, (f, obj) in enumerate(freqs):
>                 if i > self.size / self.ratio:
>                     break
>                 lfu[obj] = self.d[obj]
>             """ next minimal frequency will be max frequency of the
> lfu
>             (maybe it would be better to substract this from others in
>             self.freqs)"""
>             self.fmin = f
>             """   and now delete theses objects..."""
>             for obj in lfu:
>                     del self.freqs[obj]
>                     del self.d[obj]                
>             if self.filename:
>                 """ now must save lfu to disk...
>                 as i do not managed to do otherwise, copy file to a
> tmp
>                 file and read it line by line, updating when
> necessary...
>                 there must be plenty rooms for improvement here :("""
>                 shutil.copy(self.filename, self.TMP % self.filename)
>                 fhs = open(self.TMP % self.filename)
>                 fh = open(self.filename, 'w')
>                 """ flag to ignore 'value' line"""
>                 ignoreNext = False
>                 for i, line in enumerate(fhs):
>                     """ first copy old lfu which were already set,
> updating
>                     them if necessary..."""
>                     if ignoreNext:
>                         ignoreNext = False
>                         continue
>                     line = line.rstrip()
>                     if i % 2 == 0:
>                         obj = self.loads(line)
>                         if obj not in lfu and obj in self.d:
>                             """ obj available from memory, do not to
>                             keep it on disk..."""
>                             ignoreNext = True
>                             continue
>                         elif obj in lfu:
>                             """ obj was available from memory, but as
> a lfu,
>                             was removed and must be updated on disk"""
>                             fh.write("%s\n" % line)
>                             fh.write("%s\n" % self.dumps(lfu[obj]))
>                             del lfu[obj]
>                             ignoreNext = True
>                             continue
>                     """ default behaviour : copy line..."""
>                     fh.write("%s\n" % line)
>                 """ from now lfu should contain only unseen lfu
> objects,
>                 add them to file..."""
>                 fh = open(self.filename, 'a')
>                 for obj in lfu:
>                     fh.write("%s\n" % self.dumps(obj))
>                     fh.write("%s\n" % self.dumps(lfu[obj]))
>     def __getitem__(self, obj):
>         if obj in self.d:
>             """ from memory :)"""
>             return self.d[obj]
>         if self.filename:
>             """ if a filename was provided, search inside file if
>             such an obj is present, then return it ; else raise
>             IndexErrror."""
>             fh = open(self.filename)
>             found = False
>             for i, line in enumerate(fh):
>                 line = line.rstrip()
>                 if found:
>                     v = self.loads(line)
>                     self.d[obj] = v
>                     self.freqs[obj] = self.fmin + 1
>                     return v
>                 if obj == self.loads(line):
>                     found = True
>         raise KeyError(obj)
>     """ maybe class methods would have been better here,
>     actually i would have liked static + class...
>     totally arbitrary format, haved choosed to use pickle,
>     odd line contain 'obj'
>     (next) even line contain 'value'
>     """
>     def dumps(self, s):
>         return pickle.dumps(s).replace("\n", "\t")
>     staticmethod(dumps)            
>     def loads(self, s):
>         return pickle.loads(s.replace("\t", "\n"))
>     staticmethod(loads)            
>         
> 
> 
> lfu = sLFU(2, 2, 'slfu.txt')
> lfu[1] 
> 8  
> = {'1': 
> 1fd2
> 'a'}
> lfu[2] = []
> lfu[3] = '3'
> lfu[2] = [1,]
> lfu[2] = [1,2]
> lfu[4] = 4
> lfu[5] = '5'
> 
> print "#d", len(lfu.d)
> print lfu.d
> print "".join(open(lfu.filename).readlines())



More information about the Python-list mailing list