Simple lru cache?
Bengt Richter
bokr at oz.net
Sun Oct 6 01:23:45 EDT 2002
On Sat, 05 Oct 2002 10:10:33 GMT, Alex Martelli <aleax at aleax.it> wrote:
>Steve Holden wrote:
> ...
>>> Or is there a way to pick one element randomly from a list
>>> and move it to front or back and shift the rest to make place,
>>> all in a single python statement, ie. without using a for statement?
>>>
>> What, you mean like
>>
>> mylst = mylst[:n] + mylst[n+1:] + [mylst[n]]
>>
>> for example?
>
>This builds a new list, rather than altering the old one. It's
>easy to fix so it alters the old one, of course (probably only
>relevant if you have other names for the same list, but maybe a
>little bit faster too):
>
>mylst[n:] = mylst[n+1:] + [mylst[n]]
>
>
>Somebody later mentioned that O(m) behavior might be a problem,
>but that's inevitable when you want to "shift the rest" (m items).
>Moving m items within a list is O(m). If that's no good, then
>a Python list is not the right data structure -- don't let the
>name fool you, it's not implemented as a linked-list but rather
>as a vector or array -- contiguous storage for references to
>items. So accessing mylst[n] is O(1), but shifting items around
>ain't. You can easily build lisp-like lists (use a 2-item list
>to represent each node's car/cdr for example) if that's what you
>want, or other kinds of linked lists (e.g. doubly-linked) since
>everything's being done by-reference anyway. I see from other
>posts in this thread that several other viable alternatives have
>also been proposed.
>
>One that I haven't seen is based on dictionaries -- may not be
>best, but it's interesting, at least (it's somewhat of a
>surprise that a dict is often the best way to implement a
>FIFO queue in Python, for example -- now this case is a bit
>different, but...).
>
>Say that what we want is to be able to access the X LRU items
>(e.g. to remove them) and that items are hashable. Then:
>
>oldest = newest = 0
>item_to_age = {}
>age_to_item = {}
>
>def use_item(IT):
> newest += 1
> item_to_age[IT] = newest
> age_to_item[newest] = IT
>
>def prune_X_items(X):
> pruned = 0
> while pruned < X and oldest < newest:
> oldest += 1
> IT = age_to_item.get(oldest)
> if IT is not None:
> del age_to_item[oldest]
> del item_to_age[IT]
> pruned += 1
>
>Now the computational cost of "use_item" is pretty darn
>close to O(1) [thanks to the performance characteristics
>of Python's dicts] -- although it's harder to estimate
>the computational cost of "prune_X_items", as it depends
>on how much "churning" there has been (I'd guess it's
>probably no good if older items are OFTEN 'used' again).
>
Here's a class that uses both linked list and dictionary
in a way I hope keeps the dictionary from growing, and
should be about constant time. It also has a general interface.
I just did this todaay, so no guarantees ;-)
----< mrucache.py >--------
# mrucache.py -- defines class MRUCache
# Takes capacity, a valueCalc function, and an optional hashCalc function
# to make an MRU/LRU cache instance with a getValue method that either retrieves
# cached value or calculates a new one. Either way, makes the value MRU.
# Alpha version. NO WARRANTY. Use at your own risk
# Copyright (c) 2002 Bengt Richter 2001-10-05. All rights reserved.
# Use per Python Software Foundation (PSF) license.
#
class UseNode:
"""For linked list kept in most-recent .. least-recent *use* order"""
__slots__ = ['value','hkey','older','newer']
def __init__(self, value, hkey, older=None, newer=None):
self.value = value # as returned by user valueCalc function
self.hkey = hkey # defaults to arg tuple for valueCalc, or else
# result of user hashCalc function called with those args
self.older = older # link to node not as recently used as current
self.newer = newer # Note that list is circular: mru.newer is lru
# and lru.older is mru, which is the reference point.
class MRUCache:
"""
Produces cache object with given capacity for MRU/LRU list.
Uses user-supplied valueCalc function when it can't find value in cache.
Uses optional user-supplied hashCalc function to make key for finding
cached values or uses valueCalc arg tuple as key.
MRU/LRU list is initialized with a dummy node to a circular list of length one.
This node becomes LRU and gets overwritten when the list fills to capacity.
"""
def __init__(self,
capacity, # max number of simultaneous cache MRU values kept
valueCalc, # user function to calculate actual value from args
hashCalc=None, # normally takes same args as valueCalc if present
):
""" """
self.capacity = capacity
self.hashCalc = hashCalc
self.valueCalc = valueCalc
self.mru = UseNode(`'<unused value>'`, '<*initially unused node*>') # ``for test
self.mru.older = self.mru.newer = self.mru # init circular list
self.nodeCount = 1
self.nodeDict = dict([(self.mru.hkey, self.mru)])
def getValue(self, *args): # magically hidden whether lookup or calc
"""
Get value from cache or calcuate a new value using user function.
Either way, make the new value the most recently used, replacing
the least recently used if cache is full.
"""
if self.hashCalc:
hkey = self.hashCalc(*args)
else:
hkey = args # use tuple of args as default key for first stage LU
try:
node = self.nodeDict[hkey]
assert node.hkey == hkey
except KeyError:
# here we know we can't get to value
# calculate new value
value = self.valueCalc(*args)
# get mru use node if cache is full, else make new node
lru = self.mru.newer # newer than mru circularly goes to lru node
if self.nodeCount<self.capacity:
# put new node between existing lru and mru
node = UseNode(value, hkey, self.mru, lru)
self.nodeCount += 1
# update links on both sides
self.mru.newer = node # newer from old mru is new mru
lru.older = node # older than lru poits circularly to mru
# make new node the mru
self.mru = node
else:
# position of lru node is correct for becoming mru so
# jusr replace value and hkey #
lru.value = value
# delete invalidated key->node mapping
del self.nodeDict[lru.hkey]
lru.hkey = hkey
self.mru = lru # new lru is next newer from before
self.nodeDict[hkey] = self.mru # add new key->node mapping
return value
# Here we have a valid node. Just update its position in linked lru list
# we want take node from older <=> node <=> newer
# and put it in lru <=> node <=> mru and then make new node the mru
# first cut it out unless it's first or last
if node is self.mru: # nothing to do
return node.value
lru = self.mru.newer # circles from newest to oldest
if node is lru:
self.mru = lru # just backs up the circle one notch
return lru.value
# must be between somewhere, so cut it out first
node.older.newer = node.newer # older neighbor points to newer neighbor
node.newer.older = node.older # newer neighbor points to older neighbor
# then put it between current lru and mru
node.older = self.mru # current mru is now older
self.mru.newer = node
node.newer = lru # newer than new mru circles to lru
lru.older = node
self.mru = node # new node is new mru
return node.value
def clear(self):
"""
Clear out circular list and dictionary of cached nodes.
Re-init empty with same capacity and user functions
for posssible continued use.
"""
this = self.mru
lru = this.newer
while 1:
next = this.older
this.older = this.newer = None
del this
this = next
if this is lru: break
this.older = this.newer = None
del this
self.nodeDict.clear()
# re-init with previous parameters
self.__init__(self.capacity, self.valueCalc, self.hashCalc)
####################
# test stuff follows
def testcalc(*args): # plays role of user valueCalc
return '<V:%s>' % `args`
def testhash(*args): # plays role of user hashCalc
return '<H:%s>' % `args`
def test():
cache = MRUCache(5, testcalc, testhash)
def mrustr(mru):
node = mru; s=[]
while 1:
s.append(node.value.split("'")[1])
node = node.older
if node is mru: break
return ''.join(s)
def dosome(s):
sbef = mrustr(cache.mru)
for calcArg in s:
v = cache.getValue(calcArg)
print '\n--- getValue(%s) ---\n' % calcArg
mru = node = cache.mru
while 1:
print node.hkey, node.value, '(older:%s, newer:%s)'%(
node.older.hkey, node.newer.hkey)
node = node.older
if node is mru: break
saft = mrustr(cache.mru)
print 'MRU %s +refs %s => %s' %(sbef,s,saft)
dosome('aabcdef') # leaves mru..lru = fedcb
dosome('fb') # bfedc
dosome('f') # fbedc
dosome('d') # dfbec
cache.clear()
for k,v in vars(cache).items():
print '%12s = %s' % (`k`,`v`)
dosome(')-;? ')
cache.clear()
for k,v in vars(cache).items():
print '%12s = %s' % (`k`,`v`)
if __name__ == '__main__':
test()
---------------------------
Regards,
Bengt Richter
More information about the Python-list
mailing list