i have a big dictionary...:)

Jeff Epler jepler at unpythonic.net
Sat Jun 26 16:29:51 EDT 2004


If the database doesn't change over the course of a run, you can keep just
the keys and their file offsets in memory instead of the values.

My on-disk file is of the format "key:value", one entry per line.
You may be splitting on whitespace instead.

class SimpleDiskDict:
    """Read-only mapping over a "key:value" text file, indexed by byte offset.

    Only the keys and the byte offset of each key's line are held in memory;
    values are read from disk on demand and remembered in a small bounded
    cache.  The file must not change while the instance is in use, because
    the offsets are computed once, in the constructor.

    Lines that contain no ":" separator (blank lines, for example) are
    skipped.  If a key occurs more than once, the last occurrence wins.

    Args:
        f: Path of the data file.
        cachesize: Maximum number of values kept in the in-memory cache.
            A value of 0 (or less) disables caching.
        encoding: Text encoding used to decode keys and values.

    The instance can be used as a context manager; otherwise call
    ``close()`` when done to release the file handle.
    """

    def __init__(self, f, cachesize=100, encoding="utf-8"):
        self.cachesize = cachesize
        self.encoding = encoding
        self.cache = {}
        self.key2offset = {}
        # Binary mode so that len(line) is an exact byte count usable
        # with seek().
        self.f = open(f, "rb")
        try:
            self._build_index()
        except Exception:
            # Do not leak the handle if indexing fails (e.g. decode error).
            self.f.close()
            raise

    def _build_index(self):
        """Scan the file once, recording the byte offset of each key's line."""
        pos = 0
        for line in self.f:
            raw_key, sep, _ = line.partition(b":")
            if sep:  # ignore lines that have no "key:value" structure
                self.key2offset[raw_key.decode(self.encoding)] = pos
            pos += len(line)

    def __getitem__(self, item):
        """Return the value stored for *item*.

        Raises:
            KeyError: If *item* is not a key of the file.
        """
        try:
            return self.cache[item]
        except KeyError:
            pass
        offset = self.key2offset[item]
        self.f.seek(offset)
        # Split on the first ":" only, so values may themselves contain ":".
        raw_value = self.f.readline().partition(b":")[2]
        value = raw_value.strip().decode(self.encoding)
        if self.cachesize > 0:
            if len(self.cache) >= self.cachesize:
                # Make room *before* inserting so the cache never exceeds
                # cachesize.  Dicts iterate in insertion order (Python
                # 3.7+), so this drops the oldest cached entry.
                del self.cache[next(iter(self.cache))]
            self.cache[item] = value
        return value

    def __contains__(self, item):
        """Return True if *item* is a key of the file."""
        return item in self.key2offset

    def keys(self):
        """Return a list of all keys found in the file."""
        return list(self.key2offset)

    def close(self):
        """Close the underlying file; lookups that need the disk then fail."""
        self.f.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
URL: <http://mail.python.org/pipermail/python-list/attachments/20040626/b10785e9/attachment.sig>


More information about the Python-list mailing list