key/value store optimized for disk storage

Emile van Sebille emile at fenx.com
Fri May 4 15:14:18 EDT 2012


On 5/4/2012 10:46 AM Tim Chase said...

I hit a few snags testing this on my WinXP w/ Python 2.6.1: getsize
wasn't finding the file, as it was created in two parts with .dat and
.dir extensions.

Also, setting key failed because md5.update() returns None, so its result can't be sliced.

The changes I needed to make are marked below.

Emile

>    import os
>    import hashlib
>    import random
>    from string import letters
>
>    import anydbm
>
>    KB = 1024
>    MB = KB * KB
>    GB = MB * KB
>    DESIRED_SIZE = 1 * GB
>    KEYS_TO_SAMPLE = 20
>    FNAME = "mydata.db"

FDATNAME = r"mydata.db.dat"

>
>    i = 0
>    md5 = hashlib.md5()
>    db = anydbm.open(FNAME, 'c')
>    try:
>      print("Generating junk data...")
>      while os.path.getsize(FNAME)<  6*GB:

   while os.path.getsize(FDATNAME) < 6*GB:

>        key = md5.update(str(i))[:16]

     md5.update(str(i))
     key = md5.hexdigest()[:16]

>        size = random.randrange(1*KB, 4*KB)
>        value = ''.join(random.choice(letters)
>          for _ in range(size))
>        db[key] = value
>        i += 1
>      print("Gathering %i sample keys" % KEYS_TO_SAMPLE)
>      keys_of_interest = random.sample(db.keys(), KEYS_TO_SAMPLE)
>    finally:
>      db.close()
>
>    print("Reopening for a cold sample set in case it matters")
>    db = anydbm.open(FNAME)
>    try:
>      print("Performing %i lookups")
>      for key in keys_of_interest:
>        v = db[key]
>      print("Done")
>    finally:
>      db.close()
>





More information about the Python-list mailing list