shelve error
Fiona Brewster
Fiona.Brewster at wanadoo.fr
Tue Jul 17 08:11:28 EDT 2001
Could someone explain what this means:
Traceback (most recent call last):
  File "index.py", line 83, in ?
    tokenCount = process(fileIndex[item], item, tokenCount)
  File "index.py", line 61, in process
    post[token] = fileIdMap
  File "c:\program files\python21\lib\shelve.py", line 77, in __setitem__
    self.dict[key] = f.getvalue()
bsddb.error: (0, 'Error')
I got this error after processing 1,900 files and 3.8 million words. The
program is part of a simplistic IR system which does the indexing to create
a postings database (kept in a shelve file). As it took about an hour of
indexing to reach the error, I am loath to step through 3.8 million words to
see what the error is!
I enclose the program below.
Thank you for any light you may shed.
Christopher Brewster
--------------------------------------------------------------------------------
import anydbm
import shelve
import string
# Module-level state shared by process() and the main program.

# Running total of tokens seen across all files processed so far.
tokenCount = 0

# Postings index kept on disk in the "postings" shelf; the 'n' flag starts
# from a new, empty database on every run.
# (For an in-memory run, a plain dict can be used instead: post = {})
post = shelve.open("postings", flag='n')
#####################
# Functions
####################
# This function reads a file, creates a list containing all tokens and ...
def process(fileName, fileId, tokenCount):
f = open(fileName)
fileText = f.read()
tokens = string.split(fileText)
# cycle over the words in the text
i = 0
for i in range(len(tokens)):
if (tokenCount%100000 == 0): print tokenCount
token = tokens[i]
tokenCount = tokenCount +1
# print "Current token is ", token
wordOffset = i
# print "Word offset is: ", wordOffset
# check if the word exists as a key in 'post'
if (post.has_key(token)):
# print "Token found in post"
currentFileMap = post[token]
# print "CurrentFilemap = ", currentFileMap
# check if the file id exists as a key in post[token] aka currentFileMap
if (currentFileMap.has_key(fileId)):
# print "File id-map found: ", post[token][fileId]
tempMap = post[token]
# print "tempMap = ", tempMap
tempMap[fileId].append(wordOffset)
# print "tempMap[fileId] after appending: ", tempMap[fileId]
del post[token]
post[token] = tempMap
# print "post[token] now is: ", post[token]
# print "New token offset appended: ", post[token][fileId]
else:
# print "File id not found"
post[token][fileId] = [wordOffset]
# print "post[token][fileId]= ", post[token][fileId]
else:
# print "Word not found in post"
# create offset list
offsetList = [wordOffset]
# create fileId --> list map
fileIdMap = {fileId:offsetList}
# add token --> fileIdMap item to post
post[token] = fileIdMap
# print "New entries created", post[token][fileId]
# raw_input()
return tokenCount
######################
# Main Program
######################
# Index every file listed in the 'filedb' database: each key is used as the
# file id and the value stored under it as the path handed to process().
fileCount = 0
fileIndex = anydbm.open('filedb', 'r')
try:
    for item in fileIndex.keys():
        # Progress report every 100 files.  The text is formatted into a
        # single string so the output is the same whether print is a
        # statement or a function.
        if fileCount % 100 == 0:
            print("file count =  %s" % fileCount)
        tokenCount = process(fileIndex[item], item, tokenCount)
        fileCount = fileCount + 1
finally:
    # Close both databases even if indexing fails part-way, so that
    # whatever has been written to the postings shelf is flushed to disk.
    fileIndex.close()
    post.close()
More information about the Python-list
mailing list