shelve error

Fiona Brewster Fiona.Brewster at wanadoo.fr
Tue Jul 17 08:11:28 EDT 2001


Could someone explain what this means:

Traceback (most recent call last):
	File "index.py", line 83, in ?
		tokenCount = process(fileIndex[item], item, tokenCount)
	File "index.py", line 61, in process
		post[token] = fileIdMap
	File "c:\program files\python21\lib\shelve.py", line 77, in __setitem__
		self.dict[key] = f.getvalue()
	bsddb.error: (0, 'Error')

I have got this error after processing 1900 files and 3.8m words. The
program is part of a simplistic IR system which does the indexing to create
a postings database (kept in a shelve file). As it took about an hour of
indexing to reach the error I am loath to step through 3.8m words to see
what the error is!

I enclose the program below.
Thank you for any light you may shed.

Christopher Brewster

--------------------------------------------------------------------------------

import shelve, string

# Module-level state used by the indexing code below.

# Running total of tokens indexed across all files.
tokenCount = 0

# Postings database, kept on disk as a shelf. The 'n' flag always starts
# from a new, empty database, so any previous "postings" file is replaced.
# (For a quick in-memory experiment, use a plain dict instead: post = {})
post = shelve.open("postings", flag='n')

#####################
# Functions
####################

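# This function reads a file, splits it into whitespace-separated tokens and
# records each token's word offset, per file id, in the 'post' postings
# database. It returns the updated running token count.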
def process(fileName, fileId, tokenCount):
	f = open(fileName)
	fileText = f.read()
	tokens = string.split(fileText)
	# cycle over the words in the text
	i = 0
	for i in range(len(tokens)):
		if (tokenCount%100000 == 0): print tokenCount
		token = tokens[i]
		tokenCount = tokenCount +1
#		print "Current token is ", token
		wordOffset = i
#		print "Word offset is: ", wordOffset
		# check if the word exists as a key in 'post'
		if (post.has_key(token)):
#			print "Token found in post"
			currentFileMap = post[token]
#			print "CurrentFilemap = ", currentFileMap
			# check if the file id exists as a key in post[token] aka currentFileMap
			if (currentFileMap.has_key(fileId)):
#				print "File id-map found: ", post[token][fileId]
				tempMap = post[token]
#				print "tempMap = ", tempMap
				tempMap[fileId].append(wordOffset)
#				print "tempMap[fileId] after appending: ", tempMap[fileId]
				del post[token]
				post[token] = tempMap
#				print "post[token] now is: ", post[token]
#				print "New token offset appended: ", post[token][fileId]
			else:
#				print "File id not found"
				post[token][fileId] = [wordOffset]
#				print "post[token][fileId]= ", post[token][fileId]
		else:
#			print "Word not found in post"
			# create offset list
			offsetList = [wordOffset]
			# create fileId --> list map
			fileIdMap = {fileId:offsetList}
			# add token --> fileIdMap item to post
			post[token] = fileIdMap
#			print "New entries created", post[token][fileId]
#		raw_input()
	return tokenCount

######################
# Main Program
######################
fileCount = 0
fileIndex = anydbm.open('filedb', 'r')
for item in fileIndex.keys():
#	if (fileCount > 100): break
	if (fileCount%100 == 0): print "file count = ", fileCount
#	print "Now working on file: ", fileIndex[item]
	tokenCount = process(fileIndex[item], item, tokenCount)
	fileCount = fileCount + 1






More information about the Python-list mailing list