[Spambayes-checkins] spambayes/Outlook2000 manager.py,1.41,1.42
Mark Hammond
mhammond at users.sourceforge.net
Mon Feb 3 18:19:48 EST 2003
Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1:/tmp/cvs-serv11936
Modified Files:
manager.py
Log Message:
If the new bsddb module or the bsddb3 module is available, use it for the databases instead of a pickle. If either module is available on your system, you will need to do a full retrain.
Index: manager.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v
retrieving revision 1.41
retrieving revision 1.42
diff -C2 -d -r1.41 -r1.42
*** manager.py 14 Jan 2003 05:38:19 -0000 1.41
--- manager.py 4 Feb 2003 02:19:46 -0000 1.42
***************
*** 4,7 ****
--- 4,8 ----
import os
import sys
+ import errno
import win32com.client
***************
*** 34,37 ****
--- 35,51 ----
this_filename = os.path.abspath(sys.argv[0])
+ # See if we can use the new bsddb module. (The old one is unreliable
+ # on Windows, so we don't use that)
+ try:
+ from bsddb import db # This name doesn't exist in the old one.
+ use_db = True
+ except ImportError:
+ # See if the explicit bsddb3 module exists.
+ try:
+ import bsddb3
+ use_db = True
+ except ImportError:
+ use_db = False
+
# This is a little bit of a hack <wink>. We are generally in a child directory
# of the bayes code. To help installation, we handle the fact that this may
***************
*** 40,44 ****
# spambayes code before setting that envar, our .ini file may have no effect).
def import_core_spambayes_stuff(ini_filename):
! global bayes_classifier, bayes_tokenize
os.environ["BAYESCUSTOMIZE"] = ini_filename
--- 54,58 ----
# spambayes code before setting that envar, our .ini file may have no effect).
def import_core_spambayes_stuff(ini_filename):
! global bayes_classifier, bayes_tokenize, bayes_storage
os.environ["BAYESCUSTOMIZE"] = ini_filename
***************
*** 52,61 ****
--- 66,137 ----
from spambayes import classifier
from spambayes.tokenizer import tokenize
+ from spambayes import storage
bayes_classifier = classifier
bayes_tokenize = tokenize
+ bayes_storage = storage
class ManagerError(Exception):
pass
+ # Base class for our "storage manager" - we choose between the pickle
+ # and DB versions at runtime. As our bayes uses spambayes.storage,
+ # our base class can share common bayes loading code.
+ class BasicStorageManager:
+ db_extension = None # for pychecker - overwritten by subclass
+ def __init__(self, bayes_base_name, mdb_base_name):
+ self.bayes_filename = bayes_base_name + self.db_extension
+ self.mdb_filename = mdb_base_name + self.db_extension
+ def new_bayes(self):
+ # Just delete the file and do an "open"
+ try:
+ os.unlink(self.bayes_filename)
+ except IOError, e:
+ if e.errno != errno.ENOENT: raise
+ return self.open_bayes()
+ def store_bayes(self, bayes):
+ bayes.store()
+ def open_bayes(self):
+ raise NotImplementedError
+
+ class PickleStorageManager(BasicStorageManager):
+ db_extension = ".pck"
+ def open_bayes(self):
+ return bayes_storage.PickledClassifier(self.bayes_filename)
+ def close_bayes(self, bayes):
+ pass
+ def open_mdb(self):
+ return cPickle.load(open(self.mdb_filename, 'rb'))
+ def new_mdb(self):
+ return {}
+ def store_mdb(self, mdb):
+ cPickle.dump(mdb, open(self.mdb_filename,"wb"), 1)
+ def close_mdb(self, mdb):
+ pass
+
+ class DBStorageManager(BasicStorageManager):
+ db_extension = ".db"
+ def open_bayes(self):
+ return bayes_storage.DBDictClassifier(self.bayes_filename)
+ def close_bayes(self, bayes):
+ bayes.db.close()
+ bayes.dbm.close()
+ def open_mdb(self):
+ try:
+ import bsddb
+ except ImportError:
+ import bsddb3 as bsddb
+ return bsddb.hashopen(self.mdb_filename)
+ def new_mdb(self):
+ try:
+ os.unlink(self.mdb_filename)
+ except IOError, e:
+ if e.errno != errno.ENOENT: raise
+ return self.open_mdb()
+ def store_mdb(self, mdb):
+ mdb.sync()
+ def close_mdb(self, mdb):
+ mdb.close()
+
+ # Our main "bayes manager"
class BayesManager:
def __init__(self, config_base="default", outlook=None, verbose=1):
***************
*** 68,76 ****
config_base = os.path.abspath(config_base)
self.ini_filename = config_base + "_bayes_customize.ini"
- self.bayes_filename = config_base + "_bayes_database.pck"
- self.message_db_filename = config_base + "_message_database.pck"
self.config_filename = config_base + "_configuration.pck"
! # First read the configuration file.
self.config = self.LoadConfig()
--- 144,150 ----
config_base = os.path.abspath(config_base)
self.ini_filename = config_base + "_bayes_customize.ini"
self.config_filename = config_base + "_configuration.pck"
! # Read the configuration file.
self.config = self.LoadConfig()
***************
*** 78,81 ****
--- 152,163 ----
import_core_spambayes_stuff(self.ini_filename)
+
+ bayes_base = config_base + "_bayes_database"
+ mdb_base = config_base + "_message_database"
+ # determine which db manager to use, and create it.
+ ManagerClass = [PickleStorageManager, DBStorageManager][use_db]
+ self.db_manager = ManagerClass(bayes_base, mdb_base)
+
+ self.bayes = self.message_db = None
self.LoadBayes()
self.message_store = msgstore.MAPIMsgStore(outlook)
***************
*** 115,119 ****
# (which really is OK!)
assert self.outlook is not None, "I need outlook :("
- ol = self.outlook
msgstore_folder = self.message_store.GetFolder(folder_id)
folder = msgstore_folder.GetOutlookItem()
--- 197,200 ----
***************
*** 161,174 ****
def LoadBayes(self):
if not os.path.exists(self.ini_filename):
raise ManagerError("The file '%s' must exist before the "
"database '%s' can be opened or created" % (
! self.ini_filename, self.bayes_filename))
bayes = message_db = None
try:
! bayes = cPickle.load(open(self.bayes_filename, 'rb'))
! print "Loaded bayes database from '%s'" % (self.bayes_filename,)
! except IOError:
! pass # ignore file-not-found
except:
print "Failed to load bayes database"
--- 242,256 ----
def LoadBayes(self):
+ import time
+ start = time.clock()
if not os.path.exists(self.ini_filename):
raise ManagerError("The file '%s' must exist before the "
"database '%s' can be opened or created" % (
! self.ini_filename, self.db_manager.bayes_filename))
bayes = message_db = None
try:
! # file-not-found handled gracefully by storage.
! bayes = self.db_manager.open_bayes()
! print "Loaded bayes database from '%s'" % (self.db_manager.bayes_filename,)
except:
print "Failed to load bayes database"
***************
*** 176,181 ****
traceback.print_exc()
try:
! message_db = cPickle.load(open(self.message_db_filename, 'rb'))
! print "Loaded message database from '%s'" % (self.message_db_filename,)
except IOError:
pass
--- 258,263 ----
traceback.print_exc()
try:
! message_db = self.db_manager.open_mdb()
! print "Loaded message database from '%s'" % (self.db_manager.mdb_filename,)
except IOError:
pass
***************
*** 185,188 ****
--- 267,272 ----
traceback.print_exc()
if bayes is None or message_db is None:
+ self.bayes = bayes
+ self.message_db = message_db
print "Either bayes database or message database is missing - creating new"
self.InitNewBayes()
***************
*** 193,203 ****
"%d spam and %d good messages" % (bayes.nspam, bayes.nham))
if len(message_db) != bayes.nham + bayes.nspam:
! print "*** - message database only has %d messages - bayes has %d - something is screwey" % \
(len(message_db), bayes.nham + bayes.nspam)
self.bayes = bayes
self.message_db = message_db
self.bayes_dirty = False
def LoadConfig(self):
try:
f = open(self.config_filename, 'rb')
--- 277,290 ----
"%d spam and %d good messages" % (bayes.nspam, bayes.nham))
if len(message_db) != bayes.nham + bayes.nspam:
! print "*** - message database has %d messages - bayes has %d - something is screwey" % \
(len(message_db), bayes.nham + bayes.nspam)
self.bayes = bayes
self.message_db = message_db
self.bayes_dirty = False
+ if self.verbose:
+ print "Loaded databases in %gms" % ((time.clock()-start)*1000)
def LoadConfig(self):
+ # Our 'config' file always uses a pickle
try:
f = open(self.config_filename, 'rb')
***************
*** 228,233 ****
def InitNewBayes(self):
! self.bayes = bayes_classifier.Bayes()
! self.message_db = {} # OK, so its not quite a DB yet <wink>
self.bayes_dirty = True
--- 315,324 ----
def InitNewBayes(self):
! if self.bayes is not None:
! self.db_manager.close_bayes(self.bayes)
! if self.message_db is not None:
! self.db_manager.close_mdb(self.message_db)
! self.bayes = self.db_manager.new_bayes()
! self.message_db = self.db_manager.new_mdb()
self.bayes_dirty = True
***************
*** 243,251 ****
print "Saving bayes database with %d spam and %d good messages" %\
(bayes.nspam, bayes.nham)
! print " ->", self.bayes_filename
! cPickle.dump(bayes, open(self.bayes_filename,"wb"), 1)
if self.verbose:
! print " ->", self.message_db_filename
! cPickle.dump(self.message_db, open(self.message_db_filename,"wb"), 1)
self.bayes_dirty = False
--- 334,342 ----
print "Saving bayes database with %d spam and %d good messages" %\
(bayes.nspam, bayes.nham)
! print " ->", self.db_manager.bayes_filename
! self.db_manager.store_bayes(self.bayes)
if self.verbose:
! print " ->", self.db_manager.mdb_filename
! self.db_manager.store_mdb(self.message_db)
self.bayes_dirty = False
More information about the Spambayes-checkins
mailing list