[Spambayes-checkins] spambayes/Outlook2000 classify.py,NONE,1.1 manager.py,NONE,1.1 rule.py,NONE,1.1 filter.py,1.1,1.2 train.py,1.1,1.2README.txt,1.1,1.2

Mark Hammond mhammond@users.sourceforge.net
Sat, 19 Oct 2002 09:23:39 -0700


Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory usw-pr-cvs1:/tmp/cvs-serv19979

Modified Files:
	filter.py train.py README.txt 
Added Files:
	classify.py manager.py rule.py 
Log Message:
New version of the outlook addin code with a fancy GUI.


--- NEW FILE: classify.py ---
# Classify messages in Outlook Mail folders using a trained classifier
# Author: Sean D. True, WebReply.Com
# October, 2002
# Copyright PSF, license under the PSF license

import sys, os, os.path, getopt, cPickle, string
from win32com.client import Dispatch, constants
import pythoncom
import win32con
import classifier
from tokenizer import tokenize
from hammie import createbayes, Hammie

def classify_folder( f, mgr, config, progress):
    """Score every message in folder `f` and record the score on the message.

    f        -- folder object; its `Messages` collection is walked with
                GetFirst()/GetNext().
    mgr      -- manager providing `bayes` (wrapped in a Hammie) and
                GetOutlookForCurrentThread().
    config   -- classify configuration; only `field_name` is read here.
    progress -- progress sink; tick(), warning() and stop_requested() are used.

    Returns None.  COM errors raised while handling a message are reported
    through progress.warning() and the loop moves on to the next message.
    """
    hammie = Hammie(mgr.bayes)
    messages = f.Messages
    pythoncom.CoInitialize() # We are called on a different thread.
    # We must get outlook in this thread - can't use the main thread :(
    outlook_ns = mgr.GetOutlookForCurrentThread().GetNamespace("MAPI")

    if not messages:
        progress.warning("Can't find messages in folder '%s'" % (f.Name,))
        return
    message = messages.GetFirst()
    # Stop as soon as the user asks to, or when the collection is exhausted.
    while not progress.stop_requested() and message:
        try:
            progress.tick()
            # Field 0x7D001E is read as the message headers
            # (presumably the MAPI transport-headers property - TODO confirm).
            headers = message.Fields[0x7D001E].Value
            headers = headers.encode('ascii', 'replace')
            body = message.Text.encode('ascii', 'replace')
            text = headers + body

            prob, clues = hammie.score(text, evidence=1)
            added_prop = False
            try:
                # Preferred route: add the score as an Outlook user property.
                if outlook_ns is not None:
                    outlookItem = outlook_ns.GetItemFromID(message.ID)
                    format = 4 # 4=2 decimal, 3=1 decimal - index in "field chooser" combo when type=Number.
                    prop = outlookItem.UserProperties.Add(config.field_name, constants.olNumber, True, format)
                    prop.Value = prob
                    outlookItem.Save()
                    added_prop = True
            # NOTE(review): this handler is effectively disabled - it only
            # matches the string exception "foo", so a COM error raised above
            # is handled by the outer `except` instead and the MAPI fallback
            # below is not reached for that message.
            except "foo": # pythoncom.com_error, d:
                # Hrm - don't seem able to use outlook - use MAPI - but this
                # means the field doesn't automatically appear in the outlook "Field Chooser"
                # Tried explicitly adding the field to the folder but still no go.
                added_prop = False
            if not  added_prop:
                # Fallback: add the field through MAPI.  The literal 5 is the
                # field type code (presumably a double - TODO confirm).
                message.Fields.Add(config.field_name, 5, prob)
                
            message.Update()
        except pythoncom.com_error, d:
            progress.warning("Failed to get a message: %s" % (str(d),) )
        message = messages.GetNext()

# Called back from the dialog to do the actual classification.
def classifier(mgr, progress):
    """Classify every message in the folders selected in the configuration.

    Reads the folder selection from mgr.config.classify, reports an error
    through `progress` when no folder is selected, sizes the progress bar
    and then hands each folder to classify_folder().  Returns None; stops
    early once progress.stop_requested() is true.
    """
    session = mgr.mapi
    settings = mgr.config.classify
    if not settings.folder_ids:
        progress.error("You must specify at least one folder")
        return

    progress.set_status("Counting messages")
    targets = mgr.BuildFolderList(settings.folder_ids, settings.include_sub)

    # One tick per message, one extra per folder, plus three spare ticks.
    total_ticks = 3
    for folder in targets:
        total_ticks = total_ticks + folder.Messages.Count + 1
    progress.set_max_ticks(total_ticks)

    for folder in targets:
        label = folder.Name.encode("ascii", "replace")
        progress.set_status("Processing folder '%s'" % (label,))
        classify_folder(folder, mgr, settings, progress)
        if progress.stop_requested():
            return


def main():
    """Run the classify dialog as a stand-alone script.

    Obtains the shared manager, shows the modal ClassifyDialog (which calls
    back into classifier()), then saves the manager state.  The manager is
    always closed afterwards, even when the dialog or the save raises, so
    the resources it holds are released.
    """
    import manager
    import dialogs.ClassifyDialog

    mgr = manager.GetManager()
    try:
        d = dialogs.ClassifyDialog.ClassifyDialog(mgr, classifier)
        d.DoModal()
        mgr.Save()
    finally:
        # Previously skipped whenever anything above raised.
        mgr.Close()

if __name__ == "__main__":
    main()

--- NEW FILE: manager.py ---
import cPickle
import os
import sys
import thread

import classifier
from tokenizer import tokenize
import win32com.client
import win32com.client.gencache
import pythoncom

# Suck in CDO type lib
# Runs at import time.  The arguments after the type-library GUID look like
# (lcid, major, minor), i.e. version 1.21 - TODO confirm against the
# win32com gencache documentation.
win32com.client.gencache.EnsureModule('{3FA7DEA7-6438-101B-ACC1-00AA00423326}', 0, 1, 21, bForDemand = True)

# Remember where this module lives; BayesManager resolves relative
# configuration names against this path.  Fall back to the script path when
# `__file__` is not defined.
try:
    this_filename = __file__
except NameError:
    this_filename = sys.argv[0]

class ManagerError(Exception):
    """Error raised by the Bayes manager, e.g. when a required file is missing."""

class BayesManager:
    """Owns the MAPI session, the persisted configuration and the Bayes database.

    Three files are derived from `config_base`:
      <base>_bayes_customize.ini  -- must already exist (see LoadBayes)
      <base>_bayes_database.pck   -- pickled classifier
      <base>_configuration.pck    -- pickled configuration tree

    NOTE: both .pck files are loaded with cPickle, so they must only ever be
    files written by this program; never point the manager at untrusted data.
    """

    def __init__(self, config_base = "default", outlook = None, verbose = 1):
        """Log on to MAPI and load the configuration and the Bayes database.

        config_base -- base name of the data files; a relative name is
                       resolved against the directory of this module.
        outlook     -- optional Outlook application object to reuse for the
                       creating thread.
        verbose     -- 0 is quiet, 1 prints summaries, >1 also dumps the
                       configuration.

        Raises ManagerError (from LoadBayes) when the .ini file is missing.
        """
        self.verbose = verbose
        if not os.path.isabs(config_base):
            config_base = os.path.join( os.path.dirname(this_filename), config_base)
        config_base = os.path.abspath(config_base)
        self.ini_filename = config_base + "_bayes_customize.ini"
        self.bayes_filename = config_base + "_bayes_database.pck"
        self.config_filename = config_base + "_configuration.pck"

        # First read the configuration file.
        self.config = self.LoadConfig()

        # The working directory is saved and restored around the logon
        # (presumably because the MAPI logon can change it - TODO confirm).
        cwd = os.getcwd()
        self.mapi = win32com.client.Dispatch("MAPI.Session")
        self.mapi.Logon(None, None, False, False)
        # Per-thread state, keyed by thread ident.
        self._tls = {thread.get_ident(): {"outlook": outlook} }
        self.outlook = outlook
        os.chdir(cwd)

        self.LoadBayes()

    # Outlook gives us thread grief :(
    def WorkerThreadStarting(self):
        """Initialise COM and the per-thread state for the calling thread."""
        pythoncom.CoInitialize()
        self._tls[thread.get_ident()] = {}

    def WorkerThreadEnding(self):
        """Drop the calling thread's state and uninitialise COM for it."""
        assert thread.get_ident() in self._tls, "WorkerThreadStarting hasn't been called for this thread"
        del self._tls[thread.get_ident()]
        pythoncom.CoUninitialize()

    def GetOutlookForCurrentThread(self):
        """Return the Outlook application object for the calling thread.

        The object is created on first use and cached per thread.
        """
        assert thread.get_ident() in self._tls, "WorkerThreadStarting hasn't been called for this thread"
        existing = self._tls[thread.get_ident()].get("outlook")
        if not existing:
            existing = win32com.client.Dispatch("Outlook.Application")
            self._tls[thread.get_ident()]["outlook"] = existing
        return existing

    def LoadBayes(self):
        """Load the pickled Bayes database, or start a new one.

        Raises ManagerError when the .ini file does not exist.  A missing
        database file is silently replaced by a new database; any other load
        failure is reported with a traceback and also replaced.
        """
        if not os.path.exists(self.ini_filename):
            raise ManagerError("The file '%s' must exist before the database '%s' can be opened or created" % (self.ini_filename, self.bayes_filename))
        bayes = None
        try:
            f = open(self.bayes_filename,'rb')
            try:
                bayes = cPickle.load(f)
            finally:
                f.close()
            print("Loaded bayes database from '%s'" % (self.bayes_filename,))
        except IOError:
            pass # ignore file-not-found
        except:
            # Deliberately broad: a damaged database must not stop start-up.
            print("Failed to load bayes database")
            import traceback
            traceback.print_exc()
        if bayes is None:
            self.InitNewBayes()
            bayes = self.bayes
        if self.verbose:
            print("Bayes database initialized with %d spam and %d good messages" % (bayes.nspam, bayes.nham))
        self.bayes = bayes
        # Also resets the flag that InitNewBayes() raised just above.
        self.bayes_dirty = False

    def LoadConfig(self):
        """Return the persisted configuration, or defaults if it can't be read.

        A missing or truncated file (the normal first-run situation) and
        pickles that refer to attributes or modules that no longer exist all
        fall back to a new _ConfigurationRoot.
        """
        try:
            f = open(self.config_filename,'rb')
            try:
                ret = cPickle.load(f)
            finally:
                f.close()
            if self.verbose > 1:
                print("Loaded configuration from '%s':" % (self.config_filename,))
                ret._dump()
        except (IOError, EOFError, AttributeError, ImportError):
            ret = _ConfigurationRoot()
            if self.verbose > 1:
                print("FAILED to load configuration from '%s' - using default:" % (self.config_filename,))
                import traceback
                traceback.print_exc()
        return ret

    def InitNewBayes(self):
        """Replace the current database with a new, empty classifier."""
        os.environ["BAYESCUSTOMIZE"]=self.ini_filename
        self.bayes = classifier.Bayes()
        self.bayes_dirty = True

    def SaveBayes(self):
        """Pickle the Bayes database to its file."""
        bayes = self.bayes
        if self.verbose:
            print("Saving bayes database with %d spam and %d good messages" % (bayes.nspam, bayes.nham))
            print(" -> %s" % (self.bayes_filename,))
        f = open(self.bayes_filename,"wb")
        try:
            cPickle.dump(bayes, f, 1)
        finally:
            f.close()

    def SaveConfig(self):
        """Pickle the configuration tree to its file."""
        if self.verbose > 1:
            print("Saving configuration:")
            self.config._dump()
            print(" -> %s" % (self.config_filename,))
        f = open(self.config_filename,"wb")
        try:
            cPickle.dump(self.config, f, 1)
        finally:
            f.close()

    def Save(self):
        """Save the configuration, and the database when it has changed."""
        self.SaveConfig()
        if self.bayes_dirty:
            self.SaveBayes()
            self.bayes_dirty = False
        else:
            print("Bayes database is not dirty - not writing")

    def Close(self):
        """Log off from MAPI and drop all references held by the manager."""
        if self.mapi is not None:
            self.mapi.Logoff()
            self.mapi = None
        if self.bayes_dirty and self.bayes:
            print("Warning: BayesManager closed while Bayes database dirty")
        self.bayes = None
        self.config = None
        self._tls = None

    def BuildFolderList(self, folder_ids, include_sub):
        """Return a list of folder objects for `folder_ids`, without duplicates.

        When `include_sub` is true the sub-folders of each folder are added
        recursively.  Folders that raise a COM error while being opened or
        enumerated are skipped.
        """
        ret = {}
        for folder_id in folder_ids:
            subs = []
            try:
                f = self.mapi.GetFolder(folder_id)
                if include_sub:
                    children = f.Folders
                    # The collection is indexed from 1, so the last valid
                    # index is Count itself; range(1, Count) lost a folder.
                    sub_ids = [children.Item(i).ID
                               for i in range(1, children.Count + 1)]
                    subs = self.BuildFolderList(sub_ids, True)
            except pythoncom.error:
                continue
            ret[folder_id] = f
            for sub in subs:
                ret[sub.ID] = sub
        return list(ret.values())

    def YieldMessageList(self, folder):
        """Generate each message in `folder.Messages`, in collection order."""
        messages = folder.Messages
        if not messages:
            print("Can't find messages in folder '%s'" % (folder.Name,))
            return
        message = messages.GetFirst()
        while message is not None:
            yield message
            message = messages.GetNext()

# configuration stuff we persist.
class _ConfigurationContainer:
    def __init__(self, **kw):
        self.__dict__.update(kw)
    def __setstate__(self, state):
        self.__init__() # ensure any new/default values setup
        self.__dict__.update(state)
    def _dump(self, thisname="<root>", level=0):
        import pprint
        prefix = "  " * level
        print "%s%s:" % (prefix, thisname)
        for name, ob in self.__dict__.items():
            d = getattr(ob, "_dump", None)
            if d is None:
                print "%s %s: %s" % (prefix, name, pprint.pformat(ob))
            else:
                d(name, level+1)
                
class _ConfigurationRoot(_ConfigurationContainer):
    """Root of the persisted configuration tree, filled in with defaults.

    Holds one _ConfigurationContainer per feature (training, classify,
    filter, filter_now) plus the list of rules.
    """
    def __init__(self):
        section = _ConfigurationContainer
        self.training = section(ham_folder_ids = [], ham_include_sub = False,
                                spam_folder_ids = [], spam_include_sub = False)
        self.classify = section(folder_ids = [], include_sub = False,
                                field_name = "SpamProb")
        self.filter = section(folder_ids = [], include_sub = False)
        self.filter_now = section(folder_ids = [], include_sub = False,
                                  only_unread = False)
        self.rules = []
       

# The single shared manager, created on first use by GetManager().
_mgr = None

def GetManager():
    """Return the shared BayesManager, creating it on the first call."""
    global _mgr
    if _mgr is not None:
        return _mgr
    _mgr = BayesManager()
    return _mgr

# Smoke test when run as a script: build a manager with the default
# configuration and report an initialization failure instead of a traceback.
if __name__=='__main__':
    try:
        mgr = BayesManager()
    except ManagerError, d:
        print "Error initializing Bayes manager"
        print d
        
--- NEW FILE: rule.py ---
import pythoncom
from win32com.client import constants
import time

class Rule:
    """A filter rule: what to do with a message whose score is in a range.

    Attributes (all set to defaults by __init__):
      name              -- display name
      enabled           -- stored with the rule; not consulted by Act()
      min, max          -- inclusive score range the rule applies to
      action            -- "None", "Copy" or "Move"
      folder_id         -- destination folder ID for "Copy"/"Move"
      flag_message      -- flag matching messages with "Check Spam"
      write_field       -- store the score in a user-defined field
      write_field_name  -- name of that field
    """
    def __init__(self):
        self.name = "New Rule"
        self.enabled = True
        self.min = 0.0
        self.max = 0.9
        self.action = "None"
        self.flag_message = True
        self.write_field = True
        self.write_field_name = "SpamProb"
        self.folder_id = ""

    def __repr__(self):
        bits = ["Rule at 0x%x:\n" % (id(self),)]
        for name, ob in self.__dict__.items():
            bits.append(" rule.%s: %r\n" % (name, ob))
        return "".join(bits)

    def GetProblem(self, mgr):
        """Return a message describing what is wrong with the rule, or None."""
        if self.min > self.max:
            return "The maximum value must be greater than the minimum"
        if self.action != "None":
            if not self.folder_id:
                return "You must specify a folder for 'Move' or 'Copy'"
            if self._GetFolder(mgr) is None:
                return "Can not locate the destination folder"
        if self.write_field and not self.write_field_name:
            return "You must specify the field name to create"
        return None

    def _GetFolder(self, mgr):
        """Return the destination folder object, or None if it can't be opened."""
        try:
            return mgr.mapi.GetFolder(self.folder_id)
        except pythoncom.com_error:
            return None

    def Act(self, mgr, msg, prob):
        """Apply the rule to `msg` if `prob` lies within [min, max].

        Returns False when the score is outside the range (nothing is
        touched), True otherwise.  COM errors propagate to the caller.
        """
        if mgr.verbose > 1:
            print("Rule '%s': %.2f->%.2f (%.2f) (%s)" % (self.name, self.min, self.max, prob, msg.Subject[:20].encode("ascii", "replace")))
        if prob < self.min or prob > self.max:
            return False
        # Do mods before we move.
        outlook_ns = mgr.GetOutlookForCurrentThread().GetNamespace("MAPI")
        outlook_message = outlook_ns.GetItemFromID(msg.ID)
        if self.flag_message:
            outlook_message.FlagRequest = "Check Spam"
            outlook_message.FlagStatus = constants.olFlagMarked
            outlook_message.Save()
        if self.write_field:
            format = 4 # 4=2 decimal, 3=1 decimal - index in "field chooser" combo when type=Number.
            prop = outlook_message.UserProperties.Add(self.write_field_name, constants.olNumber, True, format)
            prop.Value = prob
            outlook_message.Save()

        if self.action == "None":
            pass
        elif self.action == "Copy":
            # Outlook's Copy() takes no arguments and returns the new item,
            # so the copy has to be moved into the destination afterwards.
            # Resolve the folder first so that a bad folder ID does not leave
            # a stray duplicate behind.
            destination = outlook_ns.GetFolderFromID(self.folder_id)
            outlook_message.Copy().Move(destination)
        elif self.action == "Move":
            print("moving %s" % (self.flag_message,))
            outlook_message.Move(outlook_ns.GetFolderFromID(self.folder_id))
        else:
            print("Eeek - bad action %s" % (self.action,))

        return True


Index: filter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/filter.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** filter.py	4 Oct 2002 22:28:47 -0000	1.1
--- filter.py	19 Oct 2002 16:23:37 -0000	1.2
***************
*** 4,123 ****
  # Copyright PSF, license under the PSF license
  
! # Make py2exe happy
! import dbhash, anydbm
! 
! import sys, os, os.path, cPickle, string, getopt
! import win32com.client
! 
! import email
! import email.Parser
! from hammie import createbayes, Hammie
! import classifier
! 
! 
! def findFolder(f, findName, name=""):
!     folders = f.Folders
!     folder = folders.GetFirst()
!     while folder:
!         nm = "%s/%s" % (name, folder.Name)
!         nm = nm.encode('ascii', 'replace')
!         if nm == findName:
!             return folder
!         try:
!             f = findFolder(folder, findName, nm)
!             if f:
!                 return f
!         except:
!             pass
!         folder = folders.GetNext()
!     return None
  
  
! from tokenizer import tokenize
! def filter(bayes, rootFolder, folderName, targetName=None, over=None,
!            under=None, detail=None):
!     hammie = Hammie(bayes)
!     n = nover = nunder = 0
!     f = findFolder(rootFolder, folderName)
!     targetf = None
!     if targetName:
!         targetf = findFolder(rootFolder, targetName)
!         if not targetf:
!             print "Can't find folder %s to move messages to" % targetName
!             return
!     messages = f.Messages
!     message = messages.GetFirst()
!     while message:
          try:
!             headers = "%s" % message.fields[0x7D001E]
              headers = headers.encode('ascii', 'replace')
              body = message.Text.encode('ascii', 'replace')
!             n = n + 1
!         except:
!             message = messages.GetNext()
              continue
!         text = headers + body
          prob, clues = hammie.score(text, evidence=1)
!         if over <> None and prob >= over:
!             nover = nover + 1
!             if detail:
!                 print "***Over threshold", prob, over
!                 for i in range(1, message.recipients.Count+1):
!                     print message.Recipients[i].Address,
!                 print message.Subject.encode('ascii','replace')
!                 print hammie.formatclues(clues)
!             if targetf:
!                 message.MoveTo(targetf.ID)
!         if under <> None and prob <= under:
!             nunder = nunder + 1
!             if detail:
!                 print "***Under threshold", prob, under
!                 for i in range(1, message.recipients.Count+1):
!                     print message.Recipients[i].Address,
!                 print message.Subject.encode('ascii','replace')
!                 print hammie.formatclues(clues)
!             if targetf:
!                 message.MoveTo(targetf.ID)
!         message = messages.GetNext()
!     print "Total %d, over %d under %d" % (n, nover, nunder)
  
! def usage():
!     print "Usage: filter.py --bayes=bayes.pck --from=folder,folder,folder [--to=folder] [--detail] [--over=float|--under=float]"
!     print """Example: python filter.py --from=/Personal/Hotmail,/Personal/ExJunk
! --over=.35 --detail --to=/SpamMaybe"""
  
  def main():
!     from hammie import createbayes
!     db_name = 'bayes.pck'
!     folders = []
!     options = ["over=", "under=", "bayes=", "to=", "from=", "detail"]
!     dodetail=targetName=to=over=under= None
!     opts,args = getopt.getopt(sys.argv[1:], None, options)
!     if args:
!         usage()
!         sys.exit(1)
!     for opt, arg in opts:
!         if opt == "--under": under = float(arg)
!         elif opt == "--over":  over = float(arg)
!         elif opt == "--bayes":  db_name = arg
!         elif opt == "--to": targetName = arg
!         elif opt == "--from": folders = string.split(arg, ",")
!         elif opt == "--detail": dodetail = 1
!     if not (over or under) or not folders:
!         usage()
!         sys.exit(1)
!     bayes = cPickle.load(open(db_name,'rb'))
!     cwd =  os.getcwd()
!     session = win32com.client.Dispatch("MAPI.Session")
!     session.Logon()
!     personalFolders = findFolder(session.GetFolder(''),
!                                  '/Top of Personal Folders')
!     for folder in folders:
!         print "Filtering %s, over: %s under %s" % (arg, over, under)
!         filter(bayes, personalFolders, folder, targetName, over=over,
!                under=under, detail=dodetail)
!     session.Logoff()
!     session = None
!     print 'Done'
  
  if __name__ == "__main__":
--- 4,78 ----
  # Copyright PSF, license under the PSF license
  
! import sys, os
! from win32com.client import Dispatch, constants
! import pythoncom
! import rule
  
+ from hammie import Hammie
  
! def filter_folder(f, mgr, progress, filter):
!     only_unread = filter.only_unread
!     hammie = Hammie(mgr.bayes)
!     num_messages = 0
!     for message in mgr.YieldMessageList(f):
!         if progress.stop_requested():
!             break
!         progress.tick()
!         if only_unread and not message.Unread:
!             continue
!         
          try:
!             headers = message.Fields[0x7D001E].Value
              headers = headers.encode('ascii', 'replace')
              body = message.Text.encode('ascii', 'replace')
!             text = headers + body
!         except pythoncom.com_error, d:
!             progress.warning("Failed to get a message: %s" % (str(d),) )
              continue
! 
          prob, clues = hammie.score(text, evidence=1)
!         did_this_message = False
!         for rule in mgr.config.rules:
!             if rule.enabled:
!                 try:
!                     if rule.Act(mgr, message, prob):
!                         did_this_message = True
!                 except:
!                     print "Rule failed!"
!                     import traceback
!                     traceback.print_exc()
!         if did_this_message:
!             num_messages += 1
!     return num_messages
  
!         
! def filterer(mgr, progress, filter):
!     if not filter.folder_ids:
!         progress.error("You must specify at least one folder")
!         return
! 
!     progress.set_status("Counting messages")
!     folders = mgr.BuildFolderList(filter.folder_ids, filter.include_sub)
!     num_msgs = 0
!     for f in folders:
!         num_msgs += f.Messages.Count + 1
!     progress.set_max_ticks(num_msgs+3)
!     num = 0
!     for f in folders:
!         progress.set_status("Filtering folder '%s'" % (f.Name.encode("ascii", "replace"),))
!         num += filter_folder(f, mgr, progress, filter)
!         if progress.stop_requested():
!             return
!     progress.set_status("Filter acted upon %d messages" % (num,))
  
  def main():
!     import manager
!     mgr = manager.GetManager()
!     
!     import dialogs.FilterDialog
!     d = dialogs.FilterDialog.FilterArrivalsDialog(mgr, rule.Rule, filterer)
!     d.DoModal()
!     mgr.Save()
!     mgr.Close()
  
  if __name__ == "__main__":

Index: train.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/train.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** train.py	4 Oct 2002 22:28:47 -0000	1.1
--- train.py	19 Oct 2002 16:23:37 -0000	1.2
***************
*** 6,92 ****
  import sys, os, os.path, getopt, cPickle, string
  import win32com.client
  import classifier
  from tokenizer import tokenize
  
! def findFolder(f,findName, name=""):
!     folders = f.Folders
!     folder = folders.GetFirst()
!     while folder:
!         nm = "%s/%s" % (name, folder.Name)
!         nm = nm.encode('ascii','replace')
!         if nm == findName:
!             return folder
!         try:
!             f = findFolder(folder, findName, nm)
!             if f: return f
!         except:
!             pass
!         folder = folders.GetNext()
!     return None
! 
! def train( bayes, rootFolder,folderName, isspam):
!     f = findFolder(rootFolder, folderName)
!     if not f:
!         print "Can't find folder", folderName
!         return
!     messages = f.Messages
!     if not messages:
!         print "Can't find messages in folder", folderName
!         return
!     message = messages.GetFirst()
!     while message:
          try:
!             headers = "%s" % message.fields[0x7D001E]
              headers = headers.encode('ascii', 'replace')
              body = message.Text.encode('ascii', 'replace')
!             text = headers + body
!             bayes.learn(tokenize(text), isspam, False)
!         except:
!             pass
!         message = messages.GetNext()
  
! def usage():
!     print "Usage: train.py --bayes=bayes.pck --spam=folder,folder,folder --ham=folder,folder,folder"
!     print """Example: python train.py --bayes=bayes.pck --spam=/JunkMail,/Personal/Hotmail,/Personal/Spam  --ham="/Dragon People,/WebReply,/House,/Tenberry,/Receipts and coupons,/Rational and MIT,/Lists/List-mod_python,/Lists/List-other,/List-Webware,/Microsoft,/Fishing,/Ebusiness,/Amazon" """
  
  
  
! def main():
!     db_name = 'bayes.pck'
!     spam = []
!     ham = []
!     options = ["ham=", "spam=", "bayes="]
!     opts,args = getopt.getopt(sys.argv[1:], None, options)
!     if args:
!         usage()
!         sys.exit(1)
!     for opt,arg in opts:
!         if opt == "--spam": spam = string.split(arg, ',')
!         elif opt == "--ham":  ham = string.split(arg,',')
!         elif opt == "--bayes":  db_name = arg
!     if not spam and not ham:
!         usage()
!         sys.exit(1)
!     cwd =  os.getcwd()
!     session = win32com.client.Dispatch("MAPI.Session")
!     session.Logon()
!     personalFolders = findFolder(session.GetFolder(''),
!                                  '/Top of Personal Folders')
!     bayes = classifier.Bayes()
!     for folder in spam:
!         print "Training with %s as spam" % folder
!         train(bayes, personalFolders,folder, 1)
!     for folder in ham:
!         print "Training with %s as ham" % folder
!         train(bayes, personalFolders,folder, 0)
!     session.Logoff()
!     session = None
!     print 'Updating probabilities...'
      bayes.update_probabilities()
!     print ("Done with training %s, built with %d examples and %d counter "
!            "examples" % (db_name, bayes.nspam, bayes.nham))
!     db_name = os.path.join(cwd, db_name)
!     print 'Writing DB...'
!     cPickle.dump(bayes, open(db_name,"wb"), 1)
  
  if __name__ == "__main__":
--- 6,82 ----
  import sys, os, os.path, getopt, cPickle, string
  import win32com.client
+ import pythoncom
+ import win32con
+ 
  import classifier
  from tokenizer import tokenize
  
! def train_folder( f, isspam, mgr, progress):
!     for message in mgr.YieldMessageList(f):
!         if progress.stop_requested():
!             break
!         progress.tick()
          try:
!             # work with MAPI until we work out how to get headers from outlook
!             message = mgr.mapi.GetMessage(message.ID)
!             headers = message.Fields[0x7D001E].Value
              headers = headers.encode('ascii', 'replace')
              body = message.Text.encode('ascii', 'replace')
!         except pythoncom.com_error:
!             progress.warning("failed to get a message")
!             continue
!         text = headers + body
!         mgr.bayes.learn(tokenize(text), isspam, False)
  
! # Called back from the dialog to do the actual training.
! def trainer(mgr, progress):
!     pythoncom.CoInitialize()
!     config = mgr.config
!     mgr.InitNewBayes()
!     bayes = mgr.bayes
!     session = mgr.mapi
! 
!     if not config.training.ham_folder_ids or not config.training.spam_folder_ids:
!         progress.error("You must specify at least one spam, and one good folder")
!         return
!     progress.set_status("Counting messages")
!     ham_folders = mgr.BuildFolderList(config.training.ham_folder_ids, config.training.ham_include_sub)
!     spam_folders = mgr.BuildFolderList(config.training.spam_folder_ids, config.training.ham_include_sub)
!     num_msgs = 0
!     for f in ham_folders + spam_folders:
!         num_msgs += f.Messages.Count + 1
!     progress.set_max_ticks(num_msgs+3)
  
+     for f in ham_folders:
+         progress.set_status("Processing good folder '%s'" % (f.Name.encode("ascii", "replace"),))
+         train_folder(f, 0, mgr, progress)
+         if progress.stop_requested():
+             return
  
+     for f in spam_folders:
+         progress.set_status("Processing spam folder '%s'" % (f.Name.encode("ascii", "replace"),))
+         train_folder(f, 1, mgr, progress)
+         if progress.stop_requested():
+             return
  
!     progress.tick()
!     progress.set_status('Updating probabilities...')
      bayes.update_probabilities()
!     progress.tick()
!     if progress.stop_requested():
!         return
!     mgr.bayes_dirty = True
!     progress.set_status("Completed training with %d spam and %d good messages" % (bayes.nspam, bayes.nham))
! 
! def main():
!     import manager
!     mgr = manager.GetManager()
! 
!     import dialogs.TrainingDialog
!     d = dialogs.TrainingDialog.TrainingDialog(mgr, trainer)
!     d.DoModal()
! 
!     mgr.Save()    
!     mgr.Close()
  
  if __name__ == "__main__":

Index: README.txt
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/README.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** README.txt	4 Oct 2002 22:28:47 -0000	1.1
--- README.txt	19 Oct 2002 16:23:37 -0000	1.2
***************
*** 1,5 ****
  This directory contains tools for using the classifier with Microsoft
! Outlook 2000, courtesy of Sean True.  Note that you need Python's win32com
! extensions.
  
  train.py
--- 1,10 ----
  This directory contains tools for using the classifier with Microsoft
! Outlook 2000, courtesy of Sean True and Mark Hammond.  Note that you need 
! Python's win32com extensions (http://starship.python.net/crew/mhammond)
! 
! ** NOTE ** - You also need CDO installed.  This comes with Outlook 2k, but is
! not installed by default.  You may need to find your Office 2000 CD, select
! Add/Remove components, and find CDO under Outlook.  If you see a COM error
! complaining about "MAPI.Session", this is your problem.
  
  train.py
***************
*** 7,17 ****
  
  filter.py
!     Moves msgs among Outlook Mail folders, based on classifier score.
! 
! spam.py
!     Dump Outlook Mail folders into the spam reservoir.
! 
! 
  
  Comments from Sean:
  
--- 12,24 ----
  
  filter.py
!     Moves and modifies msgs among Outlook Mail folders, based on classifier 
!     score.
  
+ classify.py
+     Creates a field in each message with the classifier score.  Once run, 
+     the Outlook Field Chooser can be used to display, sort etc the field,
+     or used to change formatting of these messages.  The field will appear
+     in "user defined fields"
+     
  Comments from Sean:
  
***************
*** 24,58 ****
  filter.  Closing and reopening Outlook always seems to restore things,
  with no fuss.  Your mileage may vary.  Buyer beware.  Worth what you paid.
! 
! Brad Morgan comments that in an environment with multiple InfoStores
! (message stores?), my simple folder finder does not work.  He uses this
! work around:
! 
! ===============
! # This didn't work:
! # personalFolders = findFolder(folder, 'Personal Folders')
! #
! # The following was required:
! # (Note: I have two infostores and I've hard-coded the index of
! # 'Personal Folders')
! 
! infostores = session.InfoStores
! print "There are %d infostores" % infostores.Count
! infostore = infostores[1]
! print "Infostore = ", infostore.Name
! personalFolders = infostore.RootFolder
! =================
! 
! It deserves an option to select the infostore wanted by name.
! 
! Enjoy.
  
  Copyright transferred to PSF from Sean D. True and WebReply.com.
  Licensed under PSF, see Tim Peters for IANAL interpretation.
  
  Ask me technical questions, and if your mail doesn't get eaten by a broken
  spam filter, I'll try to help.
- 
- 
  -- Sean
  seant@iname.com
--- 31,48 ----
  filter.  Closing and reopening Outlook always seems to restore things,
  with no fuss.  Your mileage may vary.  Buyer beware.  Worth what you paid.
! (Mark hasn't seen this)
  
  Copyright transferred to PSF from Sean D. True and WebReply.com.
  Licensed under PSF, see Tim Peters for IANAL interpretation.
  
+ Copyright transferred to PSF from Mark Hammond.
+ Licensed under PSF, see Tim Peters for IANAL interpretation.
+ 
  Ask me technical questions, and if your mail doesn't get eaten by a broken
  spam filter, I'll try to help.
  -- Sean
  seant@iname.com
+ 
+ Ask Sean all the technical questions <wink>
+ -- Mark
+ mhammond@skippinet.com.au