[Spambayes-checkins] spambayes/Outlook2000 addin.py,NONE,1.1 config.py,NONE,1.1 classify.py,1.3,1.4 filter.py,1.4,1.5 manager.py,1.9,1.10 rule.py,1.2,1.3

Mark Hammond mhammond@users.sourceforge.net
Sun, 20 Oct 2002 00:47:03 -0700


Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory usw-pr-cvs1:/tmp/cvs-serv30441

Modified Files:
	classify.py filter.py manager.py rule.py 
Added Files:
	addin.py config.py 
Log Message:
Brand spanking new version that actually filters mail as it
arrives (woo hooo)


--- NEW FILE: addin.py ---
# Mark's Outlook addin


import warnings
warnings.filterwarnings("ignore", category=FutureWarning, append=1) # sick of the new hex() warnings!

import sys

from win32com import universal
from win32com.server.exception import COMException
from win32com.client import gencache, DispatchWithEvents, Dispatch
import winerror
import win32api
import pythoncom
from win32com.client import constants

# If we are not running in a console, redirect all print statements to the
# win32traceutil collector.
# You can view output either from Pythonwin's "Tools->Trace Collector Debugging Tool",
# or simply run "win32traceutil.py" from a command prompt.
try:
    # Probe for a console; the except clause below treats win32api.error
    # from this call as "there is no console".
    win32api.GetConsoleTitle()
except win32api.error:
    # No console - redirect
    # The import is done purely for its side effect (the redirection described
    # in the comment above this block), so it must stay ahead of the print.
    import win32traceutil
    # Only reached in the no-console case, i.e. after the redirect is in place.
    print "Outlook Spam Addin module loading"


# A lovely big block that attempts to catch the most common errors - COM objects not installed.
try:
    # Support for COM objects we use.
    gencache.EnsureModule('{00062FFF-0000-0000-C000-000000000046}', 0, 9, 0, bForDemand=True) # Outlook 9
    gencache.EnsureModule('{2DF8D04C-5BFA-101B-BDE5-00AA0044DE52}', 0, 2, 1, bForDemand=True) # Office 9
    gencache.EnsureModule('{3FA7DEA7-6438-101B-ACC1-00AA00423326}', 0, 1, 21, bForDemand = True) # CDO

    # The TLB defiining the interfaces we implement
    universal.RegisterInterfaces('{AC0714F2-3D04-11D1-AE7D-00A0C90F26F4}', 0, 1, 0, ["_IDTExtensibility2"])
except pythoncom.com_error, (hr, msg, exc, arg):
    if __name__ != '__main__':
        # Error when not running as a script - eeek - just let it go.
        raise
    try:
        pythoncom.MakeIID("MAPI.Session")
        have_cdo = True
    except pythoncom.com_error:
        have_cdo = False
    print "This Addin requires that Outlook 2000 with CDO be installed on this machine."
    print
    if have_cdo:
        print "However, these appear to be installed.  Error details:"
        print "COM Error 0x%x (%s)" % (hr, msg)
        if exc:
            print "Exception: %s" % (exc)
        print 
        print "Sorry, I can't be more help, but I can't continue while I have this error."
    else:
        print "CDO is not currently installed.  To install CDO, you must locate the"
        print "media from which you installed Outlook (such as Office 2000 CD or "
        print "sharepoint), re-run setup, select Outlook, enable CDO."
        print
        print "Please install CDO then attempt this registration again."
    sys.exit(1)

# Whew - we seem to have all the COM support we need - let's rock!

class ButtonEvent:
    """Click-event sink that forwards a button click to a stored callable.

    Init() must be called before the first click to supply the callable
    and the positional arguments it should receive.
    """
    def Init(self, handler, args = ()):
        # Remember what to call, and with which arguments, on each click.
        self.args = args
        self.handler = handler

    def OnClick(self, button, cancel):
        # The event's own parameters are ignored; only the stored arguments
        # are passed on.
        callback, params = self.handler, self.args
        callback(*params)

class FolderItemsEvent:
    def Init(self, target, application, manager):
        self.application = application
        self.manager = manager
        self.target = target

    def OnItemAdd(self, item):
        if self.manager.config.filter.enabled:
            mapi_message = self.manager.mapi.GetMessage(item.EntryID)
            import filter
            num_rules = filter.filter_message(mapi_message, self.manager)
            print "%d Spam rules fired for message '%s'" % (num_rules, item.Subject.encode("ascii", "replace"))
        else:
            print "Spam filtering is disabled - ignoring new message"


class OutlookAddin:
    _com_interfaces_ = ['_IDTExtensibility2']
    _public_methods_ = []
    _reg_clsctx_ = pythoncom.CLSCTX_INPROC_SERVER
    _reg_clsid_ = "{3556EDEE-FC91-4cf2-A0E4-7489747BAB10}"
    _reg_progid_ = "SpamBayes.OutlookAddin"
    _reg_policy_spec_ = "win32com.server.policy.EventHandlerPolicy"

    def __init__(self):
        self.folder_hooks = {}
        self.application = None

    def OnConnection(self, application, connectMode, addin, custom):
        print "SpamAddin - Connecting to Outlook"
        self.application = application

        # Create our bayes manager
        import manager
        self.manager = manager.GetManager()
        
        # ActiveExplorer may be none when started without a UI (eg, WinCE synchronisation)
        activeExplorer = application.ActiveExplorer()
        if activeExplorer is not None:
            bars = activeExplorer.CommandBars
            toolbar = bars.Item("Standard")
            item = toolbar.Controls.Add(Type=constants.msoControlButton, Temporary=True)
            # Hook events for the item
            item = self.toolbarButton = DispatchWithEvents(item, ButtonEvent)
            item.Init(manager.ShowManager, (self.manager,))
            item.Caption="Anti-Spam"
            item.TooltipText = "Define anti-spam filters"
            item.Enabled = True

        # Create a notification hook for all folders we filter.
        self.UpdateFolderHooks()

    def UpdateFolderHooks(self):
        new_hooks = {}
        for mapi_folder in self.manager.BuildFolderList(self.manager.config.filter.folder_ids, self.manager.config.filter.include_sub):
            eid = mapi_folder.ID
            existing = self.folder_hooks.get(eid)
            if existing is None:
                folder = self.application.GetNamespace("MAPI").GetFolderFromID(eid)
                try:
                    new_hook = DispatchWithEvents(folder.Items, FolderItemsEvent)
                except ValueError:
                    print "WARNING: Folder '%s' can not hook events" % (folder.Name,)
                    new_hook = None
                if new_hook is not None:
                    new_hook.Init(folder, self.application, self.manager)
                    new_hooks[eid] = new_hook
                    print "Created new message hook for", folder.Name
            else:
                new_hooks[eid] = existing
        self.folder_hooks = new_hooks
            
    def OnDisconnection(self, mode, custom):
        print "SpamAddin - Disconnecting from Outlook"
        self.folder_hooks = None
        self.application = None
        if self.manager is not None:
            self.manager.Save()
            self.manager.Close()
            self.manager = None

    def OnAddInsUpdate(self, custom):
        print "SpamAddin - OnAddInsUpdate", custom
    def OnStartupComplete(self, custom):
        print "SpamAddin - OnStartupComplete", custom
    def OnBeginShutdown(self, custom):
        print "SpamAddin - OnBeginShutdown", custom

def RegisterAddin(klass):
    """Register *klass* as an Outlook add-in for the current user.

    Creates Software\\Microsoft\\Office\\Outlook\\Addins\\<progid> under
    HKEY_CURRENT_USER, where <progid> is klass._reg_progid_, and writes the
    CommandLineSafe, LoadBehavior, Description and FriendlyName values.
    Both registry handles are closed before returning, even on error.
    """
    import _winreg
    key = _winreg.CreateKey(_winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Office\\Outlook\\Addins")
    try:
        subkey = _winreg.CreateKey(key, klass._reg_progid_)
        try:
            _winreg.SetValueEx(subkey, "CommandLineSafe", 0, _winreg.REG_DWORD, 0)
            # NOTE(review): 3 is presumably "connected + load at startup" -
            # confirm against the Office add-in registry documentation.
            _winreg.SetValueEx(subkey, "LoadBehavior", 0, _winreg.REG_DWORD, 3)
            # The ProgID doubles as both the description and the display name.
            _winreg.SetValueEx(subkey, "Description", 0, _winreg.REG_SZ, klass._reg_progid_)
            _winreg.SetValueEx(subkey, "FriendlyName", 0, _winreg.REG_SZ, klass._reg_progid_)
        finally:
            _winreg.CloseKey(subkey)
    finally:
        _winreg.CloseKey(key)

def UnregisterAddin(klass):
    """Remove the current user's Outlook add-in registry key for *klass*.

    A WindowsError from the delete is ignored.
    """
    import _winreg
    addin_key = "Software\\Microsoft\\Office\\Outlook\\Addins\\" + klass._reg_progid_
    try:
        _winreg.DeleteKey(_winreg.HKEY_CURRENT_USER, addin_key)
    except WindowsError:
        # Deliberately ignored; unregistering is best-effort.
        pass

if __name__ == '__main__':
    # Run as a script: (un)register the COM server itself, then add or remove
    # the registry entry that tells Outlook about the add-in.
    import win32com.server.register
    win32com.server.register.UseCommandLine(OutlookAddin)
    if "--unregister" not in sys.argv:
        RegisterAddin(OutlookAddin)
    else:
        UnregisterAddin(OutlookAddin)

--- NEW FILE: config.py ---
# Configuration classes that we persist via a pickle.
# These can't be defined in a module that may be run as "__main__" as well as
# being imported as a normal module - hence they live here in their own module.
import pprint

class _ConfigurationContainer:
    """Simple attribute bag holding one section of the pickled configuration.

    Keyword arguments passed to the constructor become attributes.  When an
    instance is unpickled, the defaults are set up first and the saved
    state is then merged over them, so attributes added in newer versions
    keep their default values when an older pickle is loaded.
    """
    def __init__(self, **kw):
        self.__dict__.update(kw)

    # Crap state-loading code so when we load an early version of the pickle
    # any attributes in the new version are considered defaults.
    # XXX - I really really want a better scheme than pickles etc here :(
    def _update_from(self, dict):
        """Merge the saved state *dict* (attribute name -> value) into self.

        A saved nested container is merged recursively into the existing
        container of the same name, so the nested defaults survive too.
        Any other saved value replaces the current one outright.  That
        includes a saved container whose current counterpart is missing or
        is not itself a container.
        """
        # NB: the parameter name shadows the builtin; kept for compatibility.
        for name, val in dict.items():
            existing = self.__dict__.get(name)
            if hasattr(val, "_update_from") and hasattr(existing, "_update_from"):
                existing._update_from(val.__dict__)
            else:
                self.__dict__[name] = val

    def __setstate__(self, state):
        """Pickle hook: rebuild the defaults, then overlay the saved state."""
        self.__init__() # ensure any new/default values setup
        self._update_from(state)

    def _dump(self, thisname="<root>", level=0):
        """Print this container and, indented beneath it, everything it holds."""
        prefix = "  " * level
        print("%s%s:" % (prefix, thisname))
        for name, ob in self.__dict__.items():
            d = getattr(ob, "_dump", None)
            if d is None:
                print("%s %s: %s" % (prefix, name, pprint.pformat(ob)))
            else:
                # Nested container - let it dump itself one level deeper.
                d(name, level+1)
                
class ConfigurationRoot(_ConfigurationContainer):
    """The top-level configuration object, pre-populated with defaults.

    Each group of settings lives in its own container attribute; the list
    of rules is stored directly on the root.
    """
    def __init__(self):
        new_section = _ConfigurationContainer

        # Folders used as ham/spam training sources.
        self.training = new_section(ham_folder_ids = [],
                                    ham_include_sub = False,
                                    spam_folder_ids = [],
                                    spam_include_sub = False)

        # Folders to classify, and the field name used for the result.
        self.classify = new_section(folder_ids = [],
                                    include_sub = False,
                                    field_name = "SpamProb")

        # Folders filtered as mail arrives; filtering starts switched off.
        self.filter = new_section(folder_ids = [],
                                  include_sub = False,
                                  enabled = False)

        # Settings for the "filter now" operation.
        self.filter_now = new_section(folder_ids = [],
                                      include_sub = False,
                                      only_unread = False)

        self.rules = []

if __name__ == '__main__':
    # This module only defines the configuration classes; point the user at
    # the script they presumably wanted.
    print("Please run 'manager.py'")

Index: classify.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/classify.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** classify.py	19 Oct 2002 22:30:02 -0000	1.3
--- classify.py	20 Oct 2002 07:47:00 -0000	1.4
***************
*** 13,17 ****
  
  def classify_folder( f, mgr, config, progress):
!     hammie = Hammie(mgr.bayes)
      messages = f.Messages
      pythoncom.CoInitialize() # We are called on a different thread.
--- 13,17 ----
  
  def classify_folder( f, mgr, config, progress):
!     hammie = mgr.hammie
      messages = f.Messages
      pythoncom.CoInitialize() # We are called on a different thread.
***************
*** 30,35 ****
              body = message.Text.encode('ascii', 'replace')
              text = headers + body
! 
!             prob, clues = hammie.score(text, evidence=True)
              added_prop = False
              try:
--- 30,34 ----
              body = message.Text.encode('ascii', 'replace')
              text = headers + body
!             prob = hammie.score(text, evidence=False)
              added_prop = False
              try:

Index: filter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/filter.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** filter.py	19 Oct 2002 22:30:02 -0000	1.4
--- filter.py	20 Oct 2002 07:47:00 -0000	1.5
***************
*** 9,18 ****
  import rule
  
! from hammie import Hammie
  
  def filter_folder(f, mgr, progress, filter):
      only_unread = filter.only_unread
-     hammie = Hammie(mgr.bayes)
      num_messages = 0
      for message in mgr.YieldMessageList(f):
          if progress.stop_requested():
--- 9,39 ----
  import rule
  
! def filter_message(message, mgr):
!     try:
!         headers = message.Fields[0x7D001E].Value
!         headers = headers.encode('ascii', 'replace')
!         body = message.Text.encode('ascii', 'replace')
!         text = headers + body
!     except pythoncom.com_error, d:
!         print "Failed to get a message: %s" % (d,)
!         return
! 
!     prob = mgr.hammie.score(text, evidence=False)
!     num_rules = 0
!     for rule in mgr.config.rules:
!         if rule.enabled:
!             try:
!                 if rule.Act(mgr, message, prob):
!                     num_rules += 1
!             except:
!                 print "Rule failed!"
!                 import traceback
!                 traceback.print_exc()
!     return num_rules
  
  def filter_folder(f, mgr, progress, filter):
      only_unread = filter.only_unread
      num_messages = 0
+     hammie = mgr.hammie
      for message in mgr.YieldMessageList(f):
          if progress.stop_requested():
***************
*** 21,47 ****
          if only_unread and not message.Unread:
              continue
! 
!         try:
!             headers = message.Fields[0x7D001E].Value
!             headers = headers.encode('ascii', 'replace')
!             body = message.Text.encode('ascii', 'replace')
!             text = headers + body
!         except pythoncom.com_error, d:
!             progress.warning("Failed to get a message: %s" % (str(d),) )
!             continue
! 
!         prob, clues = hammie.score(text, evidence=True)
!         did_this_message = False
!         for rule in mgr.config.rules:
!             if rule.enabled:
!                 try:
!                     if rule.Act(mgr, message, prob):
!                         did_this_message = True
!                 except:
!                     print "Rule failed!"
!                     import traceback
!                     traceback.print_exc()
!         if did_this_message:
              num_messages += 1
      return num_messages
  
--- 42,48 ----
          if only_unread and not message.Unread:
              continue
!         if filter_message(message, mgr):
              num_messages += 1
+         
      return num_messages
  

Index: manager.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** manager.py	19 Oct 2002 22:57:59 -0000	1.9
--- manager.py	20 Oct 2002 07:47:00 -0000	1.10
***************
*** 6,23 ****
  import thread
  
- import classifier
- from tokenizer import tokenize
  import win32com.client
  import win32com.client.gencache
  import pythoncom
  
! # Suck in CDO type lib
! win32com.client.gencache.EnsureModule('{3FA7DEA7-6438-101B-ACC1-00AA00423326}',
!                                       0, 1, 21, bForDemand=True)
  
  try:
!     this_filename = __file__
  except NameError:
!     this_filename = sys.argv[0]
  
  class ManagerError(Exception):
--- 6,36 ----
  import thread
  
  import win32com.client
  import win32com.client.gencache
  import pythoncom
  
! import config
  
  try:
!     this_filename = os.path.abspath(__file__)
  except NameError:
!     this_filename = os.path.abspath(sys.argv[0])
! 
! # This is a little of a hack <wink>.  We are generally in a child directory of the
! # bayes code.  To help installation, we handle the fact that this may not be
! # on sys.path.
! try:
!     import classifier
! except ImportError:
!     parent = os.path.abspath(os.path.join(os.path.dirname(this_filename), ".."))
!     sys.path.insert(0, parent)
!     del parent
!     import classifier
! 
! import hammie
! 
! # Suck in CDO type lib
! win32com.client.gencache.EnsureModule('{3FA7DEA7-6438-101B-ACC1-00AA00423326}',
!                                       0, 1, 21, bForDemand=True)
  
  class ManagerError(Exception):
***************
*** 75,78 ****
--- 88,92 ----
          bayes = None
          try:
+             os.environ["BAYESCUSTOMIZE"]=self.ini_filename
              bayes = cPickle.load(open(self.bayes_filename,'rb'))
              print "Loaded bayes database from '%s'" % (self.bayes_filename,)
***************
*** 90,93 ****
--- 104,108 ----
                     "%d spam and %d good messages" % (bayes.nspam, bayes.nham))
          self.bayes = bayes
+         self.hammie = hammie.Hammie(bayes)
          self.bayes_dirty = False
  
***************
*** 108,117 ****
                  ret._dump()
          except (AttributeError, ImportError):
!             ret = _ConfigurationRoot()
              if self.verbose > 1:
!                 print ("FAILED to load configuration from '%s "
!                        "- using default:" % self.config_filename)
!                 import traceback
!                 traceback.print_exc()
          return ret
  
--- 123,136 ----
                  ret._dump()
          except (AttributeError, ImportError):
!             ret = config.ConfigurationRoot()
!             print "FAILED to load configuration from '%s' - using default:" % (self.config_filename,)
!             import traceback
!             traceback.print_exc()
!         except IOError, details:
!             # File-not-found - less serious.
!             ret = config.ConfigurationRoot()
              if self.verbose > 1:
!                 # filename included in exception!
!                 print "IOError loading configuration (%s) - using default:" % (details)
          return ret
  
***************
*** 183,241 ****
              message = messages.GetNext()
  
- # configuration stuff we persist.
- class _ConfigurationContainer:
-     def __init__(self, **kw):
-         self.__dict__.update(kw)
-     def __setstate__(self, state):
-         self.__init__() # ensure any new/default values setup
-         self.__dict__.update(state)
-     def _dump(self, thisname="<root>", level=0):
-         import pprint
-         prefix = "  " * level
-         print "%s%s:" % (prefix, thisname)
-         for name, ob in self.__dict__.items():
-             d = getattr(ob, "_dump", None)
-             if d is None:
-                 print "%s %s: %s" % (prefix, name, pprint.pformat(ob))
-             else:
-                 d(name, level+1)
- 
- class _ConfigurationRoot(_ConfigurationContainer):
-     def __init__(self):
-         self.training = _ConfigurationContainer(
-             ham_folder_ids = [],
-             ham_include_sub = False,
-             spam_folder_ids = [],
-             spam_include_sub = False,
-             )
-         self.classify = _ConfigurationContainer(
-             folder_ids = [],
-             include_sub = False,
-             field_name = "SpamProb",
-             )
-         self.filter = _ConfigurationContainer(
-             folder_ids = [],
-             include_sub = False,
-             )
-         self.filter_now = _ConfigurationContainer(
-             folder_ids = [],
-             include_sub = False,
-             only_unread = False,
-             )
-         self.rules = []
- 
- 
  _mgr = None
  
! def GetManager():
      global _mgr
      if _mgr is None:
!         _mgr = BayesManager()
      return _mgr
  
! if __name__=='__main__':
      try:
!         mgr = BayesManager()
      except ManagerError, d:
          print "Error initializing Bayes manager"
          print d
--- 202,264 ----
              message = messages.GetNext()
  
  _mgr = None
  
! def GetManager(verbose=1):
      global _mgr
      if _mgr is None:
!         _mgr = BayesManager(verbose=verbose)
!     # If requesting greater verbosity, honour it
!     if verbose > _mgr.verbose:
!         _mgr.verbose = verbose
      return _mgr
  
! def ShowManager(mgr):
!     def do_train(dlg):
!         import train
!         import dialogs.TrainingDialog
!         d = dialogs.TrainingDialog.TrainingDialog(dlg.mgr, train.trainer)
!         d.DoModal()
!         
!     def do_classify(dlg):
!         import classify
!         import dialogs.ClassifyDialog
!         d = dialogs.ClassifyDialog.ClassifyDialog(dlg.mgr, classify.classifier)
!         d.DoModal()
! 
!     def do_filter(dlg):
!         import filter, rule
!         import dialogs.FilterDialog
!         d = dialogs.FilterDialog.FilterArrivalsDialog(dlg.mgr, rule.Rule, filter.filterer)
!         d.DoModal()
!         
!     import dialogs.ManagerDialog
!     d = dialogs.ManagerDialog.ManagerDialog(mgr, do_train, do_filter, do_classify)
!     d.DoModal()
! 
! def main(verbose_level = 1):
      try:
!         mgr = GetManager(verbose=verbose_level)
      except ManagerError, d:
          print "Error initializing Bayes manager"
          print d
+         return 1
+     ShowManager(mgr)
+     mgr.Save()
+     mgr.Close()
+ 
+ def usage():
+     print "Usage: manager [-v ...]"
+     sys.exit(1)
+         
+ if __name__=='__main__':
+     verbose = 1
+     import getopt
+     opts, args = getopt.getopt(sys.argv[1:], "v")
+     if args:
+         usage()
+     for opt, val in opts:
+         if opt=="-v":
+             verbose += 1
+         else:
+             usage()
+     main(verbose)

Index: rule.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/rule.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** rule.py	19 Oct 2002 18:14:01 -0000	1.2
--- rule.py	20 Oct 2002 07:47:00 -0000	1.3
***************
*** 3,6 ****
--- 3,8 ----
  import time
  
+ MAPI_E_NOT_FOUND = -2147221233
+ 
  class Rule:
      def __init__(self):
***************
*** 44,57 ****
              return False
          # Do mods before we move.
          outlook_ns = mgr.GetOutlookForCurrentThread().GetNamespace("MAPI")
!         outlook_message = outlook_ns.GetItemFromID(msg.ID)
          if self.flag_message:
              outlook_message.FlagRequest = "Check Spam"
              outlook_message.FlagStatus = constants.olFlagMarked
!             outlook_message.Save()
!         if self.write_field:
              format = 4 # 4=2 decimal, 3=1 decimal - index in "field chooser" combo when type=Number.
              prop = outlook_message.UserProperties.Add(self.write_field_name, constants.olNumber, True, format)
              prop.Value = prob
              outlook_message.Save()
  
--- 46,69 ----
              return False
          # Do mods before we move.
+         dirty = False
          outlook_ns = mgr.GetOutlookForCurrentThread().GetNamespace("MAPI")
!         try:
!             outlook_message = outlook_ns.GetItemFromID(msg.ID)
!         except pythoncom.com_error, (hr, desc, exc, arg):
!             if not exc or exc[5] != MAPI_E_NOT_FOUND:
!                 raise
!             print "Warning: Can't open the message - it has probably been moved"
!             return False
! 
          if self.flag_message:
              outlook_message.FlagRequest = "Check Spam"
              outlook_message.FlagStatus = constants.olFlagMarked
!             dirty = True
!         if self.write_field:            
              format = 4 # 4=2 decimal, 3=1 decimal - index in "field chooser" combo when type=Number.
              prop = outlook_message.UserProperties.Add(self.write_field_name, constants.olNumber, True, format)
              prop.Value = prob
+             dirty = True
+         if dirty:        
              outlook_message.Save()
  
***************
*** 61,65 ****
              outlook_message.Copy(outlook_ns.GetFolderFromID(self.folder_id))
          elif self.action == "Move":
-             print "moving", self.flag_message
              outlook_message.Move(outlook_ns.GetFolderFromID(self.folder_id))
          else:
--- 73,76 ----