[Spambayes-checkins]
spambayes/Outlook2000 addin.py,NONE,1.1 config.py,NONE,1.1
classify.py,1.3,1.4 filter.py,1.4,1.5 manager.py,1.9,1.10 rule.py,1.2,1.3
Mark Hammond
mhammond@users.sourceforge.net
Sun, 20 Oct 2002 00:47:03 -0700
Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory usw-pr-cvs1:/tmp/cvs-serv30441
Modified Files:
classify.py filter.py manager.py rule.py
Added Files:
addin.py config.py
Log Message:
Brand spanking new version that actually filters mail as they
arrive (woo hooo)
--- NEW FILE: addin.py ---
# Mark's Outlook addin
import warnings
# Installed ahead of the remaining imports below.  Ignores every FutureWarning;
# append=1 adds the filter at the end of the filter list, so any filter that is
# already registered still takes precedence over this one.
warnings.filterwarnings("ignore", category=FutureWarning, append=1) # sick of the new hex() warnings!
import sys
from win32com import universal
from win32com.server.exception import COMException
from win32com.client import gencache, DispatchWithEvents, Dispatch
import winerror
import win32api
import pythoncom
from win32com.client import constants
# Output handling: when the process has no console, GetConsoleTitle() fails
# with win32api.error, and importing win32traceutil then redirects all print
# output to the win32traceutil collector.
# View that output from Pythonwin's "Tools->Trace Collector Debugging Tool",
# or by running "win32traceutil.py" from a command prompt.
try:
    win32api.GetConsoleTitle()
except win32api.error:
    # No console - the import itself performs the redirect.
    import win32traceutil
print("Outlook Spam Addin module loading")
# A lovely big block that attempts to catch the most common errors - COM
# objects not installed.
#
# When this module is imported (rather than run as a script) any COM error is
# simply re-raised.  When run as a script, a diagnosis is printed and the
# process exits with status 1.
try:
    # Support for COM objects we use.
    gencache.EnsureModule('{00062FFF-0000-0000-C000-000000000046}', 0, 9, 0, bForDemand=True) # Outlook 9
    gencache.EnsureModule('{2DF8D04C-5BFA-101B-BDE5-00AA0044DE52}', 0, 2, 1, bForDemand=True) # Office 9
    gencache.EnsureModule('{3FA7DEA7-6438-101B-ACC1-00AA00423326}', 0, 1, 21, bForDemand=True) # CDO
    # The TLB defining the interfaces we implement
    universal.RegisterInterfaces('{AC0714F2-3D04-11D1-AE7D-00A0C90F26F4}', 0, 1, 0, ["_IDTExtensibility2"])
except pythoncom.com_error:
    # Unpack the error details first, before the nested try below replaces
    # the "current exception".  Unpacking sys.exc_info()[1] is the same
    # assignment "except com_error, (hr, msg, exc, arg)" performs, spelled
    # so that every Python version can parse it.
    hr, msg, exc, arg = sys.exc_info()[1]
    if __name__ != '__main__':
        # Error when not running as a script - eeek - just let it go.
        raise
    # Probe for CDO so the message can say whether it is the missing piece.
    try:
        pythoncom.MakeIID("MAPI.Session")
        have_cdo = True
    except pythoncom.com_error:
        have_cdo = False
    print("This Addin requires that Outlook 2000 with CDO be installed on this machine.")
    print("")
    if have_cdo:
        print("However, these appear to be installed. Error details:")
        print("COM Error 0x%x (%s)" % (hr, msg))
        if exc:
            # exc is itself a tuple, so it must be wrapped in a 1-tuple;
            # "%s" % exc would treat its items as separate format arguments
            # and raise TypeError instead of printing the details.
            print("Exception: %s" % (exc,))
        print("")
        print("Sorry, I can't be more help, but I can't continue while I have this error.")
    else:
        print("CDO is not currently installed. To install CDO, you must locate the")
        print("media from which you installed Outlook (such as Office 2000 CD or ")
        print("sharepoint), re-run setup, select Outlook, enable CDO.")
        print("")
        print("Please install CDO then attempt this registration again.")
    sys.exit(1)
# Whew - we seem to have all the COM support we need - let's rock!
class ButtonEvent:
    """Event sink that forwards a button click to a stored callback.

    In this file the class is handed to DispatchWithEvents and configured
    afterwards through Init(), which is why there is no __init__.
    """

    def Init(self, handler, args = ()):
        """Remember the callable to run on a click and its positional args."""
        self.args = args
        self.handler = handler

    def OnClick(self, button, cancel):
        """Click event: call the stored handler; button and cancel are unused."""
        callback = self.handler
        callback(*self.args)
class FolderItemsEvent:
    """Event sink for a folder's Items collection: filters items as they arrive.

    In this file the class is handed to DispatchWithEvents and configured
    afterwards through Init().
    """

    def Init(self, target, application, manager):
        """Store the hooked folder, the Outlook application and the manager."""
        self.application = application
        self.manager = manager
        self.target = target

    def OnItemAdd(self, item):
        """ItemAdd event: run the filter rules over the newly added item.

        Does nothing except print a note when filtering is disabled in the
        manager's configuration.
        """
        if not self.manager.config.filter.enabled:
            print("Spam filtering is disabled - ignoring new message")
            return
        mapi_message = self.manager.mapi.GetMessage(item.EntryID)
        import filter
        num_rules = filter.filter_message(mapi_message, self.manager)
        if num_rules is None:
            # filter_message returns None (after printing its own error) when
            # the message could not be read; formatting None with %d below
            # would raise TypeError inside the event handler.
            return
        print("%d Spam rules fired for message '%s'" % (num_rules, item.Subject.encode("ascii", "replace")))
class OutlookAddin:
    """COM add-in object implementing the _IDTExtensibility2 callbacks.

    On connection it creates the bayes manager, adds an "Anti-Spam" button to
    the "Standard" toolbar (when an explorer window exists) and hooks item
    events on every folder the filter configuration names.
    """
    _com_interfaces_ = ['_IDTExtensibility2']
    _public_methods_ = []
    _reg_clsctx_ = pythoncom.CLSCTX_INPROC_SERVER
    _reg_clsid_ = "{3556EDEE-FC91-4cf2-A0E4-7489747BAB10}"
    _reg_progid_ = "SpamBayes.OutlookAddin"
    _reg_policy_spec_ = "win32com.server.policy.EventHandlerPolicy"

    def __init__(self):
        # Maps folder entry ID -> event hook object for that folder's items.
        self.folder_hooks = {}
        self.application = None
        # Defined here so OnDisconnection can test it even when OnConnection
        # never ran, or failed before the manager was created.
        self.manager = None

    def OnConnection(self, application, connectMode, addin, custom):
        """Create the manager, the toolbar button and the folder hooks."""
        print("SpamAddin - Connecting to Outlook")
        self.application = application
        # Create our bayes manager
        import manager
        self.manager = manager.GetManager()
        # ActiveExplorer may be none when started without a UI (eg, WinCE synchronisation)
        activeExplorer = application.ActiveExplorer()
        if activeExplorer is not None:
            bars = activeExplorer.CommandBars
            toolbar = bars.Item("Standard")
            item = toolbar.Controls.Add(Type=constants.msoControlButton, Temporary=True)
            # Hook events for the item; the reference is kept on self.
            item = self.toolbarButton = DispatchWithEvents(item, ButtonEvent)
            item.Init(manager.ShowManager, (self.manager,))
            item.Caption = "Anti-Spam"
            item.TooltipText = "Define anti-spam filters"
            item.Enabled = True
        # Create a notification hook for all folders we filter.
        self.UpdateFolderHooks()

    def UpdateFolderHooks(self):
        """Rebuild self.folder_hooks from the current filter configuration.

        Hooks for folders that are still configured are reused; folders no
        longer listed are dropped because they are not copied into the new
        dictionary.
        """
        filter_config = self.manager.config.filter
        folders = self.manager.BuildFolderList(filter_config.folder_ids, filter_config.include_sub)
        new_hooks = {}
        for mapi_folder in folders:
            eid = mapi_folder.ID
            existing = self.folder_hooks.get(eid)
            if existing is not None:
                new_hooks[eid] = existing
                continue
            folder = self.application.GetNamespace("MAPI").GetFolderFromID(eid)
            try:
                new_hook = DispatchWithEvents(folder.Items, FolderItemsEvent)
            except ValueError:
                print("WARNING: Folder '%s' can not hook events" % (folder.Name,))
                new_hook = None
            if new_hook is not None:
                new_hook.Init(folder, self.application, self.manager)
                new_hooks[eid] = new_hook
                print("Created new message hook for %s" % (folder.Name,))
        self.folder_hooks = new_hooks

    def OnDisconnection(self, mode, custom):
        """Drop the hooks and application, then save and close the manager."""
        print("SpamAddin - Disconnecting from Outlook")
        self.folder_hooks = None
        self.application = None
        if self.manager is not None:
            self.manager.Save()
            self.manager.Close()
            self.manager = None

    def OnAddInsUpdate(self, custom):
        print("SpamAddin - OnAddInsUpdate %s" % (custom,))

    def OnStartupComplete(self, custom):
        print("SpamAddin - OnStartupComplete %s" % (custom,))

    def OnBeginShutdown(self, custom):
        print("SpamAddin - OnBeginShutdown %s" % (custom,))
def RegisterAddin(klass):
    """Write the per-user registry entries that list klass as an Outlook add-in.

    Creates (or opens) HKEY_CURRENT_USER\\Software\\Microsoft\\Office\\Outlook\\
    Addins\\<klass._reg_progid_> and sets the CommandLineSafe, LoadBehavior,
    Description and FriendlyName values.  Both key handles are closed before
    returning, including when setting a value fails.
    """
    import _winreg
    progid = klass._reg_progid_
    # NOTE(review): LoadBehavior 3 is presumably "load at startup" - confirm
    # against Microsoft's add-in registry documentation.
    values = (
        ("CommandLineSafe", _winreg.REG_DWORD, 0),
        ("LoadBehavior", _winreg.REG_DWORD, 3),
        ("Description", _winreg.REG_SZ, progid),
        ("FriendlyName", _winreg.REG_SZ, progid),
    )
    key = _winreg.CreateKey(_winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Office\\Outlook\\Addins")
    try:
        subkey = _winreg.CreateKey(key, progid)
        try:
            for name, kind, value in values:
                _winreg.SetValueEx(subkey, name, 0, kind, value)
        finally:
            _winreg.CloseKey(subkey)
    finally:
        _winreg.CloseKey(key)
def UnregisterAddin(klass):
    """Delete klass's per-user Outlook add-in registry key.

    A WindowsError from the deletion is ignored, so calling this when the
    key cannot be deleted is not an error.
    """
    import _winreg
    addin_key = "Software\\Microsoft\\Office\\Outlook\\Addins\\" + klass._reg_progid_
    try:
        _winreg.DeleteKey(_winreg.HKEY_CURRENT_USER, addin_key)
    except WindowsError:
        # Best effort only.
        pass
if __name__ == '__main__':
    # Run as a script: hand the command line to win32com's registration
    # helper, then add or remove the Outlook add-in registry entry to match.
    import win32com.server.register
    win32com.server.register.UseCommandLine(OutlookAddin)
    if "--unregister" not in sys.argv:
        RegisterAddin(OutlookAddin)
    else:
        UnregisterAddin(OutlookAddin)
--- NEW FILE: config.py ---
# configuration stuff we persist via a pickle
# Can't be defined in any module that may be used as "__main__"
# or as a module.
import pprint
class _ConfigurationContainer:
    """Attribute bag for one group of persisted configuration values.

    Instances are pickled.  On unpickling, defaults are set up first and the
    pickled state is then merged over them, so attributes added in a newer
    version keep their default values.
    """

    def __init__(self, **kw):
        self.__dict__.update(kw)

    # Crap state-loading code so when we load an early version of the pickle
    # any attributes in the new version are considered defaults.
    # XXX - I really really want a better scheme than pickles etc here :(
    def _update_from(self, dict):
        """Merge the name -> value mapping `dict` into this container.

        When both the incoming value and the attribute already stored under
        that name are containers, they are merged recursively.  Otherwise the
        incoming value replaces (or adds) the attribute.
        """
        for name, val in dict.items():
            current = self.__dict__.get(name)
            # Check the existing attribute as well as the incoming one: if
            # only the incoming value is a container there is nothing to
            # merge into, and calling _update_from on the existing attribute
            # would raise AttributeError.
            if (getattr(val, "_update_from", None) is not None
                    and getattr(current, "_update_from", None) is not None):
                current._update_from(val.__dict__)
            else:
                self.__dict__[name] = val

    def __setstate__(self, state):
        self.__init__() # ensure any new/default values setup
        self._update_from(state)

    def _dump(self, thisname="<root>", level=0):
        """Print this container, and recursively any nested ones, to stdout."""
        prefix = " " * level
        print("%s%s:" % (prefix, thisname))
        for name, ob in self.__dict__.items():
            d = getattr(ob, "_dump", None)
            if d is None:
                print("%s %s: %s" % (prefix, name, pprint.pformat(ob)))
            else:
                d(name, level+1)
class ConfigurationRoot(_ConfigurationContainer):
    """Top-level configuration object holding the default value of every setting."""

    def __init__(self):
        section = _ConfigurationContainer
        # Folders used as training data.
        self.training = section(
            ham_folder_ids=[],
            ham_include_sub=False,
            spam_folder_ids=[],
            spam_include_sub=False,
        )
        # Folders to classify, and the field that receives the result.
        self.classify = section(
            folder_ids=[],
            include_sub=False,
            field_name="SpamProb",
        )
        # Folders watched for new items; filtering is off by default.
        self.filter = section(
            folder_ids=[],
            include_sub=False,
            enabled=False,
        )
        # Settings for a "filter now" run.
        self.filter_now = section(
            folder_ids=[],
            include_sub=False,
            only_unread=False,
        )
        self.rules = []
if __name__ == '__main__':
    # This module only holds configuration classes; it is not an entry point.
    print("Please run 'manager.py'")
Index: classify.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/classify.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** classify.py 19 Oct 2002 22:30:02 -0000 1.3
--- classify.py 20 Oct 2002 07:47:00 -0000 1.4
***************
*** 13,17 ****
def classify_folder( f, mgr, config, progress):
! hammie = Hammie(mgr.bayes)
messages = f.Messages
pythoncom.CoInitialize() # We are called on a different thread.
--- 13,17 ----
def classify_folder( f, mgr, config, progress):
! hammie = mgr.hammie
messages = f.Messages
pythoncom.CoInitialize() # We are called on a different thread.
***************
*** 30,35 ****
body = message.Text.encode('ascii', 'replace')
text = headers + body
!
! prob, clues = hammie.score(text, evidence=True)
added_prop = False
try:
--- 30,34 ----
body = message.Text.encode('ascii', 'replace')
text = headers + body
! prob = hammie.score(text, evidence=False)
added_prop = False
try:
Index: filter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/filter.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** filter.py 19 Oct 2002 22:30:02 -0000 1.4
--- filter.py 20 Oct 2002 07:47:00 -0000 1.5
***************
*** 9,18 ****
import rule
! from hammie import Hammie
def filter_folder(f, mgr, progress, filter):
only_unread = filter.only_unread
- hammie = Hammie(mgr.bayes)
num_messages = 0
for message in mgr.YieldMessageList(f):
if progress.stop_requested():
--- 9,39 ----
import rule
! def filter_message(message, mgr):
! try:
! headers = message.Fields[0x7D001E].Value
! headers = headers.encode('ascii', 'replace')
! body = message.Text.encode('ascii', 'replace')
! text = headers + body
! except pythoncom.com_error, d:
! print "Failed to get a message: %s" % (d,)
! return
!
! prob = mgr.hammie.score(text, evidence=False)
! num_rules = 0
! for rule in mgr.config.rules:
! if rule.enabled:
! try:
! if rule.Act(mgr, message, prob):
! num_rules += 1
! except:
! print "Rule failed!"
! import traceback
! traceback.print_exc()
! return num_rules
def filter_folder(f, mgr, progress, filter):
only_unread = filter.only_unread
num_messages = 0
+ hammie = mgr.hammie
for message in mgr.YieldMessageList(f):
if progress.stop_requested():
***************
*** 21,47 ****
if only_unread and not message.Unread:
continue
!
! try:
! headers = message.Fields[0x7D001E].Value
! headers = headers.encode('ascii', 'replace')
! body = message.Text.encode('ascii', 'replace')
! text = headers + body
! except pythoncom.com_error, d:
! progress.warning("Failed to get a message: %s" % (str(d),) )
! continue
!
! prob, clues = hammie.score(text, evidence=True)
! did_this_message = False
! for rule in mgr.config.rules:
! if rule.enabled:
! try:
! if rule.Act(mgr, message, prob):
! did_this_message = True
! except:
! print "Rule failed!"
! import traceback
! traceback.print_exc()
! if did_this_message:
num_messages += 1
return num_messages
--- 42,48 ----
if only_unread and not message.Unread:
continue
! if filter_message(message, mgr):
num_messages += 1
+
return num_messages
Index: manager.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** manager.py 19 Oct 2002 22:57:59 -0000 1.9
--- manager.py 20 Oct 2002 07:47:00 -0000 1.10
***************
*** 6,23 ****
import thread
- import classifier
- from tokenizer import tokenize
import win32com.client
import win32com.client.gencache
import pythoncom
! # Suck in CDO type lib
! win32com.client.gencache.EnsureModule('{3FA7DEA7-6438-101B-ACC1-00AA00423326}',
! 0, 1, 21, bForDemand=True)
try:
! this_filename = __file__
except NameError:
! this_filename = sys.argv[0]
class ManagerError(Exception):
--- 6,36 ----
import thread
import win32com.client
import win32com.client.gencache
import pythoncom
! import config
try:
! this_filename = os.path.abspath(__file__)
except NameError:
! this_filename = os.path.abspath(sys.argv[0])
!
! # This is a little of a hack <wink>. We are generally in a child directory of the
! # bayes code. To help installation, we handle the fact that this may not be
! # on sys.path.
! try:
! import classifier
! except ImportError:
! parent = os.path.abspath(os.path.join(os.path.dirname(this_filename), ".."))
! sys.path.insert(0, parent)
! del parent
! import classifier
!
! import hammie
!
! # Suck in CDO type lib
! win32com.client.gencache.EnsureModule('{3FA7DEA7-6438-101B-ACC1-00AA00423326}',
! 0, 1, 21, bForDemand=True)
class ManagerError(Exception):
***************
*** 75,78 ****
--- 88,92 ----
bayes = None
try:
+ os.environ["BAYESCUSTOMIZE"]=self.ini_filename
bayes = cPickle.load(open(self.bayes_filename,'rb'))
print "Loaded bayes database from '%s'" % (self.bayes_filename,)
***************
*** 90,93 ****
--- 104,108 ----
"%d spam and %d good messages" % (bayes.nspam, bayes.nham))
self.bayes = bayes
+ self.hammie = hammie.Hammie(bayes)
self.bayes_dirty = False
***************
*** 108,117 ****
ret._dump()
except (AttributeError, ImportError):
! ret = _ConfigurationRoot()
if self.verbose > 1:
! print ("FAILED to load configuration from '%s "
! "- using default:" % self.config_filename)
! import traceback
! traceback.print_exc()
return ret
--- 123,136 ----
ret._dump()
except (AttributeError, ImportError):
! ret = config.ConfigurationRoot()
! print "FAILED to load configuration from '%s' - using default:" % (self.config_filename,)
! import traceback
! traceback.print_exc()
! except IOError, details:
! # File-not-found - less serious.
! ret = config.ConfigurationRoot()
if self.verbose > 1:
! # filename included in exception!
! print "IOError loading configuration (%s) - using default:" % (details)
return ret
***************
*** 183,241 ****
message = messages.GetNext()
- # configuration stuff we persist.
- class _ConfigurationContainer:
- def __init__(self, **kw):
- self.__dict__.update(kw)
- def __setstate__(self, state):
- self.__init__() # ensure any new/default values setup
- self.__dict__.update(state)
- def _dump(self, thisname="<root>", level=0):
- import pprint
- prefix = " " * level
- print "%s%s:" % (prefix, thisname)
- for name, ob in self.__dict__.items():
- d = getattr(ob, "_dump", None)
- if d is None:
- print "%s %s: %s" % (prefix, name, pprint.pformat(ob))
- else:
- d(name, level+1)
-
- class _ConfigurationRoot(_ConfigurationContainer):
- def __init__(self):
- self.training = _ConfigurationContainer(
- ham_folder_ids = [],
- ham_include_sub = False,
- spam_folder_ids = [],
- spam_include_sub = False,
- )
- self.classify = _ConfigurationContainer(
- folder_ids = [],
- include_sub = False,
- field_name = "SpamProb",
- )
- self.filter = _ConfigurationContainer(
- folder_ids = [],
- include_sub = False,
- )
- self.filter_now = _ConfigurationContainer(
- folder_ids = [],
- include_sub = False,
- only_unread = False,
- )
- self.rules = []
-
-
_mgr = None
! def GetManager():
global _mgr
if _mgr is None:
! _mgr = BayesManager()
return _mgr
! if __name__=='__main__':
try:
! mgr = BayesManager()
except ManagerError, d:
print "Error initializing Bayes manager"
print d
--- 202,264 ----
message = messages.GetNext()
_mgr = None
! def GetManager(verbose=1):
global _mgr
if _mgr is None:
! _mgr = BayesManager(verbose=verbose)
! # If requesting greater verbosity, honour it
! if verbose > _mgr.verbose:
! _mgr.verbose = verbose
return _mgr
! def ShowManager(mgr):
! def do_train(dlg):
! import train
! import dialogs.TrainingDialog
! d = dialogs.TrainingDialog.TrainingDialog(dlg.mgr, train.trainer)
! d.DoModal()
!
! def do_classify(dlg):
! import classify
! import dialogs.ClassifyDialog
! d = dialogs.ClassifyDialog.ClassifyDialog(dlg.mgr, classify.classifier)
! d.DoModal()
!
! def do_filter(dlg):
! import filter, rule
! import dialogs.FilterDialog
! d = dialogs.FilterDialog.FilterArrivalsDialog(dlg.mgr, rule.Rule, filter.filterer)
! d.DoModal()
!
! import dialogs.ManagerDialog
! d = dialogs.ManagerDialog.ManagerDialog(mgr, do_train, do_filter, do_classify)
! d.DoModal()
!
! def main(verbose_level = 1):
try:
! mgr = GetManager(verbose=verbose_level)
except ManagerError, d:
print "Error initializing Bayes manager"
print d
+ return 1
+ ShowManager(mgr)
+ mgr.Save()
+ mgr.Close()
+
+ def usage():
+ print "Usage: manager [-v ...]"
+ sys.exit(1)
+
+ if __name__=='__main__':
+ verbose = 1
+ import getopt
+ opts, args = getopt.getopt(sys.argv[1:], "v")
+ if args:
+ usage()
+ for opt, val in opts:
+ if opt=="-v":
+ verbose += 1
+ else:
+ usage()
+ main(verbose)
Index: rule.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/rule.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** rule.py 19 Oct 2002 18:14:01 -0000 1.2
--- rule.py 20 Oct 2002 07:47:00 -0000 1.3
***************
*** 3,6 ****
--- 3,8 ----
import time
+ MAPI_E_NOT_FOUND = -2147221233
+
class Rule:
def __init__(self):
***************
*** 44,57 ****
return False
# Do mods before we move.
outlook_ns = mgr.GetOutlookForCurrentThread().GetNamespace("MAPI")
! outlook_message = outlook_ns.GetItemFromID(msg.ID)
if self.flag_message:
outlook_message.FlagRequest = "Check Spam"
outlook_message.FlagStatus = constants.olFlagMarked
! outlook_message.Save()
! if self.write_field:
format = 4 # 4=2 decimal, 3=1 decimal - index in "field chooser" combo when type=Number.
prop = outlook_message.UserProperties.Add(self.write_field_name, constants.olNumber, True, format)
prop.Value = prob
outlook_message.Save()
--- 46,69 ----
return False
# Do mods before we move.
+ dirty = False
outlook_ns = mgr.GetOutlookForCurrentThread().GetNamespace("MAPI")
! try:
! outlook_message = outlook_ns.GetItemFromID(msg.ID)
! except pythoncom.com_error, (hr, desc, exc, arg):
! if not exc or exc[5] != MAPI_E_NOT_FOUND:
! raise
! print "Warning: Can't open the message - it has probably been moved"
! return False
!
if self.flag_message:
outlook_message.FlagRequest = "Check Spam"
outlook_message.FlagStatus = constants.olFlagMarked
! dirty = True
! if self.write_field:
format = 4 # 4=2 decimal, 3=1 decimal - index in "field chooser" combo when type=Number.
prop = outlook_message.UserProperties.Add(self.write_field_name, constants.olNumber, True, format)
prop.Value = prob
+ dirty = True
+ if dirty:
outlook_message.Save()
***************
*** 61,65 ****
outlook_message.Copy(outlook_ns.GetFolderFromID(self.folder_id))
elif self.action == "Move":
- print "moving", self.flag_message
outlook_message.Move(outlook_ns.GetFolderFromID(self.folder_id))
else:
--- 73,76 ----