[Spambayes-checkins] spambayes/Outlook2000 oastats.py, NONE, 1.2.2.1 addin.py, 1.112, 1.112.2.1 config_wizard.py, 1.7, 1.7.2.1 filter.py, 1.33, 1.33.2.1 manager.py, 1.87, 1.87.2.1 msgstore.py, 1.76, 1.76.2.1

Mark Hammond mhammond at users.sourceforge.net
Mon Nov 3 04:29:45 EST 2003


Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1:/tmp/cvs-serv24152

Modified Files:
      Tag: release_1_0
	addin.py config_wizard.py filter.py manager.py msgstore.py 
Added Files:
      Tag: release_1_0
	oastats.py 
Log Message:
Moving Outlook to the release_1_0 tag in preparation for a release.
Created tag outlook-1-0-fork before the merge.


--- NEW FILE: oastats.py ---
# oastats.py - Outlook Addin Stats

class Stats:
    """Running counters describing what the Outlook addin has classified.

    Two groups of counters are kept:

    * automatic classifications (num_ham, num_spam, num_unsure), updated
      by RecordClassification();
    * manual corrections made by the user (num_recovered_good,
      num_deleted_spam) plus how many of those corrections contradicted the
      automatic verdict (num_recovered_good_fp, num_deleted_spam_fn),
      updated by RecordManualClassification().

    `config` must provide `filter.spam_threshold` and
    `filter.unsure_threshold`; scores are multiplied by 100 before being
    compared against them.
    """
    def __init__(self, config):
        """Remember `config` (read for the thresholds) and zero all counters."""
        self.config = config
        self.Reset()

    def Reset(self):
        """Set every counter back to zero."""
        self.num_ham = self.num_spam = self.num_unsure = 0
        self.num_deleted_spam = self.num_deleted_spam_fn = 0
        self.num_recovered_good = self.num_recovered_good_fp = 0

    def RecordClassification(self, score):
        """Count one automatically classified message.

        `score` is scaled by 100 and bucketed as spam (at or above the spam
        threshold), unsure (at or above the unsure threshold) or ham.
        """
        score *= 100 # same units as our config values.
        if score >= self.config.filter.spam_threshold:
            self.num_spam += 1
        elif score >= self.config.filter.unsure_threshold:
            self.num_unsure += 1
        else:
            self.num_ham += 1

    def RecordManualClassification(self, recover_as_good, score):
        """Count one message the user classified by hand.

        `recover_as_good` is true when the user recovered the message as
        good, false when the user deleted it as spam.  `score` is the
        message's score, scaled by 100 before comparison.
        """
        score *= 100 # same units as our config values.
        if recover_as_good:
            self.num_recovered_good += 1
            # If we are recovering an item that is in the "spam" threshold,
            # then record it as a "false positive".  The comparison must be
            # ">=" to agree with RecordClassification(), which files a score
            # exactly on the threshold as spam.
            if score >= self.config.filter.spam_threshold:
                self.num_recovered_good_fp += 1
        else:
            self.num_deleted_spam += 1
            # If we are deleting as Spam an item that was in our "good" range,
            # then record it as a false neg.  "<" matches the ham bucket in
            # RecordClassification() (anything below the unsure threshold).
            if score < self.config.filter.unsure_threshold:
                self.num_deleted_spam_fn += 1

    def GetStats(self):
        """Return the statistics as a list of human-readable strings.

        A single-element list is returned when no message has been
        automatically classified yet; otherwise one summary line followed
        by one line for each kind of manual classification.
        """
        num_seen = self.num_ham + self.num_spam + self.num_unsure
        if num_seen == 0:
            # Also avoids dividing by zero in the percentages below.
            return ["SpamBayes has processed zero messages"]
        chunks = []
        push = chunks.append
        perc_ham = 100.0 * self.num_ham / num_seen
        perc_spam = 100.0 * self.num_spam / num_seen
        perc_unsure = 100.0 * self.num_unsure / num_seen
        format_dict = dict(perc_spam=perc_spam, perc_ham=perc_ham,
                           perc_unsure=perc_unsure, num_seen=num_seen)
        # Expose every counter attribute to the format strings by name.
        format_dict.update(self.__dict__)
        push("SpamBayes has processed %(num_seen)d messages - "
             "%(num_ham)d (%(perc_ham).0f%%) good, "
             "%(num_spam)d (%(perc_spam).0f%%) spam "
             "and %(num_unsure)d (%(perc_unsure).0f%%) unsure" % format_dict)
        if self.num_recovered_good:
            push("%(num_recovered_good)d message(s) were manually "
                 "classified as good (with %(num_recovered_good_fp)d "
                 "being false positives)" % format_dict)
        else:
            push("No messages were manually classified as good")
        if self.num_deleted_spam:
            push("%(num_deleted_spam)d message(s) were manually "
                 "classified as spam (with %(num_deleted_spam_fn)d "
                 "being false negatives)" % format_dict)
        else:
            push("No messages were manually classified as spam")
        return chunks

if __name__=='__main__':
    # Simple self-test: print the report for a few hand-built scenarios.
    # The single-argument, parenthesised print() form is used so the script
    # runs unchanged on interpreters with or without the print statement.

    # Minimal stand-ins for the addin configuration; Stats only reads
    # config.filter.unsure_threshold and config.filter.spam_threshold.
    class FilterConfig:
        unsure_threshold = 15
        spam_threshold = 85
    class Config:
        filter = FilterConfig()
    # processed zero
    s = Stats(Config())
    print("\n".join(s.GetStats()))
    # No recovery
    s = Stats(Config())
    s.RecordClassification(.2)
    print("\n".join(s.GetStats()))
    # Mixed classifications plus manual corrections in both directions,
    # one of each agreeing and one disagreeing with the score.
    s = Stats(Config())
    s.RecordClassification(.2)
    s.RecordClassification(.1)
    s.RecordClassification(.4)
    s.RecordClassification(.9)
    s.RecordManualClassification(True, 0.1)
    s.RecordManualClassification(True, 0.9)
    s.RecordManualClassification(False, 0.1)
    s.RecordManualClassification(False, 0.9)
    print("\n".join(s.GetStats()))

Index: addin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v
retrieving revision 1.112
retrieving revision 1.112.2.1
diff -C2 -d -r1.112 -r1.112.2.1
*** addin.py	15 Sep 2003 06:26:35 -0000	1.112
--- addin.py	3 Nov 2003 09:29:43 -0000	1.112.2.1
***************
*** 21,25 ****
  # * Our config files also want a consistent locale, so periods and commas
  #   are the same when they are read as when they are written.
! # So, at a few opportune times, we simple set it back.
  # We do it here as early as possible, before any imports that may see this
  #
--- 21,25 ----
  # * Our config files also want a consistent locale, so periods and commas
  #   are the same when they are read as when they are written.
! # So, at a few opportune times, we simply set it back.
  # We do it here as early as possible, before any imports that may see this
  #
***************
*** 30,43 ****
  locale.setlocale(locale.LC_NUMERIC, "C")
  
- if sys.version_info >= (2, 3):
-     # sick off the new hex() warnings!
-     # todo - remove this - win32all has removed all these warnings
-     # (but we will wait some time for people to update)
-     warnings.filterwarnings("ignore", category=FutureWarning, append=1)
-     # Binary builds can avoid our pendingdeprecation too
-     if hasattr(sys, "frozen"):
-         warnings.filterwarnings("ignore", category=DeprecationWarning, append=1)
- 
- 
  from win32com import universal
  from win32com.server.exception import COMException
--- 30,33 ----
***************
*** 611,615 ****
          if not self.manager.config.filter.enabled:
              self.manager.ReportError(
!                 "You must enable SpamBayes before you can delete as spam")
              return
          SetWaitCursor(1)
--- 601,606 ----
          if not self.manager.config.filter.enabled:
              self.manager.ReportError(
!                 "You must configure and enable SpamBayes before you can " \
!                 "delete as spam")
              return
          SetWaitCursor(1)
***************
*** 631,634 ****
--- 622,628 ----
          new_msg_state = self.manager.config.general.delete_as_spam_message_state
          for msgstore_message in msgstore_messages:
+             # Record this recovery in our stats.
+             self.manager.stats.RecordManualClassification(False,
+                                     self.manager.score(msgstore_message))
              # Must train before moving, else we lose the message!
              subject = msgstore_message.GetSubject()
***************
*** 666,670 ****
          if not self.manager.config.filter.enabled:
              self.manager.ReportError(
!                 "You must enable SpamBayes before you can recover spam")
              return
          SetWaitCursor(1)
--- 660,665 ----
          if not self.manager.config.filter.enabled:
              self.manager.ReportError(
!                 "You must configure and enable SpamBayes before you can " \
!                 "recover spam")
              return
          SetWaitCursor(1)
***************
*** 680,683 ****
--- 675,681 ----
              # that the source folder == dest folder - restore to
              # the inbox in this case.
+             # (But more likely is that the original store may be read-only
+             # so we were unable to record the initial folder, as we save it
+             # *before* we do the move (and saving after is hard))
              try:
                  subject = msgstore_message.GetSubject()
***************
*** 688,691 ****
--- 686,692 ----
                      restore_folder = inbox_folder
  
+                 # Record this recovery in our stats.
+                 self.manager.stats.RecordManualClassification(True,
+                                         self.manager.score(msgstore_message))
                  # Must train before moving, else we lose the message!
                  print "Recovering to folder '%s' and ham training message '%s' - " % (restore_folder.name, subject),
***************
*** 1235,1239 ****
  
      def ProcessMissedMessages(self):
-         # This could possibly spawn threads if it was too slow!
          from time import clock
          config = self.manager.config.filter
--- 1236,1239 ----
***************
*** 1298,1302 ****
              if existing is None or existing.__class__ != HandlerClass:
                  folder = msgstore_folder.GetOutlookItem()
!                 name = msgstore_folder.name
                  try:
                      new_hook = DispatchWithEvents(folder.Items, HandlerClass)
--- 1298,1302 ----
              if existing is None or existing.__class__ != HandlerClass:
                  folder = msgstore_folder.GetOutlookItem()
!                 name = msgstore_folder.GetFQName()
                  try:
                      new_hook = DispatchWithEvents(folder.Items, HandlerClass)
***************
*** 1319,1323 ****
                          tb = None # dont want it, and nuke circular ref
                          traceback.print_exception(etype, value, tb)
!                     print "SpamBayes: Watching for new messages in folder ", name
              else:
                  new_hooks[msgstore_folder.id] = existing
--- 1319,1323 ----
                          tb = None # dont want it, and nuke circular ref
                          traceback.print_exception(etype, value, tb)
!                     print "SpamBayes: Watching for new messages in folder", name
              else:
                  new_hooks[msgstore_folder.id] = existing
***************
*** 1339,1345 ****
              # it (ie, the dialog)
              self.manager.Save()
!             stats = self.manager.stats
!             print "SpamBayes processed %d messages, finding %d spam and %d unsure" % \
!                 (stats.num_seen, stats.num_spam, stats.num_unsure)
              self.manager.Close()
              self.manager = None
--- 1339,1344 ----
              # it (ie, the dialog)
              self.manager.Save()
!             # Report some simple stats.
!             print "\r\n".join(self.manager.stats.GetStats())
              self.manager.Close()
              self.manager = None

Index: config_wizard.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/config_wizard.py,v
retrieving revision 1.7
retrieving revision 1.7.2.1
diff -C2 -d -r1.7 -r1.7.2.1
*** config_wizard.py	29 Aug 2003 00:08:43 -0000	1.7
--- config_wizard.py	3 Nov 2003 09:29:43 -0000	1.7.2.1
***************
*** 44,49 ****
          for id in ids:
              # Only get the folders that actually exist.
!             if manager.message_store.GetFolder(id) is not None:
                  new_config.filter.watch_folder_ids.append(id)
      if not new_config.filter.watch_folder_ids:
          for folder in manager.message_store.YieldReceiveFolders():
--- 44,53 ----
          for id in ids:
              # Only get the folders that actually exist.
!             try:
!                 manager.message_store.GetFolder(id)
!                 # if we get here, it exists!
                  new_config.filter.watch_folder_ids.append(id)
+             except manager.message_store.MsgStoreException:
+                 pass
      if not new_config.filter.watch_folder_ids:
          for folder in manager.message_store.YieldReceiveFolders():
***************
*** 52,64 ****
          fc = manager.config.filter
          if fc.spam_folder_id:
!             folder = manager.message_store.GetFolder(fc.spam_folder_id)
!             if folder is not None:
                  new_config.filter.spam_folder_id = folder.GetID()
                  wc.spam_folder_name = ""
          if fc.unsure_folder_id:
!             folder = manager.message_store.GetFolder(fc.unsure_folder_id)
!             if folder is not None:
                  new_config.filter.unsure_folder_id = folder.GetID()
                  wc.unsure_folder_name = ""
          tc = manager.config.training
          if tc.ham_folder_ids:
--- 56,72 ----
          fc = manager.config.filter
          if fc.spam_folder_id:
!             try:
!                 folder = manager.message_store.GetFolder(fc.spam_folder_id)
                  new_config.filter.spam_folder_id = folder.GetID()
                  wc.spam_folder_name = ""
+             except manager.message_store.MsgStoreException:
+                 pass
          if fc.unsure_folder_id:
!             try:
!                 folder = manager.message_store.GetFolder(fc.unsure_folder_id)
                  new_config.filter.unsure_folder_id = folder.GetID()
                  wc.unsure_folder_name = ""
+             except manager.message_store.MsgStoreException:
+                 pass
          tc = manager.config.training
          if tc.ham_folder_ids:

Index: filter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/filter.py,v
retrieving revision 1.33
retrieving revision 1.33.2.1
diff -C2 -d -r1.33 -r1.33.2.1
*** filter.py	19 Sep 2003 04:03:38 -0000	1.33
--- filter.py	3 Nov 2003 09:29:43 -0000	1.33.2.1
***************
*** 13,26 ****
      config = mgr.config.filter
      prob = mgr.score(msg)
!     mgr.stats.num_seen += 1
      prob_perc = prob * 100
      if prob_perc >= config.spam_threshold:
          disposition = "Yes"
          attr_prefix = "spam"
-         mgr.stats.num_spam += 1
      elif prob_perc >= config.unsure_threshold:
          disposition = "Unsure"
          attr_prefix = "unsure"
-         mgr.stats.num_unsure += 1
      else:
          disposition = "No"
--- 13,24 ----
      config = mgr.config.filter
      prob = mgr.score(msg)
!     mgr.stats.RecordClassification(prob)
      prob_perc = prob * 100
      if prob_perc >= config.spam_threshold:
          disposition = "Yes"
          attr_prefix = "spam"
      elif prob_perc >= config.unsure_threshold:
          disposition = "Unsure"
          attr_prefix = "unsure"
      else:
          disposition = "No"
***************
*** 61,65 ****
                  msg.SetReadState(True)
              if action.startswith("un"): # untouched
!                 pass
              elif action.startswith("co"): # copied
                  try:
--- 59,63 ----
                  msg.SetReadState(True)
              if action.startswith("un"): # untouched
!                 mgr.LogDebug(1, "Not touching message '%s'" % msg.subject)
              elif action.startswith("co"): # copied
                  try:
***************
*** 70,73 ****
--- 68,73 ----
                  else:
                      msg.CopyToReportingError(mgr, dest_folder)
+                     mgr.LogDebug(1, "Copied message '%s' to folder '%s'" \
+                                  % (msg.subject, dest_folder.GetFQName()))
              elif action.startswith("mo"): # Moved
                  try:
***************
*** 78,81 ****
--- 78,83 ----
                  else:
                      msg.MoveToReportingError(mgr, dest_folder)
+                     mgr.LogDebug(1, "Moved message '%s' to folder '%s'" \
+                                  % (msg.subject, dest_folder.GetFQName()))
              else:
                  raise RuntimeError, "Eeek - bad action '%r'" % (action,)

Index: manager.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v
retrieving revision 1.87
retrieving revision 1.87.2.1
diff -C2 -d -r1.87 -r1.87.2.1
*** manager.py	10 Sep 2003 07:42:45 -0000	1.87
--- manager.py	3 Nov 2003 09:29:43 -0000	1.87.2.1
***************
*** 15,18 ****
--- 15,19 ----
  
  import msgstore
+ import oastats
  
  try:
***************
*** 133,139 ****
      pass
  
- class Stats:
-     def __init__(self):
-         self.num_seen = self.num_spam = self.num_unsure = 0
  
  # Function to "safely" save a pickle, only overwriting
--- 134,137 ----
***************
*** 323,327 ****
          self.addin = None
          self.verbose = verbose
-         self.stats = Stats()
          self.outlook = outlook
          self.dialog_parser = None
--- 321,324 ----
***************
*** 386,389 ****
--- 383,387 ----
          self.classifier_data = ClassifierData(db_manager, self)
          self.LoadBayes()
+         self.stats = oastats.Stats(self.config)
  
      # "old" bayes functions - new code should use "classifier_data" directly

Index: msgstore.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/msgstore.py,v
retrieving revision 1.76
retrieving revision 1.76.2.1
diff -C2 -d -r1.76 -r1.76.2.1
*** msgstore.py	15 Sep 2003 06:25:33 -0000	1.76
--- msgstore.py	3 Nov 2003 09:29:43 -0000	1.76.2.1
***************
*** 169,173 ****
          self.mapi_msg_stores = {}
          self.default_store_bin_eid = None
-         self._GetMessageStore(None)
          os.chdir(cwd)
  
--- 169,172 ----





More information about the Spambayes-checkins mailing list