[Spambayes-checkins] spambayes/Outlook2000 addin.py, 1.131, 1.132 manager.py, 1.96, 1.97 oastats.py, 1.3, 1.4

Fri Oct 15 01:36:15 CEST 2004

Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12187/Outlook2000

Modified Files:
	addin.py manager.py oastats.py 
Log Message:
Log the folder's name rather than id for OnItemAdd events.
Print out a nicer version of the date/time the log was created.

Add persistent statistics.  These are saved in a (very) little pickle in the data
 directory.  In the Advanced tab the persistent stats are shown; in the log both
 session-only and total stats are shown.

Index: addin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v
retrieving revision 1.131
retrieving revision 1.132
diff -C2 -d -r1.131 -r1.132
*** addin.py	1 Oct 2004 14:31:34 -0000	1.131
--- addin.py	14 Oct 2004 23:36:12 -0000	1.132
***************
*** 380,384 ****
          # Callback from Outlook - locale may have changed.
          locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above
!         self.manager.LogDebug(2, "OnItemAdd event for folder", self,
                                "with item", item.Subject.encode("mbcs", "ignore"))
          # Due to the way our "missed message" indicator works, we do
--- 380,384 ----
          # Callback from Outlook - locale may have changed.
          locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above
!         self.manager.LogDebug(2, "OnItemAdd event for folder", self.name,
                                "with item", item.Subject.encode("mbcs", "ignore"))
          # Due to the way our "missed message" indicator works, we do
***************
*** 1242,1249 ****
                    (major, minor, spack, ver_str)
              print "using Python", sys.version
!             from time import localtime
!             ltime = localtime()
!             print "Log created %s-%s-%s" % \
!                   (ltime[0], ltime[1], ltime[2])

              self.explorers_events = None # create at OnStartupComplete
--- 1242,1247 ----
                    (major, minor, spack, ver_str)
              print "using Python", sys.version
!             from time import asctime, localtime
!             print "Log created", asctime(localtime())

              self.explorers_events = None # create at OnStartupComplete
***************
*** 1458,1463 ****
              # it (ie, the dialog)
              self.manager.Save()
!             # Report some simple stats.
              print "\r\n".join(self.manager.stats.GetStats())
              self.manager.Close()
              self.manager = None
--- 1456,1466 ----
              # it (ie, the dialog)
              self.manager.Save()
!             # Report some simple stats, for session, and for total.
!             print "Session:"
!             print "\r\n".join(self.manager.stats.GetStats(True))
!             print "Total:"
              print "\r\n".join(self.manager.stats.GetStats())
+             # Save stats.
+             self.manager.stats.Store()
              self.manager.Close()
              self.manager = None

Index: manager.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v
retrieving revision 1.96
retrieving revision 1.97
diff -C2 -d -r1.96 -r1.97
*** manager.py	8 Feb 2004 22:29:45 -0000	1.96
--- manager.py	14 Oct 2004 23:36:12 -0000	1.97
***************
*** 404,408 ****
          self.classifier_data = ClassifierData(db_manager, self)
          self.LoadBayes()
!         self.stats = oastats.Stats(self.config)

      # "old" bayes functions - new code should use "classifier_data" directly
--- 404,408 ----
          self.classifier_data = ClassifierData(db_manager, self)
          self.LoadBayes()
!         self.stats = oastats.Stats(self.config, self.data_directory)

      # "old" bayes functions - new code should use "classifier_data" directly

Index: oastats.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/oastats.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** oastats.py	16 Dec 2003 05:06:33 -0000	1.3
--- oastats.py	14 Oct 2004 23:36:12 -0000	1.4
***************
*** 1,7 ****
  # oastats.py - Outlook Addin Stats

  class Stats:
!     def __init__(self, config):
          self.config = config
          self.Reset()
      def Reset(self):
--- 1,23 ----
  # oastats.py - Outlook Addin Stats

+ import os
+ import pickle
+ 
+ STATS_FILENAME = "performance_statistics_database.pik"
+ 
  class Stats:
!     def __init__(self, config, data_directory):
          self.config = config
+         self.stored_statistics_fn = os.path.join(data_directory,
+                                                  STATS_FILENAME)
+         if os.path.exists(self.stored_statistics_fn):
+             self.Load()
+         else:
+             # Reset totals
+             self.totals = {}
+             for stat in ["num_ham", "num_spam", "num_unsure",
+                          "num_deleted_spam", "num_deleted_spam_fn",
+                          "num_recovered_good", "num_recovered_good_fp",]:
+                 self.totals[stat] = 0
          self.Reset()
      def Reset(self):
***************
*** 9,12 ****
--- 25,44 ----
          self.num_deleted_spam = self.num_deleted_spam_fn  = 0
          self.num_recovered_good = self.num_recovered_good_fp = 0
+     def Load(self):
+         store = open(self.stored_statistics_fn, 'rb')
+         self.totals = pickle.load(store)
+         store.close()
+     def Store(self):
+         # Update totals, and save that.
+         for stat in ["num_ham", "num_spam", "num_unsure",
+                      "num_deleted_spam", "num_deleted_spam_fn",
+                      "num_recovered_good", "num_recovered_good_fp",]:
+             self.totals[stat] += getattr(self, stat)
+         store = open(self.stored_statistics_fn, 'wb')
+         pickle.dump(self.totals, store)
+         store.close()
+         # Reset, or the reporting for the remainder of this session will be
+         # incorrect.
+         self.Reset()
      def RecordClassification(self, score):
          score *= 100 # same units as our config values.
***************
*** 31,51 ****
              if score < self.config.filter.unsure_threshold:
                  self.num_deleted_spam_fn += 1
!     def GetStats(self):
          num_seen = self.num_ham + self.num_spam + self.num_unsure
          if num_seen==0:
              return ["SpamBayes has processed zero messages"]
          chunks = []
          push = chunks.append
!         perc_ham = 100.0 * self.num_ham / num_seen
!         perc_spam = 100.0 * self.num_spam / num_seen
          perc_unsure = 100.0 * self.num_unsure / num_seen
!         format_dict = dict(perc_spam=perc_spam, perc_ham=perc_ham,
!                            perc_unsure=perc_unsure, num_seen = num_seen)
!         format_dict.update(self.__dict__)
          push("SpamBayes has processed %(num_seen)d messages - " \
               "%(num_ham)d (%(perc_ham).0f%%) good, " \
               "%(num_spam)d (%(perc_spam).0f%%) spam " \
               "and %(num_unsure)d (%(perc_unsure).0f%%) unsure" % format_dict)
!         if self.num_recovered_good:
              push("%(num_recovered_good)d message(s) were manually " \
                   "classified as good (with %(num_recovered_good_fp)d " \
--- 63,120 ----
              if score < self.config.filter.unsure_threshold:
                  self.num_deleted_spam_fn += 1
!     def GetStats(self, session_only=False):
!         """Return a description of the statistics.
! 
!         If session_only is True, then only a description of the statistics
!         since we were last reset.  Otherwise, lifetime statistics (i.e.
!         those including the ones loaded).
! 
!         Users probably care most about persistent statistics, so present
!         those by default.  If session-only stats are desired, then a
!         special call to here can be made.
!         """
          num_seen = self.num_ham + self.num_spam + self.num_unsure
+         if not session_only:
+             totals = self.totals
+             num_seen += (totals["num_ham"] + totals["num_spam"] +
+                          totals["num_unsure"])
          if num_seen==0:
              return ["SpamBayes has processed zero messages"]
          chunks = []
          push = chunks.append
!         if session_only:
!             num_ham = self.num_ham
!             num_spam = self.num_spam
!             num_unsure = self.num_unsure
!             num_recovered_good = self.num_recovered_good
!             num_recovered_good_fp = self.num_recovered_good_fp
!             num_deleted_spam = self.num_deleted_spam
!             num_deleted_spam_fn = self.num_deleted_spam_fn
!         else:
!             num_ham = self.num_ham + self.totals["num_ham"]
!             num_spam = self.num_spam + self.totals["num_spam"]
!             num_unsure = self.num_unsure + self.totals["num_unsure"]
!             num_recovered_good = self.num_recovered_good + \
!                                  self.totals["num_recovered_good"]
!             num_recovered_good_fp = self.num_recovered_good_fp + \
!                                     self.totals["num_recovered_good_fp"]
!             num_deleted_spam = self.num_deleted_spam + \
!                                self.totals["num_deleted_spam"]
!             num_deleted_spam_fn = self.num_deleted_spam_fn + \
!                                   self.totals["num_deleted_spam_fn"]
!         perc_ham = 100.0 * num_ham / num_seen
!         perc_spam = 100.0 * num_spam / num_seen
          perc_unsure = 100.0 * self.num_unsure / num_seen
!         format_dict = locals().copy()
!         del format_dict["self"]
!         del format_dict["push"]
!         del format_dict["chunks"]
!         format_dict.update(dict(perc_spam=perc_spam, perc_ham=perc_ham,
!                                 perc_unsure=perc_unsure, num_seen=num_seen))
          push("SpamBayes has processed %(num_seen)d messages - " \
               "%(num_ham)d (%(perc_ham).0f%%) good, " \
               "%(num_spam)d (%(perc_spam).0f%%) spam " \
               "and %(num_unsure)d (%(perc_unsure).0f%%) unsure" % format_dict)
!         if num_recovered_good:
              push("%(num_recovered_good)d message(s) were manually " \
                   "classified as good (with %(num_recovered_good_fp)d " \
***************
*** 53,57 ****
          else:
              push("No messages were manually classified as good")
!         if self.num_deleted_spam:
              push("%(num_deleted_spam)d message(s) were manually " \
                   "classified as spam (with %(num_deleted_spam_fn)d " \
--- 122,126 ----
          else:
              push("No messages were manually classified as good")
!         if num_deleted_spam:
              push("%(num_deleted_spam)d message(s) were manually " \
                   "classified as spam (with %(num_deleted_spam_fn)d " \
***************
*** 67,79 ****
      class Config:
          filter = FilterConfig()
      # processed zero
!     s = Stats(Config())
      print "\n".join(s.GetStats())
      # No recovery
!     s = Stats(Config())
      s.RecordClassification(.2)
      print "\n".join(s.GetStats())

!     s = Stats(Config())
      s.RecordClassification(.2)
      s.RecordClassification(.1)
--- 136,149 ----
      class Config:
          filter = FilterConfig()
+     data_directory = os.getcwd()
      # processed zero
!     s = Stats(Config(), data_directory)
      print "\n".join(s.GetStats())
      # No recovery
!     s = Stats(Config(), data_directory)
      s.RecordClassification(.2)
      print "\n".join(s.GetStats())

!     s = Stats(Config(), data_directory)
      s.RecordClassification(.2)
      s.RecordClassification(.1)
***************
*** 85,86 ****
--- 155,164 ----
      s.RecordManualClassification(False, 0.9)
      print "\n".join(s.GetStats())
+ 
+     # Store
+     # (this will leave an artifact in the cwd)
+     s.Store()
+     # Load
+     s = Stats(Config(), data_directory)
+     print "\n".join(s.GetStats())
+     print "\n".join(s.GetStats(True))