[Spambayes-checkins] spambayes/Outlook2000 addin.py, 1.131,
1.132 manager.py, 1.96, 1.97 oastats.py, 1.3, 1.4
Tony Meyer
anadelonbrin at users.sourceforge.net
Fri Oct 15 01:36:15 CEST 2004
Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12187/Outlook2000
Modified Files:
addin.py manager.py oastats.py
Log Message:
Log the folder's name rather than id for OnItemAdd events.
Print out a nicer version of the date/time the log was created.
Add persistent statistics. These are saved in a (very) little pickle in the data
directory. In the Advanced tab the persistent stats are shown; in the log both
session-only and total stats are shown.
Index: addin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v
retrieving revision 1.131
retrieving revision 1.132
diff -C2 -d -r1.131 -r1.132
*** addin.py 1 Oct 2004 14:31:34 -0000 1.131
--- addin.py 14 Oct 2004 23:36:12 -0000 1.132
***************
*** 380,384 ****
# Callback from Outlook - locale may have changed.
locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above
! self.manager.LogDebug(2, "OnItemAdd event for folder", self,
"with item", item.Subject.encode("mbcs", "ignore"))
# Due to the way our "missed message" indicator works, we do
--- 380,384 ----
# Callback from Outlook - locale may have changed.
locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above
! self.manager.LogDebug(2, "OnItemAdd event for folder", self.name,
"with item", item.Subject.encode("mbcs", "ignore"))
# Due to the way our "missed message" indicator works, we do
***************
*** 1242,1249 ****
(major, minor, spack, ver_str)
print "using Python", sys.version
! from time import localtime
! ltime = localtime()
! print "Log created %s-%s-%s" % \
! (ltime[0], ltime[1], ltime[2])
self.explorers_events = None # create at OnStartupComplete
--- 1242,1247 ----
(major, minor, spack, ver_str)
print "using Python", sys.version
! from time import asctime, localtime
! print "Log created", asctime(localtime())
self.explorers_events = None # create at OnStartupComplete
***************
*** 1458,1463 ****
# it (ie, the dialog)
self.manager.Save()
! # Report some simple stats.
print "\r\n".join(self.manager.stats.GetStats())
self.manager.Close()
self.manager = None
--- 1456,1466 ----
# it (ie, the dialog)
self.manager.Save()
! # Report some simple stats, for session, and for total.
! print "Session:"
! print "\r\n".join(self.manager.stats.GetStats(True))
! print "Total:"
print "\r\n".join(self.manager.stats.GetStats())
+ # Save stats.
+ self.manager.stats.Store()
self.manager.Close()
self.manager = None
Index: manager.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v
retrieving revision 1.96
retrieving revision 1.97
diff -C2 -d -r1.96 -r1.97
*** manager.py 8 Feb 2004 22:29:45 -0000 1.96
--- manager.py 14 Oct 2004 23:36:12 -0000 1.97
***************
*** 404,408 ****
self.classifier_data = ClassifierData(db_manager, self)
self.LoadBayes()
! self.stats = oastats.Stats(self.config)
# "old" bayes functions - new code should use "classifier_data" directly
--- 404,408 ----
self.classifier_data = ClassifierData(db_manager, self)
self.LoadBayes()
! self.stats = oastats.Stats(self.config, self.data_directory)
# "old" bayes functions - new code should use "classifier_data" directly
Index: oastats.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/oastats.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** oastats.py 16 Dec 2003 05:06:33 -0000 1.3
--- oastats.py 14 Oct 2004 23:36:12 -0000 1.4
***************
*** 1,7 ****
# oastats.py - Outlook Addin Stats
class Stats:
! def __init__(self, config):
self.config = config
self.Reset()
def Reset(self):
--- 1,23 ----
# oastats.py - Outlook Addin Stats
+ import os
+ import pickle
+
+ STATS_FILENAME = "performance_statistics_database.pik"
+
class Stats:
! def __init__(self, config, data_directory):
self.config = config
+ self.stored_statistics_fn = os.path.join(data_directory,
+ STATS_FILENAME)
+ if os.path.exists(self.stored_statistics_fn):
+ self.Load()
+ else:
+ # Reset totals
+ self.totals = {}
+ for stat in ["num_ham", "num_spam", "num_unsure",
+ "num_deleted_spam", "num_deleted_spam_fn",
+ "num_recovered_good", "num_recovered_good_fp",]:
+ self.totals[stat] = 0
self.Reset()
def Reset(self):
***************
*** 9,12 ****
--- 25,44 ----
self.num_deleted_spam = self.num_deleted_spam_fn = 0
self.num_recovered_good = self.num_recovered_good_fp = 0
+ def Load(self):
+ store = open(self.stored_statistics_fn, 'rb')
+ self.totals = pickle.load(store)
+ store.close()
+ def Store(self):
+ # Update totals, and save that.
+ for stat in ["num_ham", "num_spam", "num_unsure",
+ "num_deleted_spam", "num_deleted_spam_fn",
+ "num_recovered_good", "num_recovered_good_fp",]:
+ self.totals[stat] += getattr(self, stat)
+ store = open(self.stored_statistics_fn, 'wb')
+ pickle.dump(self.totals, store)
+ store.close()
+ # Reset, or the reporting for the remainder of this session will be
+ # incorrect.
+ self.Reset()
def RecordClassification(self, score):
score *= 100 # same units as our config values.
***************
*** 31,51 ****
if score < self.config.filter.unsure_threshold:
self.num_deleted_spam_fn += 1
! def GetStats(self):
num_seen = self.num_ham + self.num_spam + self.num_unsure
if num_seen==0:
return ["SpamBayes has processed zero messages"]
chunks = []
push = chunks.append
! perc_ham = 100.0 * self.num_ham / num_seen
! perc_spam = 100.0 * self.num_spam / num_seen
perc_unsure = 100.0 * self.num_unsure / num_seen
! format_dict = dict(perc_spam=perc_spam, perc_ham=perc_ham,
! perc_unsure=perc_unsure, num_seen = num_seen)
! format_dict.update(self.__dict__)
push("SpamBayes has processed %(num_seen)d messages - " \
"%(num_ham)d (%(perc_ham).0f%%) good, " \
"%(num_spam)d (%(perc_spam).0f%%) spam " \
"and %(num_unsure)d (%(perc_unsure).0f%%) unsure" % format_dict)
! if self.num_recovered_good:
push("%(num_recovered_good)d message(s) were manually " \
"classified as good (with %(num_recovered_good_fp)d " \
--- 63,120 ----
if score < self.config.filter.unsure_threshold:
self.num_deleted_spam_fn += 1
! def GetStats(self, session_only=False):
! """Return a description of the statistics.
!
! If session_only is True, then only a description of the statistics
! since we were last reset. Otherwise, lifetime statistics (i.e.
! those including the ones loaded).
!
! Users probably care most about persistent statistics, so present
! those by default. If session-only stats are desired, then a
! special call to here can be made.
! """
num_seen = self.num_ham + self.num_spam + self.num_unsure
+ if not session_only:
+ totals = self.totals
+ num_seen += (totals["num_ham"] + totals["num_spam"] +
+ totals["num_unsure"])
if num_seen==0:
return ["SpamBayes has processed zero messages"]
chunks = []
push = chunks.append
! if session_only:
! num_ham = self.num_ham
! num_spam = self.num_spam
! num_unsure = self.num_unsure
! num_recovered_good = self.num_recovered_good
! num_recovered_good_fp = self.num_recovered_good_fp
! num_deleted_spam = self.num_deleted_spam
! num_deleted_spam_fn = self.num_deleted_spam_fn
! else:
! num_ham = self.num_ham + self.totals["num_ham"]
! num_spam = self.num_spam + self.totals["num_spam"]
! num_unsure = self.num_unsure + self.totals["num_unsure"]
! num_recovered_good = self.num_recovered_good + \
! self.totals["num_recovered_good"]
! num_recovered_good_fp = self.num_recovered_good_fp + \
! self.totals["num_recovered_good_fp"]
! num_deleted_spam = self.num_deleted_spam + \
! self.totals["num_deleted_spam"]
! num_deleted_spam_fn = self.num_deleted_spam_fn + \
! self.totals["num_deleted_spam_fn"]
! perc_ham = 100.0 * num_ham / num_seen
! perc_spam = 100.0 * num_spam / num_seen
perc_unsure = 100.0 * self.num_unsure / num_seen
! format_dict = locals().copy()
! del format_dict["self"]
! del format_dict["push"]
! del format_dict["chunks"]
! format_dict.update(dict(perc_spam=perc_spam, perc_ham=perc_ham,
! perc_unsure=perc_unsure, num_seen=num_seen))
push("SpamBayes has processed %(num_seen)d messages - " \
"%(num_ham)d (%(perc_ham).0f%%) good, " \
"%(num_spam)d (%(perc_spam).0f%%) spam " \
"and %(num_unsure)d (%(perc_unsure).0f%%) unsure" % format_dict)
! if num_recovered_good:
push("%(num_recovered_good)d message(s) were manually " \
"classified as good (with %(num_recovered_good_fp)d " \
***************
*** 53,57 ****
else:
push("No messages were manually classified as good")
! if self.num_deleted_spam:
push("%(num_deleted_spam)d message(s) were manually " \
"classified as spam (with %(num_deleted_spam_fn)d " \
--- 122,126 ----
else:
push("No messages were manually classified as good")
! if num_deleted_spam:
push("%(num_deleted_spam)d message(s) were manually " \
"classified as spam (with %(num_deleted_spam_fn)d " \
***************
*** 67,79 ****
class Config:
filter = FilterConfig()
# processed zero
! s = Stats(Config())
print "\n".join(s.GetStats())
# No recovery
! s = Stats(Config())
s.RecordClassification(.2)
print "\n".join(s.GetStats())
! s = Stats(Config())
s.RecordClassification(.2)
s.RecordClassification(.1)
--- 136,149 ----
class Config:
filter = FilterConfig()
+ data_directory = os.getcwd()
# processed zero
! s = Stats(Config(), data_directory)
print "\n".join(s.GetStats())
# No recovery
! s = Stats(Config(), data_directory)
s.RecordClassification(.2)
print "\n".join(s.GetStats())
! s = Stats(Config(), data_directory)
s.RecordClassification(.2)
s.RecordClassification(.1)
***************
*** 85,86 ****
--- 155,164 ----
s.RecordManualClassification(False, 0.9)
print "\n".join(s.GetStats())
+
+ # Store
+ # (this will leave an artifact in the cwd)
+ s.Store()
+ # Load
+ s = Stats(Config(), data_directory)
+ print "\n".join(s.GetStats())
+ print "\n".join(s.GetStats(True))
More information about the Spambayes-checkins
mailing list