[Spambayes-checkins] spambayes/Outlook2000 oastats.py, NONE,
1.1 addin.py, 1.112, 1.113 filter.py, 1.33, 1.34 manager.py,
1.87, 1.88
Mark Hammond
mhammond at users.sourceforge.net
Sun Sep 28 22:14:28 EDT 2003
Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1:/tmp/cvs-serv20556
Modified Files:
addin.py filter.py manager.py
Added Files:
oastats.py
Log Message:
Add slightly better stats, and a better framework to extend.
--- NEW FILE: oastats.py ---
# oastats.py - Outlook Addin Stats
class Stats:
    """Counters describing what the Outlook addin has done with messages.

    Two groups of counters are kept:

    * automatic classifications (``num_ham``, ``num_spam``, ``num_unsure``),
      updated by RecordClassification();
    * manual corrections made by the user (``num_recovered_good``,
      ``num_recovered_good_fp``, ``num_deleted_spam``,
      ``num_deleted_spam_fn``), updated by RecordManualClassification().

    Scores handed to the Record* methods are multiplied by 100 so they are
    in the same units as ``config.filter.spam_threshold`` and
    ``config.filter.unsure_threshold``.
    """

    def __init__(self, config):
        """Remember *config* (must expose ``filter.spam_threshold`` and
        ``filter.unsure_threshold``) and zero every counter."""
        self.config = config
        self.Reset()

    def Reset(self):
        """Set every counter back to zero."""
        self.num_ham = self.num_spam = self.num_unsure = 0
        self.num_deleted_spam = self.num_deleted_spam_fn = 0
        self.num_recovered_good = self.num_recovered_good_fp = 0

    def RecordClassification(self, score):
        """Count one automatically classified message.

        A scaled score at or above the spam threshold counts as spam, one
        at or above the unsure threshold counts as unsure, anything lower
        counts as ham.
        """
        score *= 100  # same units as our config values.
        filter_config = self.config.filter
        if score >= filter_config.spam_threshold:
            self.num_spam += 1
        elif score >= filter_config.unsure_threshold:
            self.num_unsure += 1
        else:
            self.num_ham += 1

    def RecordManualClassification(self, recover_as_good, score):
        """Count one message the user classified by hand.

        recover_as_good -- true when the user recovered the message as
                           good, false when the user deleted it as spam.
        score           -- the score the classifier gives the message.
        """
        score *= 100  # same units as our config values.
        filter_config = self.config.filter
        if recover_as_good:
            self.num_recovered_good += 1
            # If we are recovering an item that is in the "spam" range,
            # then record it as a "false positive".  The comparison is
            # inclusive so it agrees with RecordClassification(), which
            # treats a score exactly on the threshold as spam.
            if score >= filter_config.spam_threshold:
                self.num_recovered_good_fp += 1
        else:
            self.num_deleted_spam += 1
            # If we are deleting as Spam an item that was in our "good"
            # range, then record it as a false negative.
            if score < filter_config.unsure_threshold:
                self.num_deleted_spam_fn += 1

    def GetStats(self):
        """Return a list of human-readable lines summarising the counters.

        When nothing has been classified yet the list holds a single
        "processed zero messages" line; otherwise it holds one line for
        the automatic totals, one for manual "good" recoveries and one
        for manual "spam" deletions.
        """
        num_seen = self.num_ham + self.num_spam + self.num_unsure
        if num_seen == 0:
            # Nothing to report, and avoids dividing by zero below.
            return ["SpamBayes has processed zero messages"]

        # The 100.0 forces float division; "%d" then truncates for display.
        format_dict = dict(perc_spam=100.0 * self.num_spam / num_seen,
                           perc_ham=100.0 * self.num_ham / num_seen,
                           perc_unsure=100.0 * self.num_unsure / num_seen,
                           num_seen=num_seen)
        # Expose every counter attribute to the format strings below.
        format_dict.update(self.__dict__)

        chunks = []
        push = chunks.append
        push("SpamBayes has processed %(num_seen)d messages - "
             "%(num_ham)d (%(perc_ham)d%%) good, "
             "%(num_spam)d (%(perc_spam)d%%) spam "
             "and %(num_unsure)d (%(perc_unsure)d%%) unsure" % format_dict)
        if self.num_recovered_good:
            push("%(num_recovered_good)d message(s) were manually "
                 "classified as good (with %(num_recovered_good_fp)d "
                 "being false positives)" % format_dict)
        else:
            push("No messages were manually classified as good")
        if self.num_deleted_spam:
            push("%(num_deleted_spam)d message(s) were manually "
                 "classified as spam (with %(num_deleted_spam_fn)d "
                 "being false negatives)" % format_dict)
        else:
            push("No messages were manually classified as spam")
        return chunks
if __name__ == '__main__':
    # Minimal stand-ins for the real addin configuration: Stats only reads
    # config.filter.unsure_threshold and config.filter.spam_threshold.
    class FilterConfig:
        unsure_threshold = 15
        spam_threshold = 85

    class Config:
        filter = FilterConfig()

    # NOTE: print is called with a single parenthesised argument so the
    # output is the same under the Python 2 print statement and the
    # Python 3 print function.

    # processed zero
    s = Stats(Config())
    print("\n".join(s.GetStats()))

    # No recovery
    s = Stats(Config())
    s.RecordClassification(.2)
    print("\n".join(s.GetStats()))

    # A mix of automatic classifications plus manual corrections in both
    # directions, with scores in the ham range and in the spam range.
    s = Stats(Config())
    s.RecordClassification(.2)
    s.RecordClassification(.1)
    s.RecordClassification(.4)
    s.RecordClassification(.9)
    s.RecordManualClassification(True, 0.1)
    s.RecordManualClassification(True, 0.9)
    s.RecordManualClassification(False, 0.1)
    s.RecordManualClassification(False, 0.9)
    print("\n".join(s.GetStats()))
Index: addin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v
retrieving revision 1.112
retrieving revision 1.113
diff -C2 -d -r1.112 -r1.113
*** addin.py 15 Sep 2003 06:26:35 -0000 1.112
--- addin.py 29 Sep 2003 02:14:25 -0000 1.113
***************
*** 611,615 ****
if not self.manager.config.filter.enabled:
self.manager.ReportError(
! "You must enable SpamBayes before you can delete as spam")
return
SetWaitCursor(1)
--- 611,616 ----
if not self.manager.config.filter.enabled:
self.manager.ReportError(
! "You must configure and enable SpamBayes before you can " \
! "delete as spam")
return
SetWaitCursor(1)
***************
*** 631,634 ****
--- 632,638 ----
new_msg_state = self.manager.config.general.delete_as_spam_message_state
for msgstore_message in msgstore_messages:
+ # Record this recovery in our stats.
+ self.manager.stats.RecordManualClassification(False,
+ self.manager.score(msgstore_message))
# Must train before moving, else we lose the message!
subject = msgstore_message.GetSubject()
***************
*** 666,670 ****
if not self.manager.config.filter.enabled:
self.manager.ReportError(
! "You must enable SpamBayes before you can recover spam")
return
SetWaitCursor(1)
--- 670,675 ----
if not self.manager.config.filter.enabled:
self.manager.ReportError(
! "You must configure and enable SpamBayes before you can " \
! "recover spam")
return
SetWaitCursor(1)
***************
*** 680,683 ****
--- 685,691 ----
# that the source folder == dest folder - restore to
# the inbox in this case.
+ # (But more likely is that the original store may be read-only
+ # so we were unable to record the initial folder, as we save it
+ # *before* we do the move (and saving after is hard))
try:
subject = msgstore_message.GetSubject()
***************
*** 688,691 ****
--- 696,702 ----
restore_folder = inbox_folder
+ # Record this recovery in our stats.
+ self.manager.stats.RecordManualClassification(True,
+ self.manager.score(msgstore_message))
# Must train before moving, else we lose the message!
print "Recovering to folder '%s' and ham training message '%s' - " % (restore_folder.name, subject),
***************
*** 1235,1239 ****
def ProcessMissedMessages(self):
- # This could possibly spawn threads if it was too slow!
from time import clock
config = self.manager.config.filter
--- 1246,1249 ----
***************
*** 1339,1345 ****
# it (ie, the dialog)
self.manager.Save()
! stats = self.manager.stats
! print "SpamBayes processed %d messages, finding %d spam and %d unsure" % \
! (stats.num_seen, stats.num_spam, stats.num_unsure)
self.manager.Close()
self.manager = None
--- 1349,1354 ----
# it (ie, the dialog)
self.manager.Save()
! # Report some simple stats.
! print "\r\n".join(self.manager.stats.GetStats())
self.manager.Close()
self.manager = None
Index: filter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/filter.py,v
retrieving revision 1.33
retrieving revision 1.34
diff -C2 -d -r1.33 -r1.34
*** filter.py 19 Sep 2003 04:03:38 -0000 1.33
--- filter.py 29 Sep 2003 02:14:25 -0000 1.34
***************
*** 13,17 ****
config = mgr.config.filter
prob = mgr.score(msg)
- mgr.stats.num_seen += 1
prob_perc = prob * 100
if prob_perc >= config.spam_threshold:
--- 13,16 ----
***************
*** 81,84 ****
--- 80,84 ----
raise RuntimeError, "Eeek - bad action '%r'" % (action,)
+ mgr.stats.RecordClassification(prob)
return disposition
except:
Index: manager.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v
retrieving revision 1.87
retrieving revision 1.88
diff -C2 -d -r1.87 -r1.88
*** manager.py 10 Sep 2003 07:42:45 -0000 1.87
--- manager.py 29 Sep 2003 02:14:25 -0000 1.88
***************
*** 15,18 ****
--- 15,19 ----
import msgstore
+ import oastats
try:
***************
*** 133,139 ****
pass
- class Stats:
- def __init__(self):
- self.num_seen = self.num_spam = self.num_unsure = 0
# Function to "safely" save a pickle, only overwriting
--- 134,137 ----
***************
*** 323,327 ****
self.addin = None
self.verbose = verbose
- self.stats = Stats()
self.outlook = outlook
self.dialog_parser = None
--- 321,324 ----
***************
*** 386,389 ****
--- 383,387 ----
self.classifier_data = ClassifierData(db_manager, self)
self.LoadBayes()
+ self.stats = oastats.Stats(self.config)
# "old" bayes functions - new code should use "classifier_data" directly
More information about the Spambayes-checkins
mailing list