[Spambayes-checkins] spambayes pop3graph.py,NONE,1.1
Richie Hindle
richiehindle@users.sourceforge.net
Wed Nov 20 12:30:18 2002
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv17757
Added Files:
pop3graph.py
Log Message:
Script for producing ASCII graphs of classifier performance, based
on pop3proxy corpuses.
--- NEW FILE: pop3graph.py ---
"""Analyse the pop3proxy's caches and produce a graph of how accurate
the classifier has been over time. Only really meaningful if you started
with an empty database."""
from __future__ import division
import sys, mboxutils
from FileCorpus import FileCorpus, FileMessageFactory, GzipFileMessageFactory
from Options import options
def main():
    """Print an ASCII graph of the classifier's cumulative accuracy.

    Every message in the pop3proxy spam and ham caches named in ``options``
    is loaded and tagged with the disposition implied by the cache it was
    found in ('Yes' for the spam cache, 'No' for the ham cache).  The
    messages are then walked in sorted-key order and that tag is compared
    with the value of the ``options.hammie_header_name`` header; a match
    counts as a correct classification.

    The graph is written to standard output: '.' plots the number of
    messages seen and '*' the number classified correctly.  Nothing is
    returned.
    """
    # Create the corpuses and the factory that reads the messages.
    if options.pop3proxy_cache_use_gzip:
        messageFactory = GzipFileMessageFactory()
    else:
        messageFactory = FileMessageFactory()
    spamCorpus = FileCorpus(messageFactory, options.pop3proxy_spam_cache)
    hamCorpus = FileCorpus(messageFactory, options.pop3proxy_ham_cache)

    # Read in all the trained messages, remembering which cache each one
    # came from so it can be compared with the classifier's header later.
    allTrained = {}
    for corpus, disposition in [(spamCorpus, 'Yes'), (hamCorpus, 'No')]:
        for m in corpus:
            message = mboxutils.get_message(m.getSubstance())
            message._pop3CacheDisposition = disposition
            allTrained[m.key()] = message

    # Sort the messages into the order they arrived, then work out a scaling
    # factor for the graph - 'limit' is the widest it can be in characters.
    # NOTE(review): this presumes sorted keys reflect arrival order - confirm
    # against how the cache generates its keys.
    # list() + sort() rather than keys().sort() so that this also works where
    # keys() does not return a list.
    keys = list(allTrained.keys())
    keys.sort()
    limit = 70
    if len(keys) < limit:
        scale = 1
    else:
        scale = len(keys) // (limit//2)

    # Build the data - an array of cumulative success indexed by count.
    # One sample is taken per 'scale' messages (after every message when
    # scale is 1), each reduced to a row number by integer division.
    count = successful = 0
    successByCount = []
    for key in keys:
        message = allTrained[key]
        disposition = message[options.hammie_header_name]
        if message._pop3CacheDisposition == disposition:
            successful += 1
        count += 1
        if count % scale == (scale-1):
            successByCount.append(successful // scale)

    # Build the graph, as a list of rows of characters.  Row 0 is the bottom
    # of the graph until the list is reversed for printing.
    size = count // scale
    graph = [[" " for i in range(size+3)] for j in range(size)]
    for c in range(size):
        graph[c][1] = "|"
        graph[c][c+3] = "."
        # The number of correct messages can never exceed the number seen,
        # so the '*' must not sit above the '.' in the same column.  When
        # scale is 1 the sample is taken after the message is counted, which
        # lets successByCount[c] reach c + 1: one row too high, and past the
        # top of the graph (IndexError) in the last column of a perfect run.
        # Clamping to row c fixes both; it is a no-op when scale > 1.
        graph[min(successByCount[c], c)][c+3] = "*"
    graph.reverse()

    # Print the graph.  Single-argument print(...) produces the same output
    # whether print is a statement or a function.
    print("\n Success of the classifier over time:\n")
    print(" . - Number of messages over time")
    print(" * - Number of correctly classified messages over time\n\n")
    for row in range(size):
        line = ''.join(graph[row])
        if row == 0:
            # Top row: label with the total number of messages.
            print(line + " %d" % count)
        elif row == (count - successful) // scale:
            # Row roughly level with the final '*': label with the total
            # number of correct classifications.
            print(line + " %d" % successful)
        else:
            print(line)
    print(" " + "_" * (size+2))
# Run the graph generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
More information about the Spambayes-checkins
mailing list