[Spambayes-checkins] spambayes cmp.py,NONE,1.1 README.txt,1.2,1.3
Tim Peters
tim_one@users.sourceforge.net
Thu, 05 Sep 2002 16:42:55 -0700
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv11162
Modified Files:
README.txt
Added Files:
cmp.py
Log Message:
Checking in the script I use to produce listings of changes in f-p
and f-n rates between two test runs.
--- NEW FILE: cmp.py ---
"""
cmp.py sbase1 sbase2
Combines output from sbase1.txt and sbase2.txt, which are created by
rates.py from timtest.py output, and displays comparison statistics to
stdout.
"""
import sys
# The first two command-line arguments are summary-file base names; '.txt'
# is appended to each when the files are opened further down.  Unpacking
# raises ValueError when fewer than two arguments are supplied.
f1n, f2n = sys.argv[1:3]
# Number of blocks suck() reads from each summary file; every block holds
# one header line followed by NSETS - 1 rate lines.
NSETS = 5
# Return
#     (list of all f-p rates,
#      list of all f-n rates,
#      total f-p,
#      total f-n)
# from summary file f.
def suck(f, nsets=None):
    """Parse one summary file and return its rates and totals.

    f is an open file-like object; it is read line by line with
    f.readline().  nsets is the number of blocks to read and defaults to
    the module-level NSETS.  Each block is one header line (skipped)
    followed by nsets - 1 lines, each holding an f-p rate and an f-n rate
    separated by whitespace.  After the blocks come two "total" lines whose
    next-to-last field is an integer count.

    Returns a 4-tuple (fps, fns, fptot, fntot): the f-p rates and f-n rates
    as lists of floats in file order, then the two integer totals.

    Raises ValueError if a rate line does not hold exactly two numbers or
    a total is not an integer, and IndexError if a total line has fewer
    than two fields (both happen on a truncated file).
    """
    if nsets is None:
        nsets = NSETS

    fns = []
    fps = []
    for block in range(nsets):
        # Skip, e.g.,
        # Training on Data/Ham/Set1 & Data/Spam/Set1 ... 4000 hams & 2750 spams
        f.readline()
        for inner in range(nsets - 1):
            # A line with an f-p rate and an f-n rate.
            p, n = map(float, f.readline().split())
            fps.append(p)
            fns.append(n)

    # "total false pos 8 0.04"
    # "total false neg 249 1.81090909091"
    # The count is the next-to-last field; the last field is the percentage.
    fptot = int(f.readline().split()[-2])
    fntot = int(f.readline().split()[-2])
    return fps, fns, fptot, fntot
def dump(p1s, p2s):
    """Print a side-by-side comparison of two lists of rates.

    p1s and p2s are paired up with zip(), so any surplus entries in the
    longer list are ignored.  One line is printed per pair, showing both
    rates and a tag: "lost" when the rate went up from p1s to p2s, "won"
    when it went down, "tied" when the two are equal.  A blank line and a
    count of each tag follow, then a final blank line.

    Returns None; all output goes to stdout.
    """
    # Count tags directly rather than substring-searching a growing string.
    counts = {"won": 0, "tied": 0, "lost": 0}
    for p1, p2 in zip(p1s, p2s):
        if p1 < p2:
            tag = "lost"
        elif p1 > p2:
            tag = "won"
        else:
            tag = "tied"
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print(" %5.3f %5.3f %s" % (p1, p2, tag))
        counts[tag] += 1

    print("")
    for tag in "won", "tied", "lost":
        print("%-4s %2d %s" % (tag, counts[tag], "times"))
    print("")
# Parse both summary files.  open() replaces the Python-2-only file()
# builtin, and try/finally closes each handle even if parsing fails.
f1 = open(f1n + '.txt')
try:
    fp1, fn1, fptot1, fntot1 = suck(f1)
finally:
    f1.close()

f2 = open(f2n + '.txt')
try:
    fp2, fn2, fptot2, fntot2 = suck(f2)
finally:
    f2.close()

# Each print below takes a single pre-formatted string so the output is the
# same whether print is a statement (Python 2) or a function (Python 3).
print("%s -> %s" % (f1n, f2n))

print("")
print("false positive percentages")
dump(fp1, fp2)
print("total unique fp went from %s to %s" % (fptot1, fptot2))

print("")
print("false negative percentages")
dump(fn1, fn2)
print("total unique fn went from %s to %s" % (fntot1, fntot2))
Index: README.txt
===================================================================
RCS file: /cvsroot/spambayes/spambayes/README.txt,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** README.txt 5 Sep 2002 23:34:41 -0000 1.2
--- README.txt 5 Sep 2002 23:42:52 -0000 1.3
***************
*** 44,47 ****
--- 44,52 ----
statistics.
+ cmp.py
+ Given two summary files produced by rates.py, displays an account
+ of all the f-p and f-n rates side-by-side, along with who won which
+ (etc), and the change in total # of f-ps and f-ns.
+
Test Data Utilities