[Spambayes-checkins] spambayes table.py,NONE,1.1
T. Alexander Popiel
popiel@users.sourceforge.net
Sun, 20 Oct 2002 22:00:08 -0700
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv2626
Added Files:
table.py
Log Message:
Added table.py, a tabular comparator. The output is not as detailed
as cmp.py, but it's concise and usable with more than two files.
--- NEW FILE: table.py ---
#!/usr/bin/env python
"""
table.py base1 base2 ... baseN
Combines the output files base1.txt, base2.txt, etc., which are created
by running a TestDriver script (such as timcv.py), and displays tabulated
comparison statistics on stdout. Each input file is represented by
one column in the table.
"""
import sys
import re
# Return
# (
# ham tested,
# spam tested,
# total f-p,
# total f-n,
# total unsure,
# average f-p rate,
# average f-n rate,
# average unsure rate,
# real cost,
# best cost,
# ham score deviation for all runs,
# spam score deviations for all runs,
# )
# from summary file f.
def suck(f):
    """Parse one TestDriver summary file and return its overall statistics.

    f is an open file-like object; only its readline() method is used.
    Lines are read until the '-> <stat> all runs cost: ' line or the end
    of the file, whichever comes first.  Every '-> <stat> tested' line is
    echoed to stdout as it is seen.

    Returns the 12-tuple

        (htest, stest, fp, fn, un, fpp, fnp, unp, cost, bestcost,
         hamdevall, spamdevall)

    where htest and stest are the ham and spam counts from the last
    'tested' line, fp/fn/un are the all-runs false positive, false
    negative and unsure totals, fpp/fnp/unp are the matching percentages,
    cost and bestcost are the all-runs dollar amounts, and hamdevall and
    spamdevall are (mean, sdev) pairs for all runs.  Any statistic that
    never appears in the file keeps its zero default.
    """
    hamdevall = spamdevall = (0.0, 0.0)
    cost = bestcost = 0.0
    fp = fn = un = 0
    fpp = fnp = unp = 0.0
    htest = stest = 0

    # iter() with a '' sentinel ends the loop at end of file, so a summary
    # that lacks the final cost line cannot make this loop spin forever.
    for line in iter(f.readline, ''):
        if line.startswith('-> <stat> tested'):
            # Writes the same text as the Python 2 statement "print line,"
            # while remaining valid syntax on Python 3.
            sys.stdout.write(line)
            words = line.split()
            htest = int(words[3])
            stest = int(words[6])
        if line.find(' items; mean ') != -1:
            # -> <stat> Ham distribution for this pair: 1000 items; mean 0.05; sample sdev 0.68
            # and later "sample " went away
            vals = line.split(';')
            mean = float(vals[1].split()[-1])
            sdev = float(vals[2].split()[-1])
            typ = vals[0].split()[2]
            # Per-pair distributions are parsed but only the "all runs"
            # figures are kept.
            if line.find('for all runs') != -1:
                if typ == 'Ham':
                    hamdevall = (mean, sdev)
                else:
                    spamdevall = (mean, sdev)
            continue
        # The prefixes below are mutually exclusive, so at most one matches.
        if line.startswith('-> best cost for all runs: $'):
            bestcost = float(line.split('$')[-1])
        elif line.startswith('-> <stat> all runs false positives: '):
            fp = int(line.split()[-1])
        elif line.startswith('-> <stat> all runs false negatives: '):
            fn = int(line.split()[-1])
        elif line.startswith('-> <stat> all runs unsure: '):
            un = int(line.split()[-1])
        elif line.startswith('-> <stat> all runs false positive %: '):
            fpp = float(line.split()[-1])
        elif line.startswith('-> <stat> all runs false negative %: '):
            fnp = float(line.split()[-1])
        elif line.startswith('-> <stat> all runs unsure %: '):
            unp = float(line.split()[-1])
        elif line.startswith('-> <stat> all runs cost: '):
            cost = float(line.split('$')[-1])
            # The cost line is the last statistic of interest; stop here.
            break
    return (htest, stest, fp, fn, un, fpp, fnp, unp, cost, bestcost,
            hamdevall, spamdevall)
def windowsfy(fn):
    """Return fn + '.txt' if a file by that name exists, else fn unchanged."""
    import os
    with_suffix = fn + '.txt'
    if not os.path.exists(with_suffix):
        return fn
    return with_suffix
# Each variable below accumulates one output row: a label followed by one
# 8-character column per input file.  Every label is padded to the same
# 10-character width as "real cost:" so the columns line up.
ratio = "ham:spam: "
fptot = "fp total: "
fpper = "fp %:     "
fntot = "fn total: "
fnper = "fn %:     "
untot = "unsure t: "
unper = "unsure %: "
rcost = "real cost:"
bcost = "best cost:"
hmean = "h mean:   "
hsdev = "h sdev:   "
smean = "s mean:   "
ssdev = "s sdev:   "
meand = "mean diff:"
kval  = "k:        "

for filename in sys.argv[1:]:
    filename = windowsfy(filename)
    # Close each summary file as soon as it has been parsed instead of
    # leaving the handle open for the rest of the run.
    summary = open(filename)
    try:
        (htest, stest, fp, fn, un, fpp, fnp, unp, cost, bestcost,
         hamdevall, spamdevall) = suck(summary)
    finally:
        summary.close()
    ratio += "%8s" % ("%d:%d" % (htest, stest))
    fptot += "%8d" % fp
    fpper += "%8.2f" % fpp
    fntot += "%8d" % fn
    fnper += "%8.2f" % fnp
    untot += "%8d" % un
    unper += "%8.2f" % unp
    rcost += "%8s" % ("$%.2f" % cost)
    bcost += "%8s" % ("$%.2f" % bestcost)
    hmean += "%8.2f" % hamdevall[0]
    hsdev += "%8.2f" % hamdevall[1]
    smean += "%8.2f" % spamdevall[0]
    ssdev += "%8.2f" % spamdevall[1]
    meandiff = spamdevall[0] - hamdevall[0]
    meand += "%8.2f" % meandiff
    # k divides the mean difference by the summed deviations.  That sum is
    # zero when a file carries no "for all runs" distribution lines (both
    # pairs stay at their (0.0, 0.0) defaults), so show a placeholder
    # rather than dying with ZeroDivisionError.
    sdevsum = spamdevall[1] + hamdevall[1]
    if sdevsum:
        k = meandiff / sdevsum
        kval += "%8.2f" % k
    else:
        kval += "%8s" % "n/a"

# print(x) with a single argument emits the same text under Python 2's
# print statement and Python 3's print function.
print(ratio)
print(fptot)
print(fpper)
print(fntot)
print(fnper)
print(untot)
print(unper)
print(rcost)
print(bcost)
print(hmean)
print(hsdev)
print(smean)
print(ssdev)
print(meand)
print(kval)