[Spambayes-checkins] spambayes/contrib tte.py,1.7,1.8
Skip Montanaro
montanaro at users.sourceforge.net
Mon Mar 15 20:05:36 EST 2004
Update of /cvsroot/spambayes/spambayes/contrib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23407
Modified Files:
tte.py
Log Message:
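Added a verbose flag (-v / --verbose): when set, train() prints each round number and every missed ham or spam message (score and message-id) to stderr.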
Index: tte.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/contrib/tte.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** tte.py 7 Mar 2004 14:51:07 -0000 1.7
--- tte.py 16 Mar 2004 01:05:33 -0000 1.8
***************
*** 6,10 ****
usage %(prog)s [ -h ] -g file -s file [ -d file | -p file ] \
! [ -m N ] [ -r N ] [ -c ext ] [ -o sect:opt:val ]
-h - Print this usage message and exit.
--- 6,10 ----
usage %(prog)s [ -h ] -g file -s file [ -d file | -p file ] \
! [ -m N ] [ -r N ] [ -c ext ] [ -o sect:opt:val ] [ -v ]
-h - Print this usage message and exit.
***************
*** 32,35 ****
--- 32,38 ----
Set [sect, opt] in the options database to val.
+ -v Be very verbose, spewing all sorts of stuff out to stderr.
+
+
Note: The -c command line argument isn't quite as benign as it might first
appear. Since the tte protocol trains on the same number of ham and spam
***************
*** 78,82 ****
print >> sys.stderr, __doc__.strip() % globals()
! def train(store, ham, spam, maxmsgs, maxrounds, tdict):
smisses = hmisses = round = 0
ham_cutoff = Options.options["Categorization", "ham_cutoff"]
--- 81,85 ----
print >> sys.stderr, __doc__.strip() % globals()
! def train(store, ham, spam, maxmsgs, maxrounds, tdict, verbose):
smisses = hmisses = round = 0
ham_cutoff = Options.options["Categorization", "ham_cutoff"]
***************
*** 87,90 ****
--- 90,97 ----
spamcan = mboxutils.getmbox(spam)
round += 1
+
+ if verbose:
+ print >> sys.stderr, "*** round", round, "***"
+
hmisses = smisses = nmsgs = 0
start = datetime.datetime.now()
***************
*** 98,107 ****
sys.stdout.flush()
! if store.spamprob(tokenize(hammsg)) > ham_cutoff:
hmisses += 1
tdict[hammsg["message-id"]] = True
store.learn(tokenize(hammsg), False)
! if store.spamprob(tokenize(spammsg)) < spam_cutoff:
smisses += 1
tdict[spammsg["message-id"]] = True
--- 105,120 ----
sys.stdout.flush()
! score = store.spamprob(tokenize(hammsg))
! if score > ham_cutoff:
! if verbose:
! print >> sys.stderr, "miss ham: %.6f %s" % (score, hammsg["message-id"])
hmisses += 1
tdict[hammsg["message-id"]] = True
store.learn(tokenize(hammsg), False)
! score = store.spamprob(tokenize(spammsg))
! if score < spam_cutoff:
! if verbose:
! print >> sys.stderr, "miss spam: %.6f %s" % (score, spammsg["message-id"])
smisses += 1
tdict[spammsg["message-id"]] = True
***************
*** 140,146 ****
def main(args):
try:
! opts, args = getopt.getopt(args, "hg:s:d:p:o:m:r:c:",
["help", "good=", "spam=",
! "database=", "pickle=",
"option=", "max=", "maxrounds=",
"cullext="])
--- 153,159 ----
def main(args):
try:
! opts, args = getopt.getopt(args, "hg:s:d:p:o:m:r:c:v",
["help", "good=", "spam=",
! "database=", "pickle=", "verbose",
"option=", "max=", "maxrounds=",
"cullext="])
***************
*** 152,159 ****
--- 165,175 ----
maxmsgs = 0
maxrounds = MAXROUNDS
+ verbose = False
for opt, arg in opts:
if opt in ("-h", "--help"):
usage()
return 0
+ elif opt in ("-v", "--verbose"):
+ verbose = True
elif opt in ("-g", "--good"):
ham = arg
***************
*** 183,187 ****
tdict = {}
! train(store, ham, spam, maxmsgs, maxrounds, tdict)
store.store()
--- 199,203 ----
tdict = {}
! train(store, ham, spam, maxmsgs, maxrounds, tdict, verbose)
store.store()
More information about the Spambayes-checkins mailing list