[Spambayes-checkins] spambayes/contrib tte.py,1.7,1.8

Mon Mar 15 20:05:36 EST 2004

Update of /cvsroot/spambayes/spambayes/contrib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23407

Modified Files:
	tte.py 
Log Message:
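Added a verbose flag (-v / --verbose) that reports each training round and every misclassified ham or spam message on stderr.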


Index: tte.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/contrib/tte.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** tte.py	7 Mar 2004 14:51:07 -0000	1.7
--- tte.py	16 Mar 2004 01:05:33 -0000	1.8
***************
*** 6,10 ****
  
  usage %(prog)s [ -h ] -g file -s file [ -d file | -p file ] \
!                [ -m N ] [ -r N ] [ -c ext ] [ -o sect:opt:val ]
  
  -h      - Print this usage message and exit.
--- 6,10 ----
  
  usage %(prog)s [ -h ] -g file -s file [ -d file | -p file ] \
!                [ -m N ] [ -r N ] [ -c ext ] [ -o sect:opt:val ] [ -v ]
  
  -h      - Print this usage message and exit.
***************
*** 32,35 ****
--- 32,38 ----
            Set [sect, opt] in the options database to val.
  
+ -v        Be very verbose, spewing all sorts of stuff out to stderr.
+ 
+ 
  Note: The -c command line argument isn't quite as benign as it might first
  appear.  Since the tte protocol trains on the same number of ham and spam
***************
*** 78,82 ****
      print >> sys.stderr, __doc__.strip() % globals()
  
! def train(store, ham, spam, maxmsgs, maxrounds, tdict):
      smisses = hmisses = round = 0
      ham_cutoff = Options.options["Categorization", "ham_cutoff"]
--- 81,85 ----
      print >> sys.stderr, __doc__.strip() % globals()
  
! def train(store, ham, spam, maxmsgs, maxrounds, tdict, verbose):
      smisses = hmisses = round = 0
      ham_cutoff = Options.options["Categorization", "ham_cutoff"]
***************
*** 87,90 ****
--- 90,97 ----
          spamcan = mboxutils.getmbox(spam)
          round += 1
+ 
+         if verbose:
+             print >> sys.stderr, "*** round", round, "***"
+ 
          hmisses = smisses = nmsgs = 0
          start = datetime.datetime.now()
***************
*** 98,107 ****
                  sys.stdout.flush()
  
!                 if store.spamprob(tokenize(hammsg)) > ham_cutoff:
                      hmisses += 1
                      tdict[hammsg["message-id"]] = True
                      store.learn(tokenize(hammsg), False)
  
!                 if store.spamprob(tokenize(spammsg)) < spam_cutoff:
                      smisses += 1
                      tdict[spammsg["message-id"]] = True
--- 105,120 ----
                  sys.stdout.flush()
  
!                 score = store.spamprob(tokenize(hammsg))
!                 if score > ham_cutoff:
!                     if verbose:
!                         print >> sys.stderr, "miss ham:  %.6f %s" % (score, hammsg["message-id"])
                      hmisses += 1
                      tdict[hammsg["message-id"]] = True
                      store.learn(tokenize(hammsg), False)
  
!                 score = store.spamprob(tokenize(spammsg))
!                 if score < spam_cutoff:
!                     if verbose:
!                         print >> sys.stderr, "miss spam: %.6f %s" % (score, spammsg["message-id"])
                      smisses += 1
                      tdict[spammsg["message-id"]] = True
***************
*** 140,146 ****
  def main(args):
      try:
!         opts, args = getopt.getopt(args, "hg:s:d:p:o:m:r:c:",
                                     ["help", "good=", "spam=",
!                                     "database=", "pickle=",
                                      "option=", "max=", "maxrounds=",
                                      "cullext="])
--- 153,159 ----
  def main(args):
      try:
!         opts, args = getopt.getopt(args, "hg:s:d:p:o:m:r:c:v",
                                     ["help", "good=", "spam=",
!                                     "database=", "pickle=", "verbose",
                                      "option=", "max=", "maxrounds=",
                                      "cullext="])
***************
*** 152,159 ****
--- 165,175 ----
      maxmsgs = 0
      maxrounds = MAXROUNDS
+     verbose = False
      for opt, arg in opts:
          if opt in ("-h", "--help"):
              usage()
              return 0
+         elif opt in ("-v", "--verbose"):
+             verbose = True
          elif opt in ("-g", "--good"):
              ham = arg
***************
*** 183,187 ****
  
      tdict = {}
!     train(store, ham, spam, maxmsgs, maxrounds, tdict)
  
      store.store()
--- 199,203 ----
  
      tdict = {}
!     train(store, ham, spam, maxmsgs, maxrounds, tdict, verbose)
  
      store.store()