[Spambayes-checkins] spambayes hammie.py,1.11,1.12 setup.py,1.2,1.3 timtest.py,1.11,1.12

Jeremy Hylton jhylton@users.sourceforge.net
Sat, 07 Sep 2002 09:15:47 -0700


Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv17725

Modified Files:
	hammie.py setup.py timtest.py 
Log Message:
Use tokenizer module.

XXX Watch out, Tim!  I just changed timtest out from under you.


Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammie.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** hammie.py	7 Sep 2002 06:18:03 -0000	1.11
--- hammie.py	7 Sep 2002 16:15:45 -0000	1.12
***************
*** 47,51 ****
  # Tim's tokenizer kicks far more booty than anything I would have
  # written.  Score one for analysis ;)
! from timtoken import tokenize
  
  class DBDict:
--- 47,51 ----
  # Tim's tokenizer kicks far more booty than anything I would have
  # written.  Score one for analysis ;)
! from tokenizer import tokenize
  
  class DBDict:

Index: setup.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/setup.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** setup.py	7 Sep 2002 05:52:48 -0000	1.2
--- setup.py	7 Sep 2002 16:15:45 -0000	1.3
***************
*** 4,8 ****
    name='spambayes', 
    scripts=['unheader.py', 'hammie.py'],
!   py_modules=['classifier', 'timtoken']
    )
  
--- 4,8 ----
    name='spambayes', 
    scripts=['unheader.py', 'hammie.py'],
!   py_modules=['classifier', 'tokenizer']
    )
  

Index: timtest.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/timtest.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** timtest.py	7 Sep 2002 05:11:31 -0000	1.11
--- timtest.py	7 Sep 2002 16:15:45 -0000	1.12
***************
*** 13,17 ****
  import Tester
  import classifier
! from timtoken import tokenize
  
  class Hist:
--- 13,17 ----
  import Tester
  import classifier
! from tokenizer import tokenize
  
  class Hist:
***************
*** 63,67 ****
          print "prob(%r) = %g" % clue
      print
!     guts = msg.guts
      if charlimit is not None:
          guts = guts[:charlimit]
--- 63,67 ----
          print "prob(%r) = %g" % clue
      print
!     guts = str(msg)
      if charlimit is not None:
          guts = guts[:charlimit]
***************
*** 86,89 ****
--- 86,92 ----
          return self.tag == other.tag
  
+     def __str__(self):
+         return self.guts
+ 
  class MsgStream(object):
      def __init__(self, directory):
***************
*** 153,157 ****
          printhist("all runs:", self.global_ham_hist, self.global_spam_hist)
  
!     def test(self, ham, spam):
          c = self.classifier
          t = self.tester
--- 156,160 ----
          printhist("all runs:", self.global_ham_hist, self.global_spam_hist)
  
!     def test(self, ham, spam, charlimit=None):
          c = self.classifier
          t = self.tester
***************
*** 168,172 ****
                  print "Low prob spam!", prob
                  prob, clues = c.spamprob(msg, True)
!                 printmsg(msg, prob, clues)
  
          t.reset_test_results()
--- 171,175 ----
                  print "Low prob spam!", prob
                  prob, clues = c.spamprob(msg, True)
!                 printmsg(msg, prob, clues, charlimit)
  
          t.reset_test_results()
***************
*** 185,189 ****
              print '*' * 78
              prob, clues = c.spamprob(e, True)
!             printmsg(e, prob, clues)
  
          newfneg = Set(t.false_negatives()) - self.falseneg
--- 188,192 ----
              print '*' * 78
              prob, clues = c.spamprob(e, True)
!             printmsg(e, prob, clues, charlimit)
  
          newfneg = Set(t.false_negatives()) - self.falseneg