[Spambayes-checkins]
spambayes hammie.py,1.11,1.12 setup.py,1.2,1.3 timtest.py,1.11,1.12
Jeremy Hylton
jhylton@users.sourceforge.net
Sat, 07 Sep 2002 09:15:47 -0700
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv17725
Modified Files:
hammie.py setup.py timtest.py
Log Message:
Use tokenizer module.
XXX Watch out, Tim! I just changed timtest out from under you.
Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammie.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** hammie.py 7 Sep 2002 06:18:03 -0000 1.11
--- hammie.py 7 Sep 2002 16:15:45 -0000 1.12
***************
*** 47,51 ****
# Tim's tokenizer kicks far more booty than anything I would have
# written. Score one for analysis ;)
! from timtoken import tokenize
class DBDict:
--- 47,51 ----
# Tim's tokenizer kicks far more booty than anything I would have
# written. Score one for analysis ;)
! from tokenizer import tokenize
class DBDict:
Index: setup.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/setup.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** setup.py 7 Sep 2002 05:52:48 -0000 1.2
--- setup.py 7 Sep 2002 16:15:45 -0000 1.3
***************
*** 4,8 ****
name='spambayes',
scripts=['unheader.py', 'hammie.py'],
! py_modules=['classifier', 'timtoken']
)
--- 4,8 ----
name='spambayes',
scripts=['unheader.py', 'hammie.py'],
! py_modules=['classifier', 'tokenizer']
)
Index: timtest.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/timtest.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** timtest.py 7 Sep 2002 05:11:31 -0000 1.11
--- timtest.py 7 Sep 2002 16:15:45 -0000 1.12
***************
*** 13,17 ****
import Tester
import classifier
! from timtoken import tokenize
class Hist:
--- 13,17 ----
import Tester
import classifier
! from tokenizer import tokenize
class Hist:
***************
*** 63,67 ****
print "prob(%r) = %g" % clue
print
! guts = msg.guts
if charlimit is not None:
guts = guts[:charlimit]
--- 63,67 ----
print "prob(%r) = %g" % clue
print
! guts = str(msg)
if charlimit is not None:
guts = guts[:charlimit]
***************
*** 86,89 ****
--- 86,92 ----
return self.tag == other.tag
+ def __str__(self):
+ return self.guts
+
class MsgStream(object):
def __init__(self, directory):
***************
*** 153,157 ****
printhist("all runs:", self.global_ham_hist, self.global_spam_hist)
! def test(self, ham, spam):
c = self.classifier
t = self.tester
--- 156,160 ----
printhist("all runs:", self.global_ham_hist, self.global_spam_hist)
! def test(self, ham, spam, charlimit=None):
c = self.classifier
t = self.tester
***************
*** 168,172 ****
print "Low prob spam!", prob
prob, clues = c.spamprob(msg, True)
! printmsg(msg, prob, clues)
t.reset_test_results()
--- 171,175 ----
print "Low prob spam!", prob
prob, clues = c.spamprob(msg, True)
! printmsg(msg, prob, clues, charlimit)
t.reset_test_results()
***************
*** 185,189 ****
print '*' * 78
prob, clues = c.spamprob(e, True)
! printmsg(e, prob, clues)
newfneg = Set(t.false_negatives()) - self.falseneg
--- 188,192 ----
print '*' * 78
prob, clues = c.spamprob(e, True)
! printmsg(e, prob, clues, charlimit)
newfneg = Set(t.false_negatives()) - self.falseneg