[Spambayes-checkins] spambayes weaktest.py,1.5,1.6
Rob W.W. Hooft
hooft@users.sourceforge.net
Mon Nov 25 13:22:39 2002
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv12331
Modified Files:
weaktest.py
Log Message:
adapt to new update philosophy; add one new training system
Index: weaktest.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/weaktest.py,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** weaktest.py 19 Nov 2002 22:38:37 -0000 1.5
--- weaktest.py 25 Nov 2002 13:22:37 -0000 1.6
***************
*** 23,28 ****
-d decider
Name of the decider. One of %(decisionkeys)s
- -u updater
- Name of the updater. One of %(updaterkeys)s
-m min
Minimal number of messages to train on before involving the decider.
--- 23,26 ----
***************
*** 54,57 ****
--- 52,59 ----
sys.exit(code)
+ DONT_TRAIN = None
+ TRAIN_AS_HAM = 1
+ TRAIN_AS_SPAM = 2
+
class TrainDecision:
def __call__(self,scr,is_spam):
***************
*** 63,89 ****
class UnsureAndFalses(TrainDecision):
def spamtrain(self,scr):
! return scr < options.spam_cutoff
def hamtrain(self,scr):
! return scr > options.ham_cutoff
class UnsureOnly(TrainDecision):
def spamtrain(self,scr):
! return options.ham_cutoff < scr < options.spam_cutoff
! hamtrain = spamtrain
class All(TrainDecision):
def spamtrain(self,scr):
! return 1
! hamtrain = spamtrain
class AllBut0and100(TrainDecision):
def spamtrain(self,scr):
! return scr < 0.995
def hamtrain(self,scr):
! return scr > 0.005
decisions={'all': All,
--- 65,112 ----
class UnsureAndFalses(TrainDecision):
def spamtrain(self,scr):
! if scr < options.spam_cutoff:
! return TRAIN_AS_SPAM
def hamtrain(self,scr):
! if scr > options.ham_cutoff:
! return TRAIN_AS_HAM
class UnsureOnly(TrainDecision):
def spamtrain(self,scr):
! if options.ham_cutoff < scr < options.spam_cutoff:
! return TRAIN_AS_SPAM
! def hamtrain(self,scr):
! if options.ham_cutoff < scr < options.spam_cutoff:
! return TRAIN_AS_HAM
class All(TrainDecision):
def spamtrain(self,scr):
! return TRAIN_AS_SPAM
! def hamtrain(self,scr):
! return TRAIN_AS_HAM
class AllBut0and100(TrainDecision):
def spamtrain(self,scr):
! if scr < 0.995:
! return TRAIN_AS_SPAM
def hamtrain(self,scr):
! if scr > 0.005:
! return TRAIN_AS_HAM
!
! class OwnDecision(TrainDecision):
! def hamtrain(self,scr):
! if scr < options.ham_cutoff:
! return TRAIN_AS_HAM
! elif scr > options.spam_cutoff:
! return TRAIN_AS_SPAM
!
! spamtrain = hamtrain
!
! class OwnDecisionFNCorrection(OwnDecision):
! def spamtrain(self,scr):
! return TRAIN_AS_SPAM
decisions={'all': All,
***************
*** 91,94 ****
--- 114,119 ----
'unsureonly': UnsureOnly,
'unsureandfalses': UnsureAndFalses,
+ 'owndecision': OwnDecision,
+ 'owndecision+fn': OwnDecisionFNCorrection,
}
decisionkeys=decisions.keys()
***************
*** 104,108 ****
self.x += 1
if self.tooearly():
! return True
else:
return self.client(scr,is_spam)
--- 129,136 ----
self.x += 1
if self.tooearly():
! if is_spam:
! return TRAIN_AS_SPAM
! else:
! return TRAIN_AS_HAM
else:
return self.client(scr,is_spam)
***************
*** 118,143 ****
self.d=d
! class AlwaysUpdate(Updater):
! def __call__(self):
! self.d.update_probabilities()
!
! class SometimesUpdate(Updater):
! def __init__(self,d=None,factor=10):
! Updater.__init__(self,d)
! self.factor=factor
! self.n = 0
!
! def __call__(self):
! self.n += 1
! if self.n % self.factor == 0:
! self.d.update_probabilities()
!
! updaters={'always':AlwaysUpdate,
! 'sometimes':SometimesUpdate,
! }
! updaterkeys=updaters.keys()
! updaterkeys.sort()
!
! def drive(nsets,decision,updater):
print options.display()
--- 146,150 ----
self.d=d
! def drive(nsets,decision):
print options.display()
***************
*** 156,161 ****
allfns[fn] = None
! d = hammie.Hammie(hammie.createbayes('weaktest.db', False))
! updater.setd(d)
hamtrain = 0
--- 163,167 ----
allfns[fn] = None
! d = hammie.open('weaktest.db', False)
hamtrain = 0
***************
*** 179,190 ****
print "Ham with score %.2f"%scr
cc.ham(scr)
! if decision(scr,is_spam):
! if is_spam:
! d.train_spam(m)
! spamtrain += 1
! else:
! d.train_ham(m)
! hamtrain += 1
! updater()
if n % 100 == 0:
print "%5d trained:%dH+%dS wrds:%d"%(
--- 185,195 ----
print "Ham with score %.2f"%scr
cc.ham(scr)
! de = decision(scr,is_spam)
! if de == TRAIN_AS_SPAM:
! d.train_spam(m)
! spamtrain += 1
! elif de == TRAIN_AS_HAM:
! d.train_ham(m)
! hamtrain += 1
if n % 100 == 0:
print "%5d trained:%dH+%dS wrds:%d"%(
***************
*** 202,206 ****
try:
! opts, args = getopt.getopt(sys.argv[1:], 'vd:u:hn:m:')
except getopt.error, msg:
usage(1, msg)
--- 207,211 ----
try:
! opts, args = getopt.getopt(sys.argv[1:], 'vd:hn:m:')
except getopt.error, msg:
usage(1, msg)
***************
*** 208,212 ****
nsets = None
decision = decisions['unsureonly']
- updater = updaters['always']
m = 10
--- 213,216 ----
***************
*** 224,231 ****
usage(1,'Unknown decisionmaker')
decision = decisions[arg]
- elif opt == '-u':
- if not updaters.has_key(arg):
- usage(1,'Unknown updater')
- updater = updaters[arg]
if args:
--- 228,231 ----
***************
*** 234,238 ****
usage(1, "-n is required")
! drive(nsets,decision=FirstN(m,decision()),updater=updater())
if __name__ == "__main__":
--- 234,238 ----
usage(1, "-n is required")
! drive(nsets,decision=FirstN(m,decision()))
if __name__ == "__main__":
More information about the Spambayes-checkins mailing list