[Spambayes-checkins] spambayes weaktest.py,1.5,1.6

Mon Nov 25 13:22:39 2002

Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv12331

Modified Files:
	weaktest.py 
Log Message:
adapt to new update philosophy; add one new training system

Index: weaktest.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/weaktest.py,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** weaktest.py	19 Nov 2002 22:38:37 -0000	1.5
--- weaktest.py	25 Nov 2002 13:22:37 -0000	1.6
***************
*** 23,28 ****
      -d decider 
          Name of the decider. One of %(decisionkeys)s
-     -u updater
-         Name of the updater. One of %(updaterkeys)s
      -m min
          Minimal number of messages to train on before involving the decider.
--- 23,26 ----
***************
*** 54,57 ****
--- 52,59 ----
      sys.exit(code)
  
+ DONT_TRAIN = None
+ TRAIN_AS_HAM = 1
+ TRAIN_AS_SPAM = 2
+ 
  class TrainDecision:
      def __call__(self,scr,is_spam):
***************
*** 63,89 ****
  class UnsureAndFalses(TrainDecision):
      def spamtrain(self,scr):
!         return scr < options.spam_cutoff
  
      def hamtrain(self,scr):
!         return scr > options.ham_cutoff
  
  class UnsureOnly(TrainDecision):
      def spamtrain(self,scr):
!         return options.ham_cutoff < scr < options.spam_cutoff
  
!     hamtrain = spamtrain
  
  class All(TrainDecision):
      def spamtrain(self,scr):
!         return 1
  
!     hamtrain = spamtrain
  
  class AllBut0and100(TrainDecision):
      def spamtrain(self,scr):
!         return scr < 0.995
  
      def hamtrain(self,scr):
!         return scr > 0.005
  
  decisions={'all': All,
--- 65,112 ----
  class UnsureAndFalses(TrainDecision):
      def spamtrain(self,scr):
!         if scr < options.spam_cutoff:
! 	    return TRAIN_AS_SPAM
  
      def hamtrain(self,scr):
!         if scr > options.ham_cutoff:
! 	    return TRAIN_AS_HAM
  
  class UnsureOnly(TrainDecision):
      def spamtrain(self,scr):
!         if options.ham_cutoff < scr < options.spam_cutoff:
! 	    return TRAIN_AS_SPAM
  
!     def hamtrain(self,scr):
!         if options.ham_cutoff < scr < options.spam_cutoff:
! 	    return TRAIN_AS_HAM
  
  class All(TrainDecision):
      def spamtrain(self,scr):
!         return TRAIN_AS_SPAM
  
!     def hamtrain(self,scr):
!         return TRAIN_AS_HAM
  
  class AllBut0and100(TrainDecision):
      def spamtrain(self,scr):
!         if scr < 0.995:
! 	    return TRAIN_AS_SPAM
  
      def hamtrain(self,scr):
!         if scr > 0.005:
!             return TRAIN_AS_HAM
! 
! class OwnDecision(TrainDecision):
!     def hamtrain(self,scr):
!         if scr < options.ham_cutoff:
! 	    return TRAIN_AS_HAM
!         elif scr > options.spam_cutoff:
! 	    return TRAIN_AS_SPAM
! 
!     spamtrain = hamtrain
! 
! class OwnDecisionFNCorrection(OwnDecision):
!     def spamtrain(self,scr):
!         return TRAIN_AS_SPAM
  
  decisions={'all': All,
***************
*** 91,94 ****
--- 114,119 ----
             'unsureonly': UnsureOnly,
             'unsureandfalses': UnsureAndFalses,
+            'owndecision': OwnDecision,
+            'owndecision+fn': OwnDecisionFNCorrection,
            }
  decisionkeys=decisions.keys()
***************
*** 104,108 ****
          self.x += 1
          if self.tooearly():
!             return True
          else:
              return self.client(scr,is_spam)
--- 129,136 ----
          self.x += 1
          if self.tooearly():
!             if is_spam:
! 		return TRAIN_AS_SPAM
!             else:
! 		return TRAIN_AS_HAM
          else:
              return self.client(scr,is_spam)
***************
*** 118,143 ****
          self.d=d
  
! class AlwaysUpdate(Updater):
!     def __call__(self):
!         self.d.update_probabilities()
! 
! class SometimesUpdate(Updater):
!     def __init__(self,d=None,factor=10):
!         Updater.__init__(self,d)
!         self.factor=factor
!         self.n = 0
! 
!     def __call__(self):
!         self.n += 1
!         if self.n % self.factor == 0:
!             self.d.update_probabilities()
! 
! updaters={'always':AlwaysUpdate,
!           'sometimes':SometimesUpdate,
!          }
! updaterkeys=updaters.keys()
! updaterkeys.sort()
! 
! def drive(nsets,decision,updater):
      print options.display()
  
--- 146,150 ----
          self.d=d
  
! def drive(nsets,decision):
      print options.display()
  
***************
*** 156,161 ****
          allfns[fn] = None
  
!     d = hammie.Hammie(hammie.createbayes('weaktest.db', False))
!     updater.setd(d)
  
      hamtrain = 0
--- 163,167 ----
          allfns[fn] = None
  
!     d = hammie.open('weaktest.db', False)
  
      hamtrain = 0
***************
*** 179,190 ****
                      print "Ham with score %.2f"%scr
                  cc.ham(scr)
!         if decision(scr,is_spam):
!             if is_spam:
!                 d.train_spam(m)
!                 spamtrain += 1
!             else:
!                 d.train_ham(m)
!                 hamtrain += 1
!             updater()
          if n % 100 == 0:
              print "%5d trained:%dH+%dS wrds:%d"%(
--- 185,195 ----
                      print "Ham with score %.2f"%scr
                  cc.ham(scr)
!         de = decision(scr,is_spam) 
!         if de == TRAIN_AS_SPAM: 
!             d.train_spam(m)
!             spamtrain += 1
!         elif de == TRAIN_AS_HAM:
!             d.train_ham(m)
!             hamtrain += 1
          if n % 100 == 0:
              print "%5d trained:%dH+%dS wrds:%d"%(
***************
*** 202,206 ****
  
      try:
!         opts, args = getopt.getopt(sys.argv[1:], 'vd:u:hn:m:')
      except getopt.error, msg:
          usage(1, msg)
--- 207,211 ----
  
      try:
!         opts, args = getopt.getopt(sys.argv[1:], 'vd:hn:m:')
      except getopt.error, msg:
          usage(1, msg)
***************
*** 208,212 ****
      nsets = None
      decision = decisions['unsureonly']
-     updater = updaters['always']
      m = 10
  
--- 213,216 ----
***************
*** 224,231 ****
                  usage(1,'Unknown decisionmaker')
              decision = decisions[arg]
-         elif opt == '-u':
-             if not updaters.has_key(arg):
-                 usage(1,'Unknown updater')
-             updater = updaters[arg]
  
      if args:
--- 228,231 ----
***************
*** 234,238 ****
          usage(1, "-n is required")
  
!     drive(nsets,decision=FirstN(m,decision()),updater=updater())
  
  if __name__ == "__main__":
--- 234,238 ----
          usage(1, "-n is required")
  
!     drive(nsets,decision=FirstN(m,decision()))
  
  if __name__ == "__main__":