[Spambayes-checkins] spambayes README.txt,1.32,1.33 hammie.py,1.27,1.28 runtest.sh,1.5,1.6

Sat, 05 Oct 2002 17:23:43 -0700

Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv20019

Modified Files:
	README.txt hammie.py runtest.sh 
Log Message:
* Updated README to mention hammiesrv.
* hammie now supports a -r flag, which makes -u report hams instead of
  spams (thanks Alexander Leidinger).
* hammie now prints a single summary covering all -u runs instead of one
  summary per run (thanks Alexander Leidinger).  If you liked it better
  the other way, please let me know.

Index: README.txt
===================================================================
RCS file: /cvsroot/spambayes/spambayes/README.txt,v
retrieving revision 1.32
retrieving revision 1.33
diff -C2 -d -r1.32 -r1.33
*** README.txt	5 Oct 2002 04:22:49 -0000	1.32
--- README.txt	6 Oct 2002 00:23:40 -0000	1.33
***************
*** 74,78 ****
  hammie.py
      A spamassassin-like filter which uses tokenizer and classifier (above).
!     Needs to be made faster, especially for writes.

  pop3proxy.py
--- 74,81 ----
  hammie.py
      A spamassassin-like filter which uses tokenizer and classifier (above).
! 
! hammiesrv.py
!     A first stab at making hammie into a client/server model, using
!     XML-RPC.

  pop3proxy.py

Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammie.py,v
retrieving revision 1.27
retrieving revision 1.28
diff -C2 -d -r1.27 -r1.28
*** hammie.py	1 Oct 2002 15:07:45 -0000	1.27
--- hammie.py	6 Oct 2002 00:23:40 -0000	1.28
***************
*** 18,21 ****
--- 18,24 ----
          mbox of unknown messages.  A ham/spam decision is reported for each.
          Can be specified more than once.
+     -r
+         reverse the meaning of the check (report ham instead of spam).
+         Only meaningful with the -u option.
      -p FILE
          use file as the persistent store.  loads data from this file if it
***************
*** 27,31 ****
      -f
          run as a filter: read a single message from stdin, add an
!         %(DISPHEADER)s header, and write it to stdout.
  """

--- 30,35 ----
      -f
          run as a filter: read a single message from stdin, add an
!         %(DISPHEADER)s header, and write it to stdout.  If you want to
!         run from procmail, this is your option.
  """

***************
*** 314,318 ****
      print

! def score(hammie, msgs):
      """Score (judge) all messages from a mailbox."""
      # XXX The reporting needs work!
--- 318,322 ----
      print

! def score(hammie, msgs, reverse=0):
      """Score (judge) all messages from a mailbox."""
      # XXX The reporting needs work!
***************
*** 323,338 ****
          i += 1
          prob, clues = hammie.score(msg, True)
-         isspam = prob >= SPAM_THRESHOLD
          if hasattr(msg, '_mh_msgno'):
              msgno = msg._mh_msgno
          else:
              msgno = i
          if isspam:
              spams += 1
!             print "%6s %4.2f %1s" % (msgno, prob, isspam and "S" or "."),
!             print hammie.formatclues(clues)
          else:
              hams += 1
!     print "Total %d spam, %d ham" % (spams, hams)

  def createbayes(pck=DEFAULTDB, usedb=False):
--- 327,346 ----
          i += 1
          prob, clues = hammie.score(msg, True)
          if hasattr(msg, '_mh_msgno'):
              msgno = msg._mh_msgno
          else:
              msgno = i
+         isspam = (prob >= SPAM_THRESHOLD)
          if isspam:
              spams += 1
!             if not reverse:
!                 print "%6s %4.2f %1s" % (msgno, prob, isspam and "S" or "."),
!                 print hammie.formatclues(clues)
          else:
              hams += 1
!             if reverse:
!                 print "%6s %4.2f %1s" % (msgno, prob, isspam and "S" or "."),
!                 print hammie.formatclues(clues)
!     return (spams, hams)

  def createbayes(pck=DEFAULTDB, usedb=False):
***************
*** 366,370 ****
      """Main program; parse options and go."""
      try:
!         opts, args = getopt.getopt(sys.argv[1:], 'hdfg:s:p:u:')
      except getopt.error, msg:
          usage(2, msg)
--- 374,378 ----
      """Main program; parse options and go."""
      try:
!         opts, args = getopt.getopt(sys.argv[1:], 'hdfg:s:p:u:r')
      except getopt.error, msg:
          usage(2, msg)
***************
*** 377,380 ****
--- 385,389 ----
      spam = []
      unknown = []
+     reverse = 0
      do_filter = usedb = False
      for opt, arg in opts:
***************
*** 393,396 ****
--- 402,407 ----
          elif opt == '-u':
              unknown.append(arg)
+         elif opt == '-r':
+             reverse = 1
      if args:
          usage(2, "Positional arguments not allowed")
***************
*** 424,431 ****

      if unknown:
          for u in unknown:
              if len(unknown) > 1:
                  print "Scoring", u
!             score(h, u)

  if __name__ == "__main__":
--- 435,447 ----

      if unknown:
+         (spams, hams) = (0, 0)
          for u in unknown:
              if len(unknown) > 1:
                  print "Scoring", u
!             s, h = score(h, u, reverse)
!             spams += s
!             hams += h
!         print "Total %d spam, %d ham" % (spams, hams)
!             

  if __name__ == "__main__":

Index: runtest.sh
===================================================================
RCS file: /cvsroot/spambayes/spambayes/runtest.sh,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** runtest.sh	1 Oct 2002 17:53:54 -0000	1.5
--- runtest.sh	6 Oct 2002 00:23:40 -0000	1.6
***************
*** 25,40 ****

  # Number of messages per rebalanced set
! RNUM=200

  # Number of sets
! SETS=5

  if [ -n "$REBAL" ]; then
      # Put them all into reservoirs
!     python rebal.py -r Data/Ham/reservoir -s Data/Ham/Set -n 0 -Q
!     python rebal.py -r Data/Spam/reservoir -s Data/Spam/Set -n 0 -Q
      # Rebalance
!     python rebal.py -r Data/Ham/reservoir -s Data/Ham/Set -n $RNUM -Q
!     python rebal.py -r Data/Spam/reservoir -s Data/Spam/Set -n $RNUM -Q
  fi

--- 25,40 ----

  # Number of messages per rebalanced set
! RNUM=${REBAL_RNUM:-200}

  # Number of sets
! SETS=${REBAL_SETS:-5}

  if [ -n "$REBAL" ]; then
      # Put them all into reservoirs
!     python rebal.py -r Data/Ham/reservoir -s Data/Ham/Set -n 0 -q
!     python rebal.py -r Data/Spam/reservoir -s Data/Spam/Set -n 0 -q
      # Rebalance
!     python rebal.py -r Data/Ham/reservoir -s Data/Ham/Set -n $RNUM -q -Q
!     python rebal.py -r Data/Spam/reservoir -s Data/Spam/Set -n $RNUM -q -Q
  fi

***************
*** 49,52 ****
--- 49,56 ----

          python cmp.py run1s run2s | tee results.txt
+ 	;;
+     *)
+ 	echo "Available targets:"
+ 	sed -n 's/^\(  [a-z|]*\))$/\1/p' $0
  	;;
  esac