[Spambayes-checkins]
spambayes README.txt,1.32,1.33 hammie.py,1.27,1.28 runtest.sh,1.5,1.6
Neale Pickett
npickett@users.sourceforge.net
Sat, 05 Oct 2002 17:23:43 -0700
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv20019
Modified Files:
README.txt hammie.py runtest.sh
Log Message:
* Updated README to mention hammiesrv.
* hammie now supports a -r flag, which makes -u report hams instead of
spams (thanks Alexander Leidinger).
* hammie now prints a single summary covering all -u runs instead of one
summary per run (thanks Alexander Leidinger). If you liked it better
the other way, please let me know.
Index: README.txt
===================================================================
RCS file: /cvsroot/spambayes/spambayes/README.txt,v
retrieving revision 1.32
retrieving revision 1.33
diff -C2 -d -r1.32 -r1.33
*** README.txt 5 Oct 2002 04:22:49 -0000 1.32
--- README.txt 6 Oct 2002 00:23:40 -0000 1.33
***************
*** 74,78 ****
hammie.py
A spamassassin-like filter which uses tokenizer and classifier (above).
! Needs to be made faster, especially for writes.
pop3proxy.py
--- 74,81 ----
hammie.py
A spamassassin-like filter which uses tokenizer and classifier (above).
!
! hammiesrv.py
! A first stab at making hammie into a client/server model, using
! XML-RPC.
pop3proxy.py
Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammie.py,v
retrieving revision 1.27
retrieving revision 1.28
diff -C2 -d -r1.27 -r1.28
*** hammie.py 1 Oct 2002 15:07:45 -0000 1.27
--- hammie.py 6 Oct 2002 00:23:40 -0000 1.28
***************
*** 18,21 ****
--- 18,24 ----
mbox of unknown messages. A ham/spam decision is reported for each.
Can be specified more than once.
+ -r
+ reverse the meaning of the check (report ham instead of spam).
+ Only meaningful with the -u option.
-p FILE
use file as the persistent store. loads data from this file if it
***************
*** 27,31 ****
-f
run as a filter: read a single message from stdin, add an
! %(DISPHEADER)s header, and write it to stdout.
"""
--- 30,35 ----
-f
run as a filter: read a single message from stdin, add an
! %(DISPHEADER)s header, and write it to stdout. If you want to
! run from procmail, this is your option.
"""
***************
*** 314,318 ****
print
! def score(hammie, msgs):
"""Score (judge) all messages from a mailbox."""
# XXX The reporting needs work!
--- 318,322 ----
print
! def score(hammie, msgs, reverse=0):
"""Score (judge) all messages from a mailbox."""
# XXX The reporting needs work!
***************
*** 323,338 ****
i += 1
prob, clues = hammie.score(msg, True)
- isspam = prob >= SPAM_THRESHOLD
if hasattr(msg, '_mh_msgno'):
msgno = msg._mh_msgno
else:
msgno = i
if isspam:
spams += 1
! print "%6s %4.2f %1s" % (msgno, prob, isspam and "S" or "."),
! print hammie.formatclues(clues)
else:
hams += 1
! print "Total %d spam, %d ham" % (spams, hams)
def createbayes(pck=DEFAULTDB, usedb=False):
--- 327,346 ----
i += 1
prob, clues = hammie.score(msg, True)
if hasattr(msg, '_mh_msgno'):
msgno = msg._mh_msgno
else:
msgno = i
+ isspam = (prob >= SPAM_THRESHOLD)
if isspam:
spams += 1
! if not reverse:
! print "%6s %4.2f %1s" % (msgno, prob, isspam and "S" or "."),
! print hammie.formatclues(clues)
else:
hams += 1
! if reverse:
! print "%6s %4.2f %1s" % (msgno, prob, isspam and "S" or "."),
! print hammie.formatclues(clues)
! return (spams, hams)
def createbayes(pck=DEFAULTDB, usedb=False):
***************
*** 366,370 ****
"""Main program; parse options and go."""
try:
! opts, args = getopt.getopt(sys.argv[1:], 'hdfg:s:p:u:')
except getopt.error, msg:
usage(2, msg)
--- 374,378 ----
"""Main program; parse options and go."""
try:
! opts, args = getopt.getopt(sys.argv[1:], 'hdfg:s:p:u:r')
except getopt.error, msg:
usage(2, msg)
***************
*** 377,380 ****
--- 385,389 ----
spam = []
unknown = []
+ reverse = 0
do_filter = usedb = False
for opt, arg in opts:
***************
*** 393,396 ****
--- 402,407 ----
elif opt == '-u':
unknown.append(arg)
+ elif opt == '-r':
+ reverse = 1
if args:
usage(2, "Positional arguments not allowed")
***************
*** 424,431 ****
if unknown:
for u in unknown:
if len(unknown) > 1:
print "Scoring", u
! score(h, u)
if __name__ == "__main__":
--- 435,447 ----
if unknown:
+ (spams, hams) = (0, 0)
for u in unknown:
if len(unknown) > 1:
print "Scoring", u
! s, h = score(h, u, reverse)
! spams += s
! hams += h
! print "Total %d spam, %d ham" % (spams, hams)
!
if __name__ == "__main__":
Index: runtest.sh
===================================================================
RCS file: /cvsroot/spambayes/spambayes/runtest.sh,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** runtest.sh 1 Oct 2002 17:53:54 -0000 1.5
--- runtest.sh 6 Oct 2002 00:23:40 -0000 1.6
***************
*** 25,40 ****
# Number of messages per rebalanced set
! RNUM=200
# Number of sets
! SETS=5
if [ -n "$REBAL" ]; then
# Put them all into reservoirs
! python rebal.py -r Data/Ham/reservoir -s Data/Ham/Set -n 0 -Q
! python rebal.py -r Data/Spam/reservoir -s Data/Spam/Set -n 0 -Q
# Rebalance
! python rebal.py -r Data/Ham/reservoir -s Data/Ham/Set -n $RNUM -Q
! python rebal.py -r Data/Spam/reservoir -s Data/Spam/Set -n $RNUM -Q
fi
--- 25,40 ----
# Number of messages per rebalanced set
! RNUM=${REBAL_RNUM:-200}
# Number of sets
! SETS=${REBAL_SETS:-5}
if [ -n "$REBAL" ]; then
# Put them all into reservoirs
! python rebal.py -r Data/Ham/reservoir -s Data/Ham/Set -n 0 -q
! python rebal.py -r Data/Spam/reservoir -s Data/Spam/Set -n 0 -q
# Rebalance
! python rebal.py -r Data/Ham/reservoir -s Data/Ham/Set -n $RNUM -q -Q
! python rebal.py -r Data/Spam/reservoir -s Data/Spam/Set -n $RNUM -q -Q
fi
***************
*** 49,52 ****
--- 49,56 ----
python cmp.py run1s run2s | tee results.txt
+ ;;
+ *)
+ echo "Available targets:"
+ sed -n 's/^\( [a-z|]*\))$/\1/p' $0
;;
esac