[Spambayes-checkins]
spambayes README.txt,1.19,1.20 pop3proxy.py,1.1,1.2 hammie.py,1.16,1.17
Richie Hindle
richiehindle@users.sourceforge.net
Wed, 18 Sep 2002 15:01:42 -0700
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv20824
Modified Files:
README.txt pop3proxy.py hammie.py
Log Message:
Added SPAM_THRESHOLD and createbayes() to hammie, so
that pop3proxy can use them.
Made pop3proxy add simple X-Hammie-Disposition headers
rather than using its own header format.
Made pop3proxy.py obey the Python style guide.
Removed the copyright and license from pop3proxy.py - I've
assigned copyright to the PSF.
Index: README.txt
===================================================================
RCS file: /cvsroot/spambayes/spambayes/README.txt,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** README.txt 17 Sep 2002 04:49:16 -0000 1.19
--- README.txt 18 Sep 2002 22:01:39 -0000 1.20
***************
*** 60,63 ****
--- 60,69 ----
Needs to be made faster, especially for writes.
+ pop3proxy.py
+ A spam-classifying POP3 proxy. It adds a spam-judgement header to
+ each mail as it's retrieved, so you can use your email client's
+ filters to deal with them without needing to fiddle with your email
+ delivery system.
+
Concrete Test Drivers
Index: pop3proxy.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** pop3proxy.py 16 Sep 2002 07:57:20 -0000 1.1
--- pop3proxy.py 18 Sep 2002 22:01:39 -0000 1.2
***************
*** 1,31 ****
#!/usr/bin/env python
! # pop3proxy is released under the terms of the following MIT-style license:
! #
! # Copyright (c) Entrian Solutions 2002
! #
! # Permission is hereby granted, free of charge, to any person obtaining a
! # copy of this software and associated documentation files (the "Software"),
! # to deal in the Software without restriction, including without limitation
! # the rights to use, copy, modify, merge, publish, distribute, sublicense,
[...1035 lines suppressed...]
# Named POP3 server, default port.
! main( args[ 0 ], 110, 110, pickleName, useDB )
! elif len( args ) == 2:
# Named POP3 server, named port.
! main( args[ 0 ], int( args[ 1 ] ), 110, pickleName, useDB )
else:
--- 571,581 ----
asyncore.loop()
! elif len(args) == 1:
# Named POP3 server, default port.
! main(args[0], 110, 110, pickleName, useDB)
! elif len(args) == 2:
# Named POP3 server, named port.
! main(args[0], int(args[1]), 110, pickleName, useDB)
else:
Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammie.py,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** hammie.py 12 Sep 2002 05:10:02 -0000 1.16
--- hammie.py 18 Sep 2002 22:01:39 -0000 1.17
***************
*** 47,50 ****
--- 47,53 ----
DEFAULTDB = "hammie.db"
+ # Probability at which a message is considered spam
+ SPAM_THRESHOLD = 0.9
+
# Tim's tokenizer kicks far more booty than anything I would have
# written. Score one for analysis ;)
***************
*** 232,236 ****
msg = email.message_from_file(input)
prob, clues = bayes.spamprob(tokenize(msg), True)
! if prob < 0.9:
disp = "No"
else:
--- 235,239 ----
msg = email.message_from_file(input)
prob, clues = bayes.spamprob(tokenize(msg), True)
! if prob < SPAM_THRESHOLD:
disp = "No"
else:
***************
*** 250,254 ****
i += 1
prob, clues = bayes.spamprob(tokenize(msg), True)
! isspam = prob >= 0.9
if hasattr(msg, '_mh_msgno'):
msgno = msg._mh_msgno
--- 253,257 ----
i += 1
prob, clues = bayes.spamprob(tokenize(msg), True)
! isspam = prob >= SPAM_THRESHOLD
if hasattr(msg, '_mh_msgno'):
msgno = msg._mh_msgno
***************
*** 263,266 ****
--- 266,288 ----
print "Total %d spam, %d ham" % (spams, hams)
+ def createbayes(pck=DEFAULTDB, usedb=False):
+ """Create a GrahamBayes instance for the given pickle (which
+ doesn't have to exist). Create a PersistentGrahamBayes if
+ usedb is True."""
+ if usedb:
+ bayes = PersistentGrahamBayes(pck)
+ else:
+ bayes = None
+ try:
+ fp = open(pck, 'rb')
+ except IOError, e:
+ if e.errno <> errno.ENOENT: raise
+ else:
+ bayes = pickle.load(fp)
+ fp.close()
+ if bayes is None:
+ bayes = classifier.GrahamBayes()
+ return bayes
+
def usage(code, msg=''):
"""Print usage message and sys.exit(code)."""
***************
*** 304,320 ****
save = False
! if usedb:
! bayes = PersistentGrahamBayes(pck)
! else:
! bayes = None
! try:
! fp = open(pck, 'rb')
! except IOError, e:
! if e.errno <> errno.ENOENT: raise
! else:
! bayes = pickle.load(fp)
! fp.close()
! if bayes is None:
! bayes = classifier.GrahamBayes()
if good:
--- 326,330 ----
save = False
! bayes = createbayes(pck, usedb)
if good: