[Spambayes-checkins] spambayes README.txt,1.19,1.20 pop3proxy.py,1.1,1.2 hammie.py,1.16,1.17

Richie Hindle richiehindle@users.sourceforge.net
Wed, 18 Sep 2002 15:01:42 -0700


Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv20824

Modified Files:
	README.txt pop3proxy.py hammie.py 
Log Message:
Added SPAM_THRESHOLD and createbayes() to hammie, so
that pop3proxy can use them.
Made pop3proxy add simple X-Hammie-Disposition headers
rather than using its own header format.
Made pop3proxy.py obey the Python style guide.
Removed the copyright and license from pop3proxy.py - I've
assigned copyright to the PSF.


Index: README.txt
===================================================================
RCS file: /cvsroot/spambayes/spambayes/README.txt,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** README.txt	17 Sep 2002 04:49:16 -0000	1.19
--- README.txt	18 Sep 2002 22:01:39 -0000	1.20
***************
*** 60,63 ****
--- 60,69 ----
      Needs to be made faster, especially for writes.
  
+ pop3proxy.py
+     A spam-classifying POP3 proxy.  It adds a spam-judgement header to
+     each mail as it's retrieved, so you can use your email client's
+     filters to deal with them without needing to fiddle with your email
+     delivery system.
+ 
  
  Concrete Test Drivers

Index: pop3proxy.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** pop3proxy.py	16 Sep 2002 07:57:20 -0000	1.1
--- pop3proxy.py	18 Sep 2002 22:01:39 -0000	1.2
***************
*** 1,31 ****
  #!/usr/bin/env python
  
! # pop3proxy is released under the terms of the following MIT-style license:
! #
! # Copyright (c) Entrian Solutions 2002
! #
! # Permission is hereby granted, free of charge, to any person obtaining a
! # copy of this software and associated documentation files (the "Software"),
! # to deal in the Software without restriction, including without limitation
! # the rights to use, copy, modify, merge, publish, distribute, sublicense,
[...1035 lines suppressed...]
          # Named POP3 server, default port.
!         main( args[ 0 ], 110, 110, pickleName, useDB )
      
!     elif len( args ) == 2:
          # Named POP3 server, named port.
!         main( args[ 0 ], int( args[ 1 ] ), 110, pickleName, useDB )
      
      else:
--- 571,581 ----
          asyncore.loop()
      
!     elif len(args) == 1:
          # Named POP3 server, default port.
!         main(args[0], 110, 110, pickleName, useDB)
      
!     elif len(args) == 2:
          # Named POP3 server, named port.
!         main(args[0], int(args[1]), 110, pickleName, useDB)
      
      else:

Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammie.py,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** hammie.py	12 Sep 2002 05:10:02 -0000	1.16
--- hammie.py	18 Sep 2002 22:01:39 -0000	1.17
***************
*** 47,50 ****
--- 47,53 ----
  DEFAULTDB = "hammie.db"
  
+ # Probability at which a message is considered spam
+ SPAM_THRESHOLD = 0.9
+ 
  # Tim's tokenizer kicks far more booty than anything I would have
  # written.  Score one for analysis ;)
***************
*** 232,236 ****
      msg = email.message_from_file(input)
      prob, clues = bayes.spamprob(tokenize(msg), True)
!     if prob < 0.9:
          disp = "No"
      else:
--- 235,239 ----
      msg = email.message_from_file(input)
      prob, clues = bayes.spamprob(tokenize(msg), True)
!     if prob < SPAM_THRESHOLD:
          disp = "No"
      else:
***************
*** 250,254 ****
          i += 1
          prob, clues = bayes.spamprob(tokenize(msg), True)
!         isspam = prob >= 0.9
          if hasattr(msg, '_mh_msgno'):
              msgno = msg._mh_msgno
--- 253,257 ----
          i += 1
          prob, clues = bayes.spamprob(tokenize(msg), True)
!         isspam = prob >= SPAM_THRESHOLD
          if hasattr(msg, '_mh_msgno'):
              msgno = msg._mh_msgno
***************
*** 263,266 ****
--- 266,288 ----
      print "Total %d spam, %d ham" % (spams, hams)
  
+ def createbayes(pck=DEFAULTDB, usedb=False):
+     """Create a GrahamBayes instance for the given pickle (which
+     doesn't have to exist).  Create a PersistentGrahamBayes if
+     usedb is True."""
+     if usedb:
+         bayes = PersistentGrahamBayes(pck)
+     else:
+         bayes = None
+         try:
+             fp = open(pck, 'rb')
+         except IOError, e:
+             if e.errno <> errno.ENOENT: raise
+         else:
+             bayes = pickle.load(fp)
+             fp.close()
+         if bayes is None:
+             bayes = classifier.GrahamBayes()
+     return bayes
+ 
  def usage(code, msg=''):
      """Print usage message and sys.exit(code)."""
***************
*** 304,320 ****
      save = False
  
!     if usedb:
!         bayes = PersistentGrahamBayes(pck)
!     else:
!         bayes = None
!         try:
!             fp = open(pck, 'rb')
!         except IOError, e:
!             if e.errno <> errno.ENOENT: raise
!         else:
!             bayes = pickle.load(fp)
!             fp.close()
!         if bayes is None:
!             bayes = classifier.GrahamBayes()
  
      if good:
--- 326,330 ----
      save = False
  
!     bayes = createbayes(pck, usedb)
  
      if good: