[Spambayes-checkins] spambayes hammie.py,1.2,1.3

Guido van Rossum gvanrossum@users.sourceforge.net
Fri, 06 Sep 2002 13:23:18 -0700


Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv5367

Modified Files:
	hammie.py 
Log Message:
Add a hack to train directly on a mailbox full of .txt files, like
Bruce Guenter's spam archive at http://www.em.ca/~bruceg/spam/.


Index: hammie.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/hammie.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** hammie.py	6 Sep 2002 20:12:05 -0000	1.2
--- hammie.py	6 Sep 2002 20:23:16 -0000	1.3
***************
*** 25,32 ****
--- 25,35 ----
  """
  
+ from __future__ import generators
+ 
  import sys
  import os
  import getopt
  import mailbox
+ import glob
  import email
  import classifier
***************
*** 158,161 ****
--- 161,182 ----
  
  
+ class DirOfTxtFileMailbox:
+ 
+     """Mailbox directory consisting of .txt files."""
+ 
+     def __init__(self, dirname, factory):
+         self.names = glob.glob(os.path.join(dirname, "*.txt"))
+         self.factory = factory
+ 
+     def __iter__(self):
+         for name in self.names:
+             try:
+                 f = open(name)
+             except IOError:
+                 continue
+             yield self.factory(f)
+             f.close()
+ 
+ 
  def train(bayes, msgs, is_spam):
      """Train bayes with a message"""
***************
*** 167,171 ****
  
      if os.path.isdir(msgs):
!         mbox = mailbox.MHMailbox(msgs, _factory)
      else:
          fp = open(msgs)
--- 188,197 ----
  
      if os.path.isdir(msgs):
!         # XXX This is bogus: use an MHMailbox if the pathname contains /Mail/
!         # XXX Should really use '+foo' MH folder styles.  Later.
!         if msgs.find("/Mail/") >= 0:
!             mbox = mailbox.MHMailbox(msgs, _factory)
!         else:
!             mbox = DirOfTxtFileMailbox(msgs, _factory)
      else:
          fp = open(msgs)