From anadelonbrin at users.sourceforge.net Tue Jan 3 03:47:28 2006 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue, 3 Jan 2006 03:47:28 +0100 (CET) Subject: [Spambayes-checkins] spambayes/utilities export_apple_mail.py, NONE, 1.1 Message-ID: <20060103024728.E49C11E4002@bag.python.org> Update of /cvsroot/spambayes/spambayes/utilities In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19774/utilities Added Files: export_apple_mail.py Log Message: Simple utility to convert an Apple Mail 2.x user's ~/Library/Mail folder of .emlx files to the standard spambayes testtools format. (Original files are not altered). --- NEW FILE: export_apple_mail.py --- #!/usr/bin/env python """export_apple_mail.py Converts Apple Mail's emlx files to plain text files usable by SpamBayes's testtools. Adding some way to display help would be good. For now, read this file and run the script with the path to the user's ~/Library/Mail directory. (Tested on Windows XP remotely accessing the Mac filesystem. I don't know if the bundling of the files in the Mail directory would affect this script or not, and can't be bothered finding out right now). """ import os import sys from spambayes.Options import options def emlx_to_rfc2822(in_fn, out_fn): """Convert an individual file in Apple Mail's emlx format to a file with just the RFC2822 message. The emlx format is simply the length of the message (as a string) on the first line, then the raw message text, then the contents of a plist (XML) file that contains data that Mail uses (subject, flags, sender, and so forth). We ignore this plist data. """ fin = file(in_fn) fout = file(out_fn, "w") length = int(fin.readline().rstrip()) fout.write(fin.read(length)) plist = fin.read() def export(mail_dir): """Scans through the specified directory, which should be the Apple Mail user's ~\Library\Mail folder, converting all found emlx files to simple RFC2822 messages suitable for use with the SpamBayes testtools. 
Messages are copied (the originals are left untouched) into the standard SpamBayes testtools setup (all files are put in the reservoir; use rebal.py to distribute). The script assumes that all messages outside of Mail's Junk folder are ham, and all messages inside the Junk folder are spam. Any messages in the "Sent Messages" folders are skipped. A simple extension of this function would allow only certain accounts/mailboxes to be exported. """ for dirname in os.listdir(mail_dir): # There is no mail at the top level. dirname = os.path.join(mail_dir, dirname) if os.path.isdir(dirname): export_directory(mail_dir, dirname) print def export_directory(parent, dirname): if parent == "Junk.mbox": # All of these should be spam. Make sure that you # check for false positives first! dest_dir = os.path.join(\ os.path.dirname(options["TestDriver", "spam_directories"]), "reservoir") elif parent == "Sent Messages.mbox" or parent == "Drafts.mbox": # We don't do anything with outgoing mail. return else: # Everything else is ham. 
dest_dir = os.path.join(\ os.path.dirname(options["TestDriver", "ham_directories"]), "reservoir") dest_dir = os.path.normpath(dest_dir) for path in os.listdir(dirname): path = os.path.join(dirname, path) if os.path.isdir(path): export_directory(dirname, path) else: fn, ext = os.path.splitext(path) if ext == ".emlx": in_fn = os.path.join(dirname, path) out_fn = os.path.join(dest_dir, os.path.basename(fn) + ".txt") emlx_to_rfc2822(in_fn, out_fn) sys.stdout.write('.') if __name__ == "__main__": export(sys.argv[1]) From anadelonbrin at users.sourceforge.net Wed Jan 4 05:31:43 2006 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Wed, 4 Jan 2006 05:31:43 +0100 (CET) Subject: [Spambayes-checkins] spambayes/utilities export_apple_mail.py, 1.1, 1.2 Message-ID: <20060104043143.1E5FE1E4002@bag.python.org> Update of /cvsroot/spambayes/spambayes/utilities In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5565/utilities Modified Files: export_apple_mail.py Log Message: Add a few options: showing help, excluding accounts/mailboxes, quiet. Index: export_apple_mail.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/utilities/export_apple_mail.py,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** export_apple_mail.py 3 Jan 2006 02:47:24 -0000 1.1 --- export_apple_mail.py 4 Jan 2006 04:31:38 -0000 1.2 *************** *** 1,12 **** #!/usr/bin/env python ! """export_apple_mail.py ! ! Converts Apple Mail's emlx files to plain text files usable by SpamBayes's testtools. ! Adding some way to display help would be good. For now, read ! this file and run the script with the path to the user's ! ~/Library/Mail directory. (Tested on Windows XP remotely accessing the Mac filesystem. --- 1,21 ---- #!/usr/bin/env python ! """Convert Apple Mail's emlx files to plain text files usable by SpamBayes's testtools. ! Usage: %(program)s [options] path ! ! Where: ! -h ! Show usage and exit. ! 
-e mailbox ! Name of mailbox or account to exclude. Drafts and ! Sent Messages are always excluded. ! -q ! Don't print status indicators. ! -o section:option:value ! Set [section, option] in the options database to value. ! ! "path" should be the path to the user's ~/Library/Mail directory. (Tested on Windows XP remotely accessing the Mac filesystem. *************** *** 18,24 **** --- 27,42 ---- import os import sys + import getopt from spambayes.Options import options + def usage(code, msg=''): + """Print usage message and sys.exit(code).""" + if msg: + print >> sys.stderr, msg + print >> sys.stderr + print >> sys.stderr, __doc__ % globals() + sys.exit(code) + def emlx_to_rfc2822(in_fn, out_fn): """Convert an individual file in Apple Mail's emlx format *************** *** 37,41 **** plist = fin.read() ! def export(mail_dir): """Scans through the specified directory, which should be the Apple Mail user's ~\Library\Mail folder, converting --- 55,59 ---- plist = fin.read() ! def export(mail_dir, exclude, quiet): """Scans through the specified directory, which should be the Apple Mail user's ~\Library\Mail folder, converting *************** *** 60,68 **** dirname = os.path.join(mail_dir, dirname) if os.path.isdir(dirname): ! export_directory(mail_dir, dirname) ! print ! def export_directory(parent, dirname): ! if parent == "Junk.mbox": # All of these should be spam. Make sure that you # check for false positives first! --- 78,86 ---- dirname = os.path.join(mail_dir, dirname) if os.path.isdir(dirname): ! export_directory(mail_dir, dirname, exclude, quiet) ! def export_directory(parent, dirname, exclude, quiet): ! base_parent = os.path.splitext(os.path.basename(parent))[0] ! if base_parent == "Junk": # All of these should be spam. Make sure that you # check for false positives first! *************** *** 70,75 **** os.path.dirname(options["TestDriver", "spam_directories"]), "reservoir") ! elif parent == "Sent Messages.mbox" or parent == "Drafts.mbox": ! 
# We don't do anything with outgoing mail. return else: --- 88,92 ---- os.path.dirname(options["TestDriver", "spam_directories"]), "reservoir") ! elif base_parent in exclude: return else: *************** *** 82,86 **** path = os.path.join(dirname, path) if os.path.isdir(path): ! export_directory(dirname, path) else: fn, ext = os.path.splitext(path) --- 99,103 ---- path = os.path.join(dirname, path) if os.path.isdir(path): ! export_directory(dirname, path, exclude, quiet) else: fn, ext = os.path.splitext(path) *************** *** 90,95 **** os.path.basename(fn) + ".txt") emlx_to_rfc2822(in_fn, out_fn) ! sys.stdout.write('.') if __name__ == "__main__": ! export(sys.argv[1]) --- 107,138 ---- os.path.basename(fn) + ".txt") emlx_to_rfc2822(in_fn, out_fn) ! if not quiet: ! sys.stdout.write('.') ! if not quiet: ! print ! ! def main(): ! try: ! opts, args = getopt.getopt(sys.argv[1:], 'hqe:o:') ! except getopt.error, msg: ! usage(1, msg) ! ! quiet = False ! # We don't do anything with outgoing mail. ! exclude = ["Sent Messages", "Drafts"] ! for opt, arg in opts: ! if opt == '-h': ! usage(0) ! elif opt == '-e': ! exclude.append(arg) ! elif opt == '-q': ! quiet = True ! elif opt in ('-o', '--option'): ! options.set_from_cmdline(arg, sys.stderr) ! ! if len(args) != 1: ! usage(1, "Must specify exactly one path.") ! export(args[0], exclude, quiet) if __name__ == "__main__": ! main() From anadelonbrin at users.sourceforge.net Fri Jan 6 09:39:00 2006 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Fri, 6 Jan 2006 09:39:00 +0100 (CET) Subject: [Spambayes-checkins] spambayes/spambayes OptionsClass.py, 1.30, 1.31 Message-ID: <20060106083900.680A61E4002@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5904/spambayes Modified Files: OptionsClass.py Log Message: A test "raise" was checked in by mistake at some point; remove it. 
Index: OptionsClass.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/OptionsClass.py,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** OptionsClass.py 14 Dec 2005 07:33:12 -0000 1.30 --- OptionsClass.py 6 Jan 2006 08:38:52 -0000 1.31 *************** *** 100,104 **** try: import textwrap - raise ImportError except ImportError: # textwrap was added in 2.3 --- 100,103 ---- *************** *** 108,112 **** # could be duplicated here if anyone cared. def wrap(s): ! length = 10 return [s[i:i+length].strip() for i in xrange(0, len(s), length)] else: --- 107,111 ---- # could be duplicated here if anyone cared. def wrap(s): ! length = 40 return [s[i:i+length].strip() for i in xrange(0, len(s), length)] else: