From timstone4 at users.sourceforge.net Sat Feb 1 09:57:07 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Sat Feb 1 12:57:10 2003 Subject: [Spambayes-checkins] spambayes dbExpImp.py,1.1,1.2 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv22946 Modified Files: dbExpImp.py Log Message: Corrected problem iterating dbm based classifier. There is currently a semantic difference between the implementation of pickledClassifier and DBDictClassifier. This should go away if we ever stop using pickles... Index: dbExpImp.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/dbExpImp.py,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** dbExpImp.py 31 Jan 2003 20:01:50 -0000 1.1 --- dbExpImp.py 1 Feb 2003 17:57:04 -0000 1.2 *************** *** 46,49 **** --- 46,50 ---- -e : export -i : import + -v : verbose mode (some additional diagnostic messages) -f: FN : flat file to export to or import from -d: FN : name of pickled database file to use *************** *** 58,80 **** Examples: ! dbExpImp -e -d mybayes.db -f mybayes.db.export ! Exports pickled mybayes.db into mybayes.db.export as a csv flat file ! ! dbExpImp -i -D mybayes.db -f mybayes.db.export ! Imports mybayes.eb.export into a new DBM mybayes.db ! dbExpImp -e -i -n -d mybayes.db -f mybayes.db.export ! Exports then imports (reorganizes) new pickled mybayes.db ! dbExpImp -e -d abayes.db -f abayes.export ! dbExpImp -i -D abayes.db -f abayes.export ! Converts a bayes database from pickle to DBM ! dbExpImp -e -d abayes.db -f abayes.export ! dbExpImp -e -d bbayes.db -f bbayes.export ! dbExpImp -i -d newbayes.db -f abayes.export ! dbExpImp -i -m -d newbayes.db -f bbayes.export ! Creates a new database (newbayes.db) from two databases (abayes.db, bbayes.db) To Do: --- 59,81 ---- Examples: ! Export pickled mybayes.db into mybayes.db.export as a csv flat file ! 
dbExpImp -e -d mybayes.db -f mybayes.db.export ! Import mybayes.eb.export into a new DBM mybayes.db ! dbExpImp -i -D mybayes.db -f mybayes.db.export ! ! Export, then import (reorganize) new pickled mybayes.db ! dbExpImp -e -i -n -d mybayes.db -f mybayes.db.export ! Convert a bayes database from pickle to DBM ! dbExpImp -e -d abayes.db -f abayes.export ! dbExpImp -i -D abayes.db -f abayes.export ! Create a new database (newbayes.db) from two databases (abayes.db, bbayes.db) + dbExpImp -e -d abayes.db -f abayes.export + dbExpImp -e -d bbayes.db -f bbayes.export + dbExpImp -i -d newbayes.db -f abayes.export + dbExpImp -i -m -d newbayes.db -f bbayes.export To Do: *************** *** 92,95 **** --- 93,97 ---- import storage + from spambayes.Options import options import sys, os, getopt, errno, re import urllib *************** *** 100,105 **** --- 102,110 ---- if useDBM: bayes = storage.DBDictClassifier(dbFN) + words = bayes.db.keys() + words.remove(bayes.statekey) else: bayes = storage.PickledClassifier(dbFN) + words = bayes.wordinfo.keys() try: *************** *** 115,121 **** fp.write("%s,%s,\n" % (nham, nspam)) ! for word in bayes.wordinfo: ! hamcount = bayes.wordinfo[word].hamcount ! spamcount = bayes.wordinfo[word].spamcount word = urllib.quote(word) fp.write("%s`%s`%s`\n" % (word, hamcount, spamcount)) --- 120,127 ---- fp.write("%s,%s,\n" % (nham, nspam)) ! for word in words: ! wi = bayes._wordinfoget(word) ! hamcount = wi.hamcount ! spamcount = wi.spamcount word = urllib.quote(word) fp.write("%s`%s`%s`\n" % (word, hamcount, spamcount)) *************** *** 177,181 **** try: ! opts, args = getopt.getopt(sys.argv[1:], 'iehmd:D:f:') except getopt.error, msg: print >>sys.stderr, str(msg) + '\n\n' + __doc__ --- 183,187 ---- try: ! 
opts, args = getopt.getopt(sys.argv[1:], 'iehmvd:D:f:') except getopt.error, msg: print >>sys.stderr, str(msg) + '\n\n' + __doc__ *************** *** 208,211 **** --- 214,219 ---- elif opt == '-m': newDBM = False + elif opt == '-v': + options.verbose = True if (dbFN and flatFN): From timstone4 at users.sourceforge.net Sat Feb 1 11:07:12 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Sat Feb 1 14:07:16 2003 Subject: [Spambayes-checkins] spambayes dbExpImp.py,1.2,1.3 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv18578 Modified Files: dbExpImp.py Log Message: Cleaned up and added some informational messages. Index: dbExpImp.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/dbExpImp.py,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** dbExpImp.py 1 Feb 2003 17:57:04 -0000 1.2 --- dbExpImp.py 1 Feb 2003 19:07:10 -0000 1.3 *************** *** 92,96 **** from __future__ import generators ! import storage from spambayes.Options import options import sys, os, getopt, errno, re --- 92,96 ---- from __future__ import generators ! import spambayes.storage from spambayes.Options import options import sys, os, getopt, errno, re *************** *** 99,109 **** def runExport(dbFN, useDBM, outFN): - print "running export on %s" % (dbFN) if useDBM: ! bayes = storage.DBDictClassifier(dbFN) words = bayes.db.keys() words.remove(bayes.statekey) else: ! bayes = storage.PickledClassifier(dbFN) words = bayes.wordinfo.keys() --- 99,108 ---- def runExport(dbFN, useDBM, outFN): if useDBM: ! bayes = spambayes.storage.DBDictClassifier(dbFN) words = bayes.db.keys() words.remove(bayes.statekey) else: ! bayes = spambayes.storage.PickledClassifier(dbFN) words = bayes.wordinfo.keys() *************** *** 116,120 **** nham = bayes.nham; nspam = bayes.nspam; ! 
print "nham %s, nspam %s" % (nham, nspam) fp.write("%s,%s,\n" % (nham, nspam)) --- 115,122 ---- nham = bayes.nham; nspam = bayes.nspam; ! ! print "Exporting database %s to file %s" % (dbFN, outFN) ! print "Database has %s ham, %s spam, and %s words" \ ! % (nham, nspam, len(words)) fp.write("%s,%s,\n" % (nham, nspam)) *************** *** 138,145 **** raise if useDBM: ! bayes = storage.DBDictClassifier(dbFN) else: ! bayes = storage.PickledClassifier(dbFN) try: --- 140,159 ---- raise + try: + os.unlink(dbFN+".dat") + except OSError, e: + if e.errno != 2: # errno. + raise + + try: + os.unlink(dbFN+".dir") + except OSError, e: + if e.errno != 2: # errno. + raise + if useDBM: ! bayes = spambayes.storage.DBDictClassifier(dbFN) else: ! bayes = spambayes.storage.PickledClassifier(dbFN) try: *************** *** 150,154 **** nline = fp.readline() - print nline (nham, nspam, junk) = re.split(',', nline) --- 164,167 ---- *************** *** 160,163 **** --- 173,183 ---- bayes.nspam += nspam + if newDBM: + impType = "Importing" + else: + impType = "Merging" + + print "%s database %s using file %s" % (impType, dbFN, inFN) + lines = fp.readlines() *************** *** 177,181 **** --- 197,216 ---- fp.close() + + print "Storing database, please be patient. Even moderately large" + print "databases may take a very long time to store." 
bayes.store() + print "Finished storing database" + + if useDBM: + words = bayes.db.keys() + words.remove(bayes.statekey) + else: + words = bayes.wordinfo.keys() + + print "Database has %s ham, %s spam, and %s words" \ + % (bayes.nham, bayes.nspam, len(words)) + + From npickett at users.sourceforge.net Sun Feb 2 10:31:46 2003 From: npickett at users.sourceforge.net (Neale Pickett) Date: Sun Feb 2 13:31:48 2003 Subject: [Spambayes-checkins] spambayes/contrib muttrc,1.1,1.2 Message-ID: Update of /cvsroot/spambayes/spambayes/contrib In directory sc8-pr-cvs1:/tmp/cvs-serv19216 Modified Files: muttrc Log Message: * H refiles as ham, not spam (thanks Richie Hindle) Index: muttrc =================================================================== RCS file: /cvsroot/spambayes/spambayes/contrib/muttrc,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** muttrc 27 Jan 2003 21:12:46 -0000 1.1 --- muttrc 2 Feb 2003 18:31:44 -0000 1.2 *************** *** 11,15 **** ## ## This binds 'S' to refile as spam and move to a 'caughtspam' folder. ! ## 'H' will refile as spam and move to your inbox. You will want to use ## these on every misclassified message. ## --- 11,15 ---- ## ## This binds 'S' to refile as spam and move to a 'caughtspam' folder. ! ## 'H' will refile as ham and move to your inbox. You will want to use ## these on every misclassified message. ## From montanaro at users.sourceforge.net Sun Feb 2 10:28:30 2003 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sun Feb 2 13:38:38 2003 Subject: [Spambayes-checkins] spambayes/contrib mod_spambayes.py,NONE,1.1 Message-ID: Update of /cvsroot/spambayes/spambayes/contrib In directory sc8-pr-cvs1:/tmp/cvs-serv17978 Added Files: mod_spambayes.py Log Message: A simple Spambayes plugin for Amit Patel's Python-based proxy3 web proxy. 
--- NEW FILE: mod_spambayes.py --- ## ## This is a simple Spambayes plugin for Amit Patel's proxy3 web proxy: ## http://theory.stanford.edu/~amitp/proxy.html ## ## Author: Skip Montanaro ## import os from proxy3_filter import * import proxy3_options from spambayes import hammie, Options, mboxutils dbf = os.path.expanduser(Options.options.hammiefilter_persistent_storage_file) class SpambayesFilter(BufferAllFilter): hammie = hammie.open(dbf, 1, 'r') def filter(self, s): if self.reply.split()[1] == '200': prob = self.hammie.score("%s\r\n%s" % (self.serverheaders, s)) print "| prob: %.5f" % prob if prob >= Options.options.spam_cutoff: print self.serverheaders print "text:", s[0:40], "...", s[-40:] return "not authorized" return s from proxy3_util import * register_filter('*/*', 'text/html', SpambayesFilter) From richiehindle at users.sourceforge.net Sun Feb 2 23:53:14 2003 From: richiehindle at users.sourceforge.net (Richie Hindle) Date: Mon Feb 3 02:53:17 2003 Subject: [Spambayes-checkins] spambayes pop3proxy.py,1.50,1.51 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv11575 Modified Files: pop3proxy.py Log Message: Run the database filename through os.path.expanduser(). Index: pop3proxy.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v retrieving revision 1.50 retrieving revision 1.51 diff -C2 -d -r1.50 -r1.51 *** pop3proxy.py 31 Jan 2003 19:13:41 -0000 1.50 --- pop3proxy.py 3 Feb 2003 07:53:12 -0000 1.51 *************** *** 1156,1165 **** options.pop3proxy_persistent_storage_file = \ '_pop3proxy_test.pickle' # This is never saved. if self.useDB: ! self.bayes = storage.DBDictClassifier( \ ! options.pop3proxy_persistent_storage_file) else: ! self.bayes = storage.PickledClassifier(\ ! options.pop3proxy_persistent_storage_file) print "Done." --- 1156,1165 ---- options.pop3proxy_persistent_storage_file = \ '_pop3proxy_test.pickle' # This is never saved. 
+ filename = options.pop3proxy_persistent_storage_file + filename = os.path.expanduser(filename) if self.useDB: ! self.bayes = storage.DBDictClassifier(filename) else: ! self.bayes = storage.PickledClassifier(filename) print "Done." From richiehindle at users.sourceforge.net Sun Feb 2 23:54:16 2003 From: richiehindle at users.sourceforge.net (Richie Hindle) Date: Mon Feb 3 02:54:19 2003 Subject: [Spambayes-checkins] spambayes setup.py,1.15,1.16 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv12046 Modified Files: setup.py Log Message: Install dbExpImp.py with the other scripts. Index: setup.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/setup.py,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** setup.py 25 Jan 2003 16:18:41 -0000 1.15 --- setup.py 3 Feb 2003 07:54:13 -0000 1.16 *************** *** 39,42 **** --- 39,43 ---- 'pop3proxy.py', 'proxytee.py', + 'dbExpImp.py', ], packages = [ From richiehindle at users.sourceforge.net Mon Feb 3 00:07:49 2003 From: richiehindle at users.sourceforge.net (Richie Hindle) Date: Mon Feb 3 03:07:52 2003 Subject: [Spambayes-checkins] spambayes/spambayes Options.py,1.9,1.10 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv16398 Modified Files: Options.py Log Message: pop3proxy.py now uses a database storage rather than a pickle by default. Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** Options.py 24 Jan 2003 23:59:22 -0000 1.9 --- Options.py 3 Feb 2003 08:07:46 -0000 1.10 *************** *** 385,389 **** pop3proxy_ham_cache: pop3proxy-ham-cache pop3proxy_unknown_cache: pop3proxy-unknown-cache ! 
pop3proxy_persistent_use_database: False pop3proxy_persistent_storage_file: hammie.db --- 385,389 ---- pop3proxy_ham_cache: pop3proxy-ham-cache pop3proxy_unknown_cache: pop3proxy-unknown-cache ! pop3proxy_persistent_use_database: True pop3proxy_persistent_storage_file: hammie.db From mhammond at users.sourceforge.net Mon Feb 3 14:46:28 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Mon Feb 3 17:46:31 2003 Subject: [Spambayes-checkins] spambayes/spambayes dbmstorage.py,1.2,1.3 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv26877 Modified Files: dbmstorage.py Log Message: Python 2.3 on Windows can safely use the bsddb module, as it is really bsddb3! Something similar may be needed for other platforms, but I will leave that to the platform experts. Index: dbmstorage.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/dbmstorage.py,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** dbmstorage.py 14 Jan 2003 05:38:20 -0000 1.2 --- dbmstorage.py 3 Feb 2003 22:46:26 -0000 1.3 *************** *** 13,17 **** def open_dbhash(*args): ! """Open a bsddb hash. Don't use this on Windows.""" import bsddb return bsddb.hashopen(*args) --- 13,18 ---- def open_dbhash(*args): ! """Open a bsddb hash. Don't use this on Windows, unless Python 2.3 or ! greater is used, in which case bsddb3 is actually named bsddb.""" import bsddb return bsddb.hashopen(*args) *************** *** 29,33 **** --- 30,40 ---- def open_best(*args): if sys.platform == "win32": + # Note that Python 2.3 and later ship with the new bsddb interface + # as the default bsddb module - so 2.3 can use the old name safely. 
funcs = [open_db3hash, open_gdbm, open_dumbdbm] + if sys.version_info >= (2,3): + funcs.insert(0, open_dbhash) + else: + funcs.insert(0, open_db3hash) else: funcs = [open_db3hash, open_dbhash, open_gdbm, open_dumbdbm] From mhammond at users.sourceforge.net Mon Feb 3 14:48:05 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Mon Feb 3 17:48:08 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 tester.py,1.2,1.3 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv28271 Modified Files: tester.py Log Message: Allow the test suite to work with a bsddb based classifier. Index: tester.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/tester.py,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** tester.py 16 Dec 2002 04:11:59 -0000 1.2 --- tester.py 3 Feb 2003 22:48:03 -0000 1.3 *************** *** 30,33 **** --- 30,50 ---- sleep(0.01) + def DictExtractor(bayes): + for k, v in bayes.wordinfo.items(): + yield k, v + + def DBExtractor(bayes): + import bsddb + key = bayes.dbm.first()[0] + if key not in ["saved state"]: + yield key, bayes._wordinfoget(key) + while True: + try: + key = bayes.dbm.next()[0] + except bsddb.error: + break + if key not in ["saved state"]: + yield key, bayes._wordinfoget(key) + # Find the top 'n' words in the Spam database that are clearly # marked as either ham or spam. Simply enumerates the *************** *** 36,40 **** def FindTopWords(bayes, num, get_spam): items = [] ! for word, info in bayes.wordinfo.items(): if ":" in word: continue --- 53,63 ---- def FindTopWords(bayes, num, get_spam): items = [] ! try: ! bayes.db # bsddb style ! extractor = DBExtractor ! except AttributeError: ! extractor = DictExtractor ! ! 
for word, info in extractor(bayes): if ":" in word: continue From mhammond at users.sourceforge.net Mon Feb 3 18:19:07 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Mon Feb 3 21:19:11 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 train.py,1.23,1.24 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv11695 Modified Files: train.py Log Message: For the message db to be able to work with a bsddb database, we use string values rather than integers, and avoid the use of "get()" Index: train.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/train.py,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** train.py 14 Jan 2003 05:38:20 -0000 1.23 --- train.py 4 Feb 2003 02:19:05 -0000 1.24 *************** *** 18,29 **** # latter changes after a Move operation - see msgstore.py def been_trained_as_ham(msg, mgr): ! spam = mgr.message_db.get(msg.searchkey) ! # spam is None ! return spam == False def been_trained_as_spam(msg, mgr): ! spam = mgr.message_db.get(msg.searchkey) ! # spam is None ! return spam == True def train_message(msg, is_spam, mgr, rescore=False): --- 18,29 ---- # latter changes after a Move operation - see msgstore.py def been_trained_as_ham(msg, mgr): ! if not mgr.message_db.has_key(msg.searchkey): ! return False ! return mgr.message_db[msg.searchkey]=='0' def been_trained_as_spam(msg, mgr): ! if not mgr.message_db.has_key(msg.searchkey): ! return False ! return mgr.message_db[msg.searchkey]=='1' def train_message(msg, is_spam, mgr, rescore=False): *************** *** 36,40 **** from spambayes.tokenizer import tokenize ! was_spam = mgr.message_db.get(msg.searchkey) if was_spam == is_spam: return False # already correctly classified --- 36,43 ---- from spambayes.tokenizer import tokenize ! if not mgr.message_db.has_key(msg.searchkey): ! was_spam = None ! else: ! 
was_spam = mgr.message_db[msg.searchkey]=='1' if was_spam == is_spam: return False # already correctly classified *************** *** 48,52 **** # Learn the correct classification. mgr.bayes.learn(tokenize(stream), is_spam) ! mgr.message_db[msg.searchkey] = is_spam mgr.bayes_dirty = True --- 51,55 ---- # Learn the correct classification. mgr.bayes.learn(tokenize(stream), is_spam) ! mgr.message_db[msg.searchkey] = ['0', '1'][is_spam] mgr.bayes_dirty = True From mhammond at users.sourceforge.net Mon Feb 3 18:19:48 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Mon Feb 3 21:19:51 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 manager.py,1.41,1.42 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv11936 Modified Files: manager.py Log Message: If a new bsddb, or bsddb3 module is available, use this instead of a pickle. If this is available on your system, you will need to do a full retrain. Index: manager.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** manager.py 14 Jan 2003 05:38:19 -0000 1.41 --- manager.py 4 Feb 2003 02:19:46 -0000 1.42 *************** *** 4,7 **** --- 4,8 ---- import os import sys + import errno import win32com.client *************** *** 34,37 **** --- 35,51 ---- this_filename = os.path.abspath(sys.argv[0]) + # See if we can use the new bsddb module. (The old one is unreliable + # on Windows, so we don't use that) + try: + from bsddb import db # This name doesn't exist in the old one. + use_db = True + except ImportError: + # See if the explicit bsddb3 module exists. + try: + import bsddb3 + use_db = True + except ImportError: + use_db = False + # This is a little bit of a hack . We are generally in a child directory # of the bayes code. 
To help installation, we handle the fact that this may *************** *** 40,44 **** # spambayes code before setting that envar, our .ini file may have no effect). def import_core_spambayes_stuff(ini_filename): ! global bayes_classifier, bayes_tokenize os.environ["BAYESCUSTOMIZE"] = ini_filename --- 54,58 ---- # spambayes code before setting that envar, our .ini file may have no effect). def import_core_spambayes_stuff(ini_filename): ! global bayes_classifier, bayes_tokenize, bayes_storage os.environ["BAYESCUSTOMIZE"] = ini_filename *************** *** 52,61 **** --- 66,137 ---- from spambayes import classifier from spambayes.tokenizer import tokenize + from spambayes import storage bayes_classifier = classifier bayes_tokenize = tokenize + bayes_storage = storage class ManagerError(Exception): pass + # Base class for our "storage manager" - we choose between the pickle + # and DB versions at runtime. As our bayes uses spambayes.storage, + # our base class can share common bayes loading code. 
+ class BasicStorageManager: + db_extension = None # for pychecker - overwritten by subclass + def __init__(self, bayes_base_name, mdb_base_name): + self.bayes_filename = bayes_base_name + self.db_extension + self.mdb_filename = mdb_base_name + self.db_extension + def new_bayes(self): + # Just delete the file and do an "open" + try: + os.unlink(self.bayes_filename) + except IOError, e: + if e.errno != errno.ENOENT: raise + return self.open_bayes() + def store_bayes(self, bayes): + bayes.store() + def open_bayes(self): + raise NotImplementedError + + class PickleStorageManager(BasicStorageManager): + db_extension = ".pck" + def open_bayes(self): + return bayes_storage.PickledClassifier(self.bayes_filename) + def close_bayes(self, bayes): + pass + def open_mdb(self): + return cPickle.load(open(self.mdb_filename, 'rb')) + def new_mdb(self): + return {} + def store_mdb(self, mdb): + cPickle.dump(mdb, open(self.mdb_filename,"wb"), 1) + def close_mdb(self, mdb): + pass + + class DBStorageManager(BasicStorageManager): + db_extension = ".db" + def open_bayes(self): + return bayes_storage.DBDictClassifier(self.bayes_filename) + def close_bayes(self, bayes): + bayes.db.close() + bayes.dbm.close() + def open_mdb(self): + try: + import bsddb + except ImportError: + import bsddb3 as bsddb + return bsddb.hashopen(self.mdb_filename) + def new_mdb(self): + try: + os.unlink(self.mdb_filename) + except IOError, e: + if e.errno != errno.ENOENT: raise + return self.open_mdb() + def store_mdb(self, mdb): + mdb.sync() + def close_mdb(self, mdb): + mdb.close() + + # Our main "bayes manager" class BayesManager: def __init__(self, config_base="default", outlook=None, verbose=1): *************** *** 68,76 **** config_base = os.path.abspath(config_base) self.ini_filename = config_base + "_bayes_customize.ini" - self.bayes_filename = config_base + "_bayes_database.pck" - self.message_db_filename = config_base + "_message_database.pck" self.config_filename = config_base + "_configuration.pck" 
! # First read the configuration file. self.config = self.LoadConfig() --- 144,150 ---- config_base = os.path.abspath(config_base) self.ini_filename = config_base + "_bayes_customize.ini" self.config_filename = config_base + "_configuration.pck" ! # Read the configuration file. self.config = self.LoadConfig() *************** *** 78,81 **** --- 152,163 ---- import_core_spambayes_stuff(self.ini_filename) + + bayes_base = config_base + "_bayes_database" + mdb_base = config_base + "_message_database" + # determine which db manager to use, and create it. + ManagerClass = [PickleStorageManager, DBStorageManager][use_db] + self.db_manager = ManagerClass(bayes_base, mdb_base) + + self.bayes = self.message_db = None self.LoadBayes() self.message_store = msgstore.MAPIMsgStore(outlook) *************** *** 115,119 **** # (which really is OK!) assert self.outlook is not None, "I need outlook :(" - ol = self.outlook msgstore_folder = self.message_store.GetFolder(folder_id) folder = msgstore_folder.GetOutlookItem() --- 197,200 ---- *************** *** 161,174 **** def LoadBayes(self): if not os.path.exists(self.ini_filename): raise ManagerError("The file '%s' must exist before the " "database '%s' can be opened or created" % ( ! self.ini_filename, self.bayes_filename)) bayes = message_db = None try: ! bayes = cPickle.load(open(self.bayes_filename, 'rb')) ! print "Loaded bayes database from '%s'" % (self.bayes_filename,) ! except IOError: ! pass # ignore file-not-found except: print "Failed to load bayes database" --- 242,256 ---- def LoadBayes(self): + import time + start = time.clock() if not os.path.exists(self.ini_filename): raise ManagerError("The file '%s' must exist before the " "database '%s' can be opened or created" % ( ! self.ini_filename, self.db_manager.bayes_filename)) bayes = message_db = None try: ! # file-not-found handled gracefully by storage. ! bayes = self.db_manager.open_bayes() ! 
print "Loaded bayes database from '%s'" % (self.db_manager.bayes_filename,) except: print "Failed to load bayes database" *************** *** 176,181 **** traceback.print_exc() try: ! message_db = cPickle.load(open(self.message_db_filename, 'rb')) ! print "Loaded message database from '%s'" % (self.message_db_filename,) except IOError: pass --- 258,263 ---- traceback.print_exc() try: ! message_db = self.db_manager.open_mdb() ! print "Loaded message database from '%s'" % (self.db_manager.mdb_filename,) except IOError: pass *************** *** 185,188 **** --- 267,272 ---- traceback.print_exc() if bayes is None or message_db is None: + self.bayes = bayes + self.message_db = message_db print "Either bayes database or message database is missing - creating new" self.InitNewBayes() *************** *** 193,203 **** "%d spam and %d good messages" % (bayes.nspam, bayes.nham)) if len(message_db) != bayes.nham + bayes.nspam: ! print "*** - message database only has %d messages - bayes has %d - something is screwey" % \ (len(message_db), bayes.nham + bayes.nspam) self.bayes = bayes self.message_db = message_db self.bayes_dirty = False def LoadConfig(self): try: f = open(self.config_filename, 'rb') --- 277,290 ---- "%d spam and %d good messages" % (bayes.nspam, bayes.nham)) if len(message_db) != bayes.nham + bayes.nspam: ! print "*** - message database has %d messages - bayes has %d - something is screwey" % \ (len(message_db), bayes.nham + bayes.nspam) self.bayes = bayes self.message_db = message_db self.bayes_dirty = False + if self.verbose: + print "Loaded databases in %gms" % ((time.clock()-start)*1000) def LoadConfig(self): + # Our 'config' file always uses a pickle try: f = open(self.config_filename, 'rb') *************** *** 228,233 **** def InitNewBayes(self): ! self.bayes = bayes_classifier.Bayes() ! self.message_db = {} # OK, so its not quite a DB yet self.bayes_dirty = True --- 315,324 ---- def InitNewBayes(self): ! if self.bayes is not None: ! 
self.db_manager.close_bayes(self.bayes) ! if self.message_db is not None: ! self.db_manager.close_mdb(self.message_db) ! self.bayes = self.db_manager.new_bayes() ! self.message_db = self.db_manager.new_mdb() self.bayes_dirty = True *************** *** 243,251 **** print "Saving bayes database with %d spam and %d good messages" %\ (bayes.nspam, bayes.nham) ! print " ->", self.bayes_filename ! cPickle.dump(bayes, open(self.bayes_filename,"wb"), 1) if self.verbose: ! print " ->", self.message_db_filename ! cPickle.dump(self.message_db, open(self.message_db_filename,"wb"), 1) self.bayes_dirty = False --- 334,342 ---- print "Saving bayes database with %d spam and %d good messages" %\ (bayes.nspam, bayes.nham) ! print " ->", self.db_manager.bayes_filename ! self.db_manager.store_bayes(self.bayes) if self.verbose: ! print " ->", self.db_manager.mdb_filename ! self.db_manager.store_mdb(self.message_db) self.bayes_dirty = False From mhammond at users.sourceforge.net Mon Feb 3 19:15:24 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Mon Feb 3 22:15:30 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000/dialogs FilterDialog.py,1.14,1.15 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000/dialogs In directory sc8-pr-cvs1:/tmp/cvs-serv30405 Modified Files: FilterDialog.py Log Message: Add tick-marks to the dialog. Index: FilterDialog.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/dialogs/FilterDialog.py,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** FilterDialog.py 25 Jan 2003 06:51:01 -0000 1.14 --- FilterDialog.py 4 Feb 2003 03:15:21 -0000 1.15 *************** *** 53,57 **** [BUTTON, "Certain Spam", -1, (7,43,235,65), cs | win32con.BS_GROUPBOX], [STATIC, certain_spam_msg, -1, (13,52,212,10), cs], ! 
["msctls_trackbar32", "", IDC_SLIDER_CERTAIN, (13,66,165,12), cs | commctrl.TBS_BOTH | commctrl.TBS_NOTICKS], [EDIT, "", IDC_EDIT_CERTAIN, (184,63,51,14), csts | win32con.ES_AUTOHSCROLL | win32con.WS_BORDER], [STATIC, "and these messages should be", -1, (13,76,107,10), cs], --- 53,57 ---- [BUTTON, "Certain Spam", -1, (7,43,235,65), cs | win32con.BS_GROUPBOX], [STATIC, certain_spam_msg, -1, (13,52,212,10), cs], ! ["msctls_trackbar32", "", IDC_SLIDER_CERTAIN, (13,62,165,12), cs | commctrl.TBS_BOTH | commctrl.TBS_AUTOTICKS ], [EDIT, "", IDC_EDIT_CERTAIN, (184,63,51,14), csts | win32con.ES_AUTOHSCROLL | win32con.WS_BORDER], [STATIC, "and these messages should be", -1, (13,76,107,10), cs], *************** *** 63,67 **** [BUTTON, "Possible Spam", -1, (7,114,235,68), cs | win32con.BS_GROUPBOX], [STATIC, unsure_msg, -1, (13,124,212,10), cs], ! ["msctls_trackbar32", "", IDC_SLIDER_UNSURE, (13,141,165,12), cs | commctrl.TBS_BOTH | commctrl.TBS_NOTICKS], [EDIT, "", IDC_EDIT_UNSURE, (184,137,54,14), csts | win32con.ES_AUTOHSCROLL | win32con.WS_BORDER], [STATIC, "and these messages should be", -1, (13,150,107,10), cs], --- 63,67 ---- [BUTTON, "Possible Spam", -1, (7,114,235,68), cs | win32con.BS_GROUPBOX], [STATIC, unsure_msg, -1, (13,124,212,10), cs], ! ["msctls_trackbar32", "", IDC_SLIDER_UNSURE, (13,137,165,12), cs | commctrl.TBS_BOTH | commctrl.TBS_AUTOTICKS], [EDIT, "", IDC_EDIT_UNSURE, (184,137,54,14), csts | win32con.ES_AUTOHSCROLL | win32con.WS_BORDER], [STATIC, "and these messages should be", -1, (13,150,107,10), cs], *************** *** 226,229 **** --- 226,230 ---- slider.SetLineSize(1) slider.SetPageSize(5) + slider.SetTicFreq(10) self._AdjustSliderToEdit(idc_slider, idc_edit) *************** *** 384,388 **** class Config: pass ! class Manager: pass mgr = Manager() mgr.message_store = msgstore.MAPIMsgStore() --- 385,392 ---- class Config: pass ! class Manager: ! def FormatFolderNames(self, folder_ids, include_sub): ! return "Folder 1; Folder 2" ! 
mgr = Manager() mgr.message_store = msgstore.MAPIMsgStore() *************** *** 391,395 **** inbox = outlook.Session.GetDefaultFolder(constants.olFolderInbox) config.filter.watch_folder_ids = [(inbox.StoreID, inbox.EntryID)] ! config.filter.watch_folder_include_sub = True config.filter.spam_folder_id = "" config.filter.spam_action = "Mo" --- 395,399 ---- inbox = outlook.Session.GetDefaultFolder(constants.olFolderInbox) config.filter.watch_folder_ids = [(inbox.StoreID, inbox.EntryID)] ! config.filter.watch_include_sub = True config.filter.spam_folder_id = "" config.filter.spam_action = "Mo" *************** *** 406,411 **** config.filter_now.action_all = True ! #tester = FilterArrivalsDialog ! tester = FilterNowDialog d = tester(mgr, None) if d.DoModal() == win32con.IDOK: --- 410,415 ---- config.filter_now.action_all = True ! tester = FilterArrivalsDialog ! ## tester = FilterNowDialog d = tester(mgr, None) if d.DoModal() == win32con.IDOK: From mhammond at users.sourceforge.net Mon Feb 3 20:00:12 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Mon Feb 3 23:00:15 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 filter.py,1.14,1.15 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv11384 Modified Files: filter.py Log Message: Fix 677804 - exception when messages are "untouched". Index: filter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/filter.py,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** filter.py 24 Nov 2002 22:43:43 -0000 1.14 --- filter.py 4 Feb 2003 04:00:08 -0000 1.15 *************** *** 32,41 **** folder_id = getattr(config, attr_prefix + "_folder_id") action = getattr(config, attr_prefix + "_action").lower() ! if action.startswith("no"): pass ! elif action.startswith("co"): dest_folder = mgr.message_store.GetFolder(folder_id) msg.CopyTo(dest_folder) ! 
elif action.startswith("mo"): dest_folder = mgr.message_store.GetFolder(folder_id) msg.MoveTo(dest_folder) --- 32,41 ---- folder_id = getattr(config, attr_prefix + "_folder_id") action = getattr(config, attr_prefix + "_action").lower() ! if action.startswith("un"): # untouched pass ! elif action.startswith("co"): # copied dest_folder = mgr.message_store.GetFolder(folder_id) msg.CopyTo(dest_folder) ! elif action.startswith("mo"): # Moved dest_folder = mgr.message_store.GetFolder(folder_id) msg.MoveTo(dest_folder) From mhammond at users.sourceforge.net Mon Feb 3 22:17:34 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Tue Feb 4 01:17:38 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 addin.py,1.47,1.48 filter.py,1.15,1.16 msgstore.py,1.38,1.39 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv19811 Modified Files: addin.py filter.py msgstore.py Log Message: Fix [ 642740 ] "Recover from Spam" wrong folder We now remember the folder from where we filtered a message from, so "Recover from Spam" restores it to the correct, original folder. Index: addin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** addin.py 27 Jan 2003 22:30:24 -0000 1.47 --- addin.py 4 Feb 2003 06:17:32 -0000 1.48 *************** *** 339,343 **** if not msgstore_messages: return - # Recover to where they were moved from # Get the inbox as the default place to restore to # (incase we dont know (early code) or folder removed etc --- 339,342 ---- *************** *** 347,353 **** import train for msgstore_message in msgstore_messages: # Must train before moving, else we lose the message! subject = msgstore_message.GetSubject() ! 
print "Recovering and ham training message '%s' - " % (subject,), if train.train_message(msgstore_message, False, self.manager, rescore = True): print "trained as ham" --- 346,357 ---- import train for msgstore_message in msgstore_messages: + # Recover where they were moved from + restore_folder = msgstore_message.GetRememberedFolder() + if restore_folder is None: + restore_folder = inbox_folder + # Must train before moving, else we lose the message! subject = msgstore_message.GetSubject() ! print "Recovering to folder '%s' and ham training message '%s' - " % (restore_folder.name, subject), if train.train_message(msgstore_message, False, self.manager, rescore = True): print "trained as ham" *************** *** 356,360 **** # Now move it. # XXX - still don't write the source, so no point looking :( ! msgstore_message.MoveTo(inbox_folder) # Note the move will possibly also trigger a re-train # but we are smart enough to know we have already done it. --- 360,364 ---- # Now move it. # XXX - still don't write the source, so no point looking :( ! msgstore_message.MoveTo(restore_folder) # Note the move will possibly also trigger a re-train # but we are smart enough to know we have already done it. Index: filter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/filter.py,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** filter.py 4 Feb 2003 04:00:08 -0000 1.15 --- filter.py 4 Feb 2003 06:17:32 -0000 1.16 *************** *** 26,30 **** --- 26,33 ---- try: + # Save the score msg.SetField(mgr.config.field_score_name, prob) + # and the ID of the folder we were in when scored. 
+ msg.RememberMessageCurrentFolder() msg.Save() Index: msgstore.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/msgstore.py,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** msgstore.py 23 Jan 2003 12:17:35 -0000 1.38 --- msgstore.py 4 Feb 2003 06:17:32 -0000 1.39 *************** *** 694,697 **** --- 694,744 ---- def CopyTo(self, folder): self._DoCopyMove(folder, False) + def GetFolder(self): + # return a folder object with the parent, or None + folder = self.msgstore._OpenEntry(self.id) + prop_ids = PR_PARENT_ENTRYID, + hr, data = folder.GetProps(prop_ids,0) + # Put parent ids together + parent_eid = data[0][1] + parent_id = self.id[0], parent_eid + parent = self.msgstore._OpenEntry(parent_id) + # Finally get the display name. + hr, data = folder.GetProps((PR_DISPLAY_NAME_A,), 0) + name = data[0][1] + count = parent.GetContentsTable(0).GetRowCount(0) + return MAPIMsgStoreFolder(self.msgstore, parent_id, name, count) + + def RememberMessageCurrentFolder(self): + self._EnsureObject() + folder = self.GetFolder() + props = ( (mapi.PS_PUBLIC_STRINGS, "SpamBayesOriginalFolderStoreID"), + (mapi.PS_PUBLIC_STRINGS, "SpamBayesOriginalFolderID") + ) + resolve_ids = self.mapi_object.GetIDsFromNames(props, mapi.MAPI_CREATE) + prop_ids = PROP_TAG( PT_BINARY, PROP_ID(resolve_ids[0])), \ + PROP_TAG( PT_BINARY, PROP_ID(resolve_ids[1])) + + prop_tuples = (prop_ids[0],folder.id[0]), (prop_ids[1],folder.id[1]) + self.mapi_object.SetProps(prop_tuples) + self.dirty = True + + def GetRememberedFolder(self): + props = ( (mapi.PS_PUBLIC_STRINGS, "SpamBayesOriginalFolderStoreID"), + (mapi.PS_PUBLIC_STRINGS, "SpamBayesOriginalFolderID") + ) + try: + self._EnsureObject() + resolve_ids = self.mapi_object.GetIDsFromNames(props, mapi.MAPI_CREATE) + prop_ids = PROP_TAG( PT_BINARY, PROP_ID(resolve_ids[0])), \ + PROP_TAG( PT_BINARY, PROP_ID(resolve_ids[1])) + hr, data = 
self.mapi_object.GetProps(prop_ids,0) + if hr != 0: + return None + (store_tag, store_id), (eid_tag, eid) = data + folder_id = mapi.HexFromBin(store_id), mapi.HexFromBin(eid) + return self.msgstore.GetFolder(folder_id) + except: + print "Error locating origin of message", self + return None def test(): From montanaro at users.sourceforge.net Tue Feb 4 07:57:22 2003 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Tue Feb 4 10:57:26 2003 Subject: [Spambayes-checkins] spambayes hammiefilter.py,1.14,1.15 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv19995 Modified Files: hammiefilter.py Log Message: typo Index: hammiefilter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/hammiefilter.py,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** hammiefilter.py 29 Jan 2003 03:23:34 -0000 1.14 --- hammiefilter.py 4 Feb 2003 15:57:20 -0000 1.15 *************** *** 70,74 **** ! procmail recipie to filter and train in one step: :0 fw | %(program)s -t --- 70,74 ---- ! procmail recipe to filter and train in one step: :0 fw | %(program)s -t From mhammond at users.sourceforge.net Tue Feb 4 19:09:45 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Tue Feb 4 22:09:51 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 manager.py,1.42,1.43 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv21082 Modified Files: manager.py Log Message: Whitespace normalisation Index: manager.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** manager.py 4 Feb 2003 02:19:46 -0000 1.42 --- manager.py 5 Feb 2003 03:09:42 -0000 1.43 *************** *** 108,112 **** def close_mdb(self, mdb): pass ! 
class DBStorageManager(BasicStorageManager): db_extension = ".db" --- 108,112 ---- def close_mdb(self, mdb): pass ! class DBStorageManager(BasicStorageManager): db_extension = ".db" *************** *** 133,137 **** mdb.close() ! # Our main "bayes manager" class BayesManager: def __init__(self, config_base="default", outlook=None, verbose=1): --- 133,137 ---- mdb.close() ! # Our main "bayes manager" class BayesManager: def __init__(self, config_base="default", outlook=None, verbose=1): From mhammond at users.sourceforge.net Tue Feb 4 19:22:40 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Tue Feb 4 22:22:44 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 config.py,1.5,1.6 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv25538 Modified Files: config.py Log Message: Correct some default settings. Index: config.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/config.py,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** config.py 13 Dec 2002 09:27:12 -0000 1.5 --- config.py 5 Feb 2003 03:22:38 -0000 1.6 *************** *** 61,68 **** spam_folder_id = None, spam_threshold = 90, ! spam_action = "Nothing", unsure_folder_id = None, unsure_threshold = 15, ! unsure_action = "Nothing", enabled = False, ) --- 61,68 ---- spam_folder_id = None, spam_threshold = 90, ! spam_action = "Untouched", unsure_folder_id = None, unsure_threshold = 15, ! unsure_action = "Untouched", enabled = False, ) From mhammond at users.sourceforge.net Fri Feb 7 03:23:37 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Fri Feb 7 06:23:42 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 manager.py,1.43,1.44 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv14114 Modified Files: manager.py Log Message: Paul Moore found some errors using bsddb3. 
Index: manager.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** manager.py 5 Feb 2003 03:09:42 -0000 1.43 --- manager.py 7 Feb 2003 11:23:34 -0000 1.44 *************** *** 38,47 **** # on Windows, so we don't use that) try: ! from bsddb import db # This name doesn't exist in the old one. ! use_db = True except ImportError: # See if the explicit bsddb3 module exists. try: ! import bsddb3 use_db = True except ImportError: --- 38,47 ---- # on Windows, so we don't use that) try: ! import bsddb ! use_db = hasattr(bsddb, "db") # This name doesn't exist in the old one. except ImportError: # See if the explicit bsddb3 module exists. try: ! import bsddb3 as bsddb use_db = True except ImportError: *************** *** 117,129 **** bayes.dbm.close() def open_mdb(self): - try: - import bsddb - except ImportError: - import bsddb3 as bsddb return bsddb.hashopen(self.mdb_filename) def new_mdb(self): try: os.unlink(self.mdb_filename) ! except IOError, e: if e.errno != errno.ENOENT: raise return self.open_mdb() --- 117,125 ---- bayes.dbm.close() def open_mdb(self): return bsddb.hashopen(self.mdb_filename) def new_mdb(self): try: os.unlink(self.mdb_filename) ! except EnvironmentError, e: if e.errno != errno.ENOENT: raise return self.open_mdb() From mhammond at users.sourceforge.net Fri Feb 7 03:28:36 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Fri Feb 7 06:28:40 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 manager.py,1.44,1.45 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv16179 Modified Files: manager.py Log Message: The code change mentioned isn't recent any more! 
Index: manager.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v retrieving revision 1.44 retrieving revision 1.45 diff -C2 -d -r1.44 -r1.45 *** manager.py 7 Feb 2003 11:23:34 -0000 1.44 --- manager.py 7 Feb 2003 11:28:32 -0000 1.45 *************** *** 225,231 **** % (folder.Name.encode("mbcs", "replace"),) print "", details - print " This is probably because the code has recently"\ - " been changed, but it will" - print " have no effect on the filtering or scoring." # else no items in this folder - not much worth doing! if include_sub: --- 225,228 ---- From timstone4 at users.sourceforge.net Sat Feb 8 09:17:05 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Sat Feb 8 12:17:09 2003 Subject: [Spambayes-checkins] spambayes dbExpImp.py,1.3,1.4 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv21177 Modified Files: dbExpImp.py Log Message: Ensure that nham and nspam are instances of integer Index: dbExpImp.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/dbExpImp.py,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** dbExpImp.py 1 Feb 2003 19:07:10 -0000 1.3 --- dbExpImp.py 8 Feb 2003 17:17:02 -0000 1.4 *************** *** 167,175 **** if newDBM: ! bayes.nham = nham ! bayes.nspam = nspam else: ! bayes.nham += nham ! bayes.nspam += nspam if newDBM: --- 167,175 ---- if newDBM: ! bayes.nham = int(nham) ! bayes.nspam = int(nspam) else: ! bayes.nham += int(nham) ! 
bayes.nspam += int(nspam) if newDBM: From timstone4 at users.sourceforge.net Sat Feb 8 09:18:40 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Sat Feb 8 12:18:45 2003 Subject: [Spambayes-checkins] spambayes/spambayes classifier.py,1.3,1.4 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv21717 Modified Files: classifier.py Log Message: Somehow, nham and nspam became strings in a user's database. This fix ensures that this event doesn't break classification, and corrects the database in the process. Index: classifier.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/classifier.py,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** classifier.py 29 Jan 2003 03:23:34 -0000 1.3 --- classifier.py 8 Feb 2003 17:18:37 -0000 1.4 *************** *** 385,391 **** self.probcache = {} # nuke the prob cache if is_spam: ! self.nspam += 1 else: ! self.nham += 1 for word in Set(wordstream): --- 385,391 ---- self.probcache = {} # nuke the prob cache if is_spam: ! self.nspam = int(self.nspam) + 1 # account for string nspam else: ! self.nham = int(self.nham) + 1 # account for string nham for word in Set(wordstream): From timstone4 at users.sourceforge.net Sat Feb 8 18:25:19 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Sat Feb 8 21:25:22 2003 Subject: [Spambayes-checkins] spambayes/spambayes Options.py,1.10,1.11 OptionConfig.py,1.2,1.3 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv30403 Modified Files: Options.py OptionConfig.py Log Message: Added configuration option to put classification into To: header to accomodate email clients (e.g. Outlook Express) that cannot test for presence/value of arbitrary headers. These clients can test for the classification in the recipient list. 
The classification is not a valid email address, so spammers will have to be a bit clever to spoof this, though it is certainly possible. A spoof-proof scheme for these (dumb) mail clients may not be possible. Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** Options.py 3 Feb 2003 08:07:46 -0000 1.10 --- Options.py 9 Feb 2003 02:25:13 -0000 1.11 *************** *** 387,390 **** --- 387,391 ---- pop3proxy_persistent_use_database: True pop3proxy_persistent_storage_file: hammie.db + pop3proxy_notate_to: True # Deprecated - use pop3proxy_servers and pop3proxy_ports instead. *************** *** 492,495 **** --- 493,497 ---- 'pop3proxy_persistent_use_database': boolean_cracker, 'pop3proxy_persistent_storage_file': string_cracker, + 'pop3proxy_notate_to': boolean_cracker, }, 'html_ui': {'html_ui_port': int_cracker, Index: OptionConfig.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/OptionConfig.py,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** OptionConfig.py 24 Jan 2003 23:59:22 -0000 1.2 --- OptionConfig.py 9 Feb 2003 02:25:14 -0000 1.3 *************** *** 46,49 **** --- 46,50 ---- 'p3servers': ('pop3proxy', 'pop3proxy_servers'), 'p3ports': ('pop3proxy', 'pop3proxy_ports'), + 'p3notate': ('pop3proxy', 'pop3proxy_notate_to'), } *************** *** 51,55 **** # destructive - they default to being empty, so you gain nothing by # restoring them. ! noRestore = ('pop3proxy_servers', 'pop3proxy_ports') # This governs the order in which the options appear on the configurator --- 52,56 ---- # destructive - they default to being empty, so you gain nothing by # restoring them. ! 
noRestore = ('pop3proxy_servers', 'pop3proxy_ports', 'pop3_notate_to') # This governs the order in which the options appear on the configurator *************** *** 78,81 **** --- 79,94 ---- port. If there are multiple servers, you must specify the same number of ports as servers, separated by commas."""), + + ("p3notate", "Notate To", + """Some email clients (Outlook Express, for example) can only set + up filtering rules on a limited set of headers. These clients + cannot test for the existence/value of an arbitrary header and filter + mail based on that information. To accomodate these kind of mail + clients, the Notate To: can be checked, which will add "spam,", + "ham,", or "unsure," to the recipient list. A filter rule can then + test to see if one of these words (followed by a comma) is in the + recipient list, and route the mail to an appropriate folder, or take + whatever other action is supported and appropriate for the mail + classification."""), )), *************** *** 303,306 **** --- 316,330 ---- errmsg += '
  • Ham cutoff must be less than Spam cutoff
  • \n' + try: + nto = parms['p3notate'] + except KeyError: + if options.pop3proxy_notate_to: + nto = "True" + else: + nto = "False" + + if not nto == "True" and not nto == "False": + errmsg += """
  • Notate To: must be "True" or "False".
  • \n""" + # edit for equal number of pop3servers and ports try: From timstone4 at users.sourceforge.net Sat Feb 8 18:25:48 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Sat Feb 8 21:25:50 2003 Subject: [Spambayes-checkins] spambayes pop3proxy.py,1.51,1.52 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv30980 Modified Files: pop3proxy.py Log Message: Added code to put classification into To: header to accomodate email clients (e.g. Outlook Express) that cannot test for presence/value of arbitrary headers. These clients can test for the classification in the recipient list. The classification is not a valid email address, so spammers will have to be a bit clever to spoof this, though it is certainly possible. A spoof-proof scheme for these (dumb) mail clients may not be possible. Index: pop3proxy.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v retrieving revision 1.51 retrieving revision 1.52 diff -C2 -d -r1.51 -r1.52 *** pop3proxy.py 3 Feb 2003 07:53:12 -0000 1.51 --- pop3proxy.py 9 Feb 2003 02:25:45 -0000 1.52 *************** *** 499,502 **** --- 499,509 ---- headers, body = re.split(r'\n\r?\n', messageText, 1) headers = headers + "\n" + header + "\r\n" + + if options.pop3proxy_notate_to: + # add 'spam' as recip + tore = re.compile("^To: ", re.IGNORECASE | re.MULTILINE) + headers = re.sub(tore,"To: %s," % (disposition), + headers) + messageText = headers + body From timstone4 at users.sourceforge.net Mon Feb 10 07:41:29 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Mon Feb 10 10:41:36 2003 Subject: [Spambayes-checkins] spambayes/spambayes Options.py,1.11,1.12 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv29379 Modified Files: Options.py Log Message: Changed bayescustomize environment parsing from simple split to a regex that keeps filenames with embedded 
spaces intact. This works in my testing... Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** Options.py 9 Feb 2003 02:25:13 -0000 1.11 --- Options.py 10 Feb 2003 15:41:23 -0000 1.12 *************** *** 24,27 **** --- 24,28 ---- return not not val + import re __all__ = ['options'] *************** *** 564,568 **** alternate = os.getenv('BAYESCUSTOMIZE') if alternate: ! filenames = alternate.split() options.mergefiles(filenames) optionsPathname = os.path.abspath(filenames[-1]) --- 565,570 ---- alternate = os.getenv('BAYESCUSTOMIZE') if alternate: ! r = re.compile(r'(.*?\.ini) *',re.IGNORECASE) ! filenames = r.findall(alternate) options.mergefiles(filenames) optionsPathname = os.path.abspath(filenames[-1]) From mhammond at users.sourceforge.net Wed Feb 12 14:05:39 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Wed Feb 12 17:05:42 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 manager.py,1.45,1.46 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv9833 Modified Files: manager.py Log Message: Check for the correct exception when removing the file. Check for bsddb3 before bsddb, else bsddb3 would never be found! Reformat some comments to 80 chars. Index: manager.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v retrieving revision 1.45 retrieving revision 1.46 diff -C2 -d -r1.45 -r1.46 *** manager.py 7 Feb 2003 11:28:32 -0000 1.45 --- manager.py 12 Feb 2003 22:05:33 -0000 1.46 *************** *** 38,56 **** # on Windows, so we don't use that) try: ! import bsddb ! use_db = hasattr(bsddb, "db") # This name doesn't exist in the old one. except ImportError: ! # See if the explicit bsddb3 module exists. try: ! import bsddb3 as bsddb ! 
use_db = True except ImportError: use_db = False ! # This is a little bit of a hack . We are generally in a child directory ! # of the bayes code. To help installation, we handle the fact that this may ! # not be on sys.path. Note that doing these imports is delayed, so that we ! # can set the BAYESCUSTOMIZE envar first (if we import anything from the core ! # spambayes code before setting that envar, our .ini file may have no effect). def import_core_spambayes_stuff(ini_filename): global bayes_classifier, bayes_tokenize, bayes_storage --- 38,59 ---- # on Windows, so we don't use that) try: ! import bsddb3 as bsddb ! # bsddb3 is definitely not broken ! use_db = True except ImportError: ! # Not using the 3rd party bsddb3, so try the one in the std library try: ! import bsddb ! use_db = hasattr(bsddb, "db") # This name is not in the old one. except ImportError: + # No DB library at all! use_db = False ! # This is a little bit of a hack . We are generally in a child ! # directory of the bayes code. To help installation, we handle the ! # fact that this may not be on sys.path. Note that doing these ! # imports is delayed, so that we can set the BAYESCUSTOMIZE envar ! # first (if we import anything from the core spambayes code before ! # setting that envar, our .ini file may have no effect). def import_core_spambayes_stuff(ini_filename): global bayes_classifier, bayes_tokenize, bayes_storage *************** *** 86,90 **** try: os.unlink(self.bayes_filename) ! except IOError, e: if e.errno != errno.ENOENT: raise return self.open_bayes() --- 89,93 ---- try: os.unlink(self.bayes_filename) ! 
except EnvironmentError, e: if e.errno != errno.ENOENT: raise return self.open_bayes() From mhammond at users.sourceforge.net Wed Feb 12 14:02:24 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Wed Feb 12 17:08:40 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 README.txt,1.8,1.9 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv8548 Modified Files: README.txt Log Message: Correct some out of date info. Index: README.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/README.txt,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** README.txt 24 Nov 2002 22:43:43 -0000 1.8 --- README.txt 12 Feb 2003 22:02:16 -0000 1.9 *************** *** 62,71 **** necessary for you to *see* the score, not for the scoring to work. - * Filtering an Exchange Server public store appears to not work (is this - still true?) - * Sean reports bad output saving very large classifiers in training.py. Somewhere over 4MB, they seem to stop working. Mark's hasn't got ! that big yet - just over 2MB and going strong. Misc Comments --- 62,69 ---- necessary for you to *see* the score, not for the scoring to work. * Sean reports bad output saving very large classifiers in training.py. Somewhere over 4MB, they seem to stop working. Mark's hasn't got ! that big yet - 3.8 MB, then he moved to the bsddb database - all with ! no problems. Misc Comments From mhammond at users.sourceforge.net Thu Feb 13 15:37:00 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Thu Feb 13 18:37:04 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000/dialogs FolderSelector.py,1.12,1.13 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000/dialogs In directory sc8-pr-cvs1:/tmp/cvs-serv4464 Modified Files: FolderSelector.py Log Message: Bug 685746: Sort folder names in folder selector. 
Index: FolderSelector.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/dialogs/FolderSelector.py,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** FolderSelector.py 26 Jan 2003 01:17:27 -0000 1.12 --- FolderSelector.py 13 Feb 2003 23:36:56 -0000 1.13 *************** *** 58,64 **** table = folder.GetHierarchyTable(0) children = [] rows = mapi.HrQueryAllRows(table, (PR_ENTRYID, PR_STORE_ENTRYID, ! PR_DISPLAY_NAME_A), None, None, 0) for (eid_tag, eid),(storeeid_tag, store_eid), (name_tag, name) in rows: # Note the eid we get here is short-term - hence we must --- 58,65 ---- table = folder.GetHierarchyTable(0) children = [] + order = (((PR_DISPLAY_NAME_A, mapi.TABLE_SORT_ASCEND),),0,0) rows = mapi.HrQueryAllRows(table, (PR_ENTRYID, PR_STORE_ENTRYID, ! PR_DISPLAY_NAME_A), None, order, 0) for (eid_tag, eid),(storeeid_tag, store_eid), (name_tag, name) in rows: # Note the eid we get here is short-term - hence we must From mhammond at users.sourceforge.net Thu Feb 13 16:11:58 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Thu Feb 13 19:12:02 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 tester.py,1.3,1.4 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv17683 Modified Files: tester.py Log Message: Allow this to work with Python 2.2 Index: tester.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/tester.py,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** tester.py 3 Feb 2003 22:48:03 -0000 1.3 --- tester.py 14 Feb 2003 00:11:55 -0000 1.4 *************** *** 9,12 **** --- 9,14 ---- # in action. Also checks that the message correctly remains classified # after a message move. 
+ from __future__ import generators + from win32com.client import constants from time import sleep From mhammond at users.sourceforge.net Thu Feb 13 17:24:24 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Thu Feb 13 20:24:27 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 addin.py,1.48,1.49 filter.py,1.16,1.17 manager.py,1.46,1.47 msgstore.py,1.39,1.40 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv7553 Modified Files: addin.py filter.py manager.py msgstore.py Log Message: Handle exceptions when opening folders better. GetFolder() now returns None, rather than forcing each caller to check for obscure COM errors. Index: addin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v retrieving revision 1.48 retrieving revision 1.49 diff -C2 -d -r1.48 -r1.49 *** addin.py 4 Feb 2003 06:17:32 -0000 1.48 --- addin.py 14 Feb 2003 01:24:20 -0000 1.49 *************** *** 546,550 **** mapi_folder = self.manager.message_store.GetFolder(outlook_folder) look_id = self.manager.config.filter.spam_folder_id ! if look_id: look_folder = self.manager.message_store.GetFolder(look_id) if mapi_folder == look_folder: --- 546,550 ---- mapi_folder = self.manager.message_store.GetFolder(outlook_folder) look_id = self.manager.config.filter.spam_folder_id ! if mapi_folder is not None and look_id: look_folder = self.manager.message_store.GetFolder(look_id) if mapi_folder == look_folder: *************** *** 554,558 **** # Check if uncertain look_id = self.manager.config.filter.unsure_folder_id ! if look_id: look_folder = self.manager.message_store.GetFolder(look_id) if mapi_folder == look_folder: --- 554,558 ---- # Check if uncertain look_id = self.manager.config.filter.unsure_folder_id ! 
if mapi_folder is not None and look_id: look_folder = self.manager.message_store.GetFolder(look_id) if mapi_folder == look_folder: Index: filter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/filter.py,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** filter.py 4 Feb 2003 06:17:32 -0000 1.16 --- filter.py 14 Feb 2003 01:24:21 -0000 1.17 *************** *** 39,46 **** elif action.startswith("co"): # copied dest_folder = mgr.message_store.GetFolder(folder_id) ! msg.CopyTo(dest_folder) elif action.startswith("mo"): # Moved dest_folder = mgr.message_store.GetFolder(folder_id) ! msg.MoveTo(dest_folder) else: raise RuntimeError, "Eeek - bad action '%r'" % (action,) --- 39,54 ---- elif action.startswith("co"): # copied dest_folder = mgr.message_store.GetFolder(folder_id) ! if dest_folder is None: ! print "ERROR: Unable to open the folder to Copy the " \ ! "message - this message was not copied" ! else: ! msg.CopyTo(dest_folder) elif action.startswith("mo"): # Moved dest_folder = mgr.message_store.GetFolder(folder_id) ! if dest_folder is None: ! print "ERROR: Unable to open the folder to Move the " \ ! "message - this message was not moved" ! else: ! msg.MoveTo(dest_folder) else: raise RuntimeError, "Eeek - bad action '%r'" % (action,) Index: manager.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v retrieving revision 1.46 retrieving revision 1.47 diff -C2 -d -r1.46 -r1.47 *** manager.py 12 Feb 2003 22:05:33 -0000 1.46 --- manager.py 14 Feb 2003 01:24:21 -0000 1.47 *************** *** 172,179 **** names = [] for eid in folder_ids: ! try: ! name = self.message_store.GetFolder(eid).name ! except pythoncom.com_error: name = "" names.append(name) ret = '; '.join(names) --- 172,180 ---- names = [] for eid in folder_ids: ! folder = self.message_store.GetFolder(eid) ! 
if folder is None: name = "" + else: + name = folder.name names.append(name) ret = '; '.join(names) *************** *** 197,200 **** --- 198,206 ---- assert self.outlook is not None, "I need outlook :(" msgstore_folder = self.message_store.GetFolder(folder_id) + if msgstore_folder is None: + print "Checking a folder for our field failed - "\ + "there is no such folder." + return + folder = msgstore_folder.GetOutlookItem() if self.verbose > 1: Index: msgstore.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/msgstore.py,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** msgstore.py 4 Feb 2003 06:17:32 -0000 1.39 --- msgstore.py 14 Feb 2003 01:24:21 -0000 1.40 *************** *** 205,210 **** for folder_id in folder_ids: folder_id = self.NormalizeID(folder_id) ! folder = self._OpenEntry(folder_id) ! table = folder.GetContentsTable(0) rc, props = folder.GetProps( (PR_DISPLAY_NAME_A,), 0) yield MAPIMsgStoreFolder(self, folder_id, props[0][1], --- 205,219 ---- for folder_id in folder_ids: folder_id = self.NormalizeID(folder_id) ! try: ! folder = self._OpenEntry(folder_id) ! table = folder.GetContentsTable(0) ! except pythoncom.com_error, (hr, msg, exc, arg_err): ! # We will ignore *all* such errors for the time ! # being, but warn for results we don't know about. ! if hr not in [mapi.MAPI_E_OBJECT_DELETED, mapi.MAPI_E_NOT_FOUND]: ! print "WARNING: Unexpected MAPI error opening folder" ! print "Error:", mapiutil.GetScodeString(hr) ! print "Exception Message:", msg ! continue rc, props = folder.GetProps( (PR_DISPLAY_NAME_A,), 0) yield MAPIMsgStoreFolder(self, folder_id, props[0][1], *************** *** 222,227 **** else: folder_id = self.NormalizeID(folder_id) ! folder = self._OpenEntry(folder_id) ! table = folder.GetContentsTable(0) # Ensure we have a long-term ID. 
rc, props = folder.GetProps( (PR_ENTRYID, PR_DISPLAY_NAME_A), 0) --- 231,245 ---- else: folder_id = self.NormalizeID(folder_id) ! try: ! folder = self._OpenEntry(folder_id) ! table = folder.GetContentsTable(0) ! except pythoncom.com_error, (hr, msg, exc, arg_err): ! # We will ignore *all* such errors for the time ! # being, but warn for results we don't know about. ! if hr not in [mapi.MAPI_E_OBJECT_DELETED, mapi.MAPI_E_NOT_FOUND]: ! print "WARNING: Unexpected MAPI error opening folder" ! print "Error:", mapiutil.GetScodeString(hr) ! print "Exception Message:", msg ! return None # Ensure we have a long-term ID. rc, props = folder.GetProps( (PR_ENTRYID, PR_DISPLAY_NAME_A), 0) From mhammond at users.sourceforge.net Thu Feb 13 17:24:25 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Thu Feb 13 20:24:28 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000/dialogs FilterDialog.py,1.15,1.16 FolderSelector.py,1.13,1.14 TrainingDialog.py,1.10,1.11 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000/dialogs In directory sc8-pr-cvs1:/tmp/cvs-serv7553/dialogs Modified Files: FilterDialog.py FolderSelector.py TrainingDialog.py Log Message: Handle exceptions when opening folders better. GetFolder() now returns None, rather than forcing each caller to check for obscure COM errors. Index: FilterDialog.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/dialogs/FilterDialog.py,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** FilterDialog.py 4 Feb 2003 03:15:21 -0000 1.15 --- FilterDialog.py 14 Feb 2003 01:24:22 -0000 1.16 *************** *** 6,10 **** import win32ui import win32api - import pythoncom from win32com.client import constants --- 6,9 ---- *************** *** 331,338 **** names = [] for eid in self.mgr.config.filter_now.folder_ids: ! try: ! name = self.mgr.message_store.GetFolder(eid).name ! 
except pythoncom.com_error: name = "" names.append(name) self.SetDlgItemText(IDC_FOLDER_NAMES, "; ".join(names)) --- 330,338 ---- names = [] for eid in self.mgr.config.filter_now.folder_ids: ! folder = self.mgr.message_store.GetFolder(eid) ! if folder is None: name = "" + else: + name = folder.name names.append(name) self.SetDlgItemText(IDC_FOLDER_NAMES, "; ".join(names)) Index: FolderSelector.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/dialogs/FolderSelector.py,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** FolderSelector.py 13 Feb 2003 23:36:56 -0000 1.13 --- FolderSelector.py 14 Feb 2003 01:24:22 -0000 1.14 *************** *** 51,55 **** from win32com.mapi import mapi from win32com.mapi.mapitags import * - import pythoncom def _BuildFoldersMAPI(manager, folder_id): --- 51,54 ---- *************** *** 67,75 **** # so no need to do it explicitly - just believe folder.id over eid) temp_id = mapi.HexFromBin(store_eid), mapi.HexFromBin(eid) ! try: ! child_folder = manager.message_store.GetFolder(temp_id) ! except pythoncom.com_error: ! # Bad folder for some reason - ignore it. ! child_folder = None if child_folder is not None: spec = FolderSpec(child_folder.GetID(), name) --- 66,70 ---- # so no need to do it explicitly - just believe folder.id over eid) temp_id = mapi.HexFromBin(store_eid), mapi.HexFromBin(eid) ! 
child_folder = manager.message_store.GetFolder(temp_id) if child_folder is not None: spec = FolderSpec(child_folder.GetID(), name) Index: TrainingDialog.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/dialogs/TrainingDialog.py,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** TrainingDialog.py 25 Jan 2003 06:51:02 -0000 1.10 --- TrainingDialog.py 14 Feb 2003 01:24:22 -0000 1.11 *************** *** 6,10 **** import win32ui import win32api - import pythoncom from win32com.client import constants --- 6,9 ---- *************** *** 79,86 **** names = [] for eid in self.config.ham_folder_ids: ! try: ! name = self.mgr.message_store.GetFolder(eid).name ! except pythoncom.com_error: name = "" names.append(name) self.SetDlgItemText(IDC_STATIC_HAM, "; ".join(names)) --- 78,86 ---- names = [] for eid in self.config.ham_folder_ids: ! folder = self.mgr.message_store.GetFolder(eid) ! if folder is None: name = "" + else: + name = folder.name names.append(name) self.SetDlgItemText(IDC_STATIC_HAM, "; ".join(names)) *************** *** 88,95 **** names = [] for eid in self.config.spam_folder_ids: ! try: ! name = self.mgr.message_store.GetFolder(eid).name ! except pythoncom.com_error: name = "" names.append(name) self.SetDlgItemText(IDC_STATIC_SPAM, "; ".join(names)) --- 88,96 ---- names = [] for eid in self.config.spam_folder_ids: ! folder = self.mgr.message_store.GetFolder(eid) ! 
if folder is None: name = "" + else: + name = folder.name names.append(name) self.SetDlgItemText(IDC_STATIC_SPAM, "; ".join(names)) From montanaro at users.sourceforge.net Thu Feb 13 18:29:06 2003 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Thu Feb 13 21:29:11 2003 Subject: [Spambayes-checkins] spambayes/spambayes Options.py,1.12,1.13 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv27765 Modified Files: Options.py Log Message: split BAYESCUSTOMIZE using os.pathsep - note that this changes behavior - no more space-separated file lists! Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** Options.py 10 Feb 2003 15:41:23 -0000 1.12 --- Options.py 14 Feb 2003 02:29:03 -0000 1.13 *************** *** 565,570 **** alternate = os.getenv('BAYESCUSTOMIZE') if alternate: ! r = re.compile(r'(.*?\.ini) *',re.IGNORECASE) ! filenames = r.findall(alternate) options.mergefiles(filenames) optionsPathname = os.path.abspath(filenames[-1]) --- 565,569 ---- alternate = os.getenv('BAYESCUSTOMIZE') if alternate: ! 
filenames = alternate.split(os.pathsep) options.mergefiles(filenames) optionsPathname = os.path.abspath(filenames[-1]) From montanaro at users.sourceforge.net Thu Feb 13 18:39:00 2003 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Thu Feb 13 21:39:03 2003 Subject: [Spambayes-checkins] spambayes README.txt,1.45,1.46 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv30466 Modified Files: README.txt Log Message: update note about BAYESCUSTOMIZE to reflect change to separator Index: README.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/README.txt,v retrieving revision 1.45 retrieving revision 1.46 diff -C2 -d -r1.45 -r1.46 *** README.txt 2 Dec 2002 04:43:37 -0000 1.45 --- README.txt 14 Feb 2003 02:38:57 -0000 1.46 *************** *** 41,49 **** near the start, and consult attributes of options. ! As an alternative to bayescustomize.ini, you can set the ! environment variable BAYESCUSTOMIZE to a whitespace-separated list ! of one or more .ini files, these will be read in, in order, and ! applied to the options. This allows you to tweak individual runs ! by combining fragments of .ini files. classifier.py --- 41,55 ---- near the start, and consult attributes of options. ! As an alternative to bayescustomize.ini, you can set the environment ! variable BAYESCUSTOMIZE to a list of one or more .ini files, these will ! be read in, in order, and applied to the options. This allows you to ! tweak individual runs by combining fragments of .ini files. The ! character used to separate different .ini files is platform-dependent. ! On Unix, Linux and Mac OS X systems it is ':'. On Windows it is ';'. ! On Mac OS 9 and earlier systems it is a NL character. ! ! *NOTE* The separator character changed after the second alpha version of ! the first release. Previously, if multiple files were specified in ! BAYESCUSTOMIZE they were space-separated. 
classifier.py From timstone4 at users.sourceforge.net Fri Feb 14 06:40:49 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Fri Feb 14 09:40:53 2003 Subject: [Spambayes-checkins] spambayes/spambayes/resources ui.html,1.3,1.4 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes/resources In directory sc8-pr-cvs1:/tmp/cvs-serv22926 Modified Files: ui.html Log Message: Correct misspelling Index: ui.html =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/resources/ui.html,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** ui.html 24 Jan 2003 23:56:11 -0000 1.3 --- ui.html 14 Feb 2003 14:40:47 -0000 1.4 *************** *** 243,247 **** Either upload a message or mbox file:
    ! Or paste one whole message (incuding headers) here:

    --- 243,247 ---- Either upload a message or mbox file:
    ! Or paste one whole message (including headers) here:

    From tim at fourstonesExpressions.com Fri Feb 14 09:24:18 2003 From: tim at fourstonesExpressions.com (Tim Stone - Four Stones Expressions) Date: Fri Feb 14 10:24:35 2003 Subject: [Spambayes-checkins] spambayes/spambayes/resources ui.html,1.3,1.4 In-Reply-To: Message-ID: Argh, there's a resourcepackage I gotta run to get this change implemented... I'll get it l8r... :( -TimS 2/14/2003 8:40:49 AM, "Tim Stone" wrote: >Update of /cvsroot/spambayes/spambayes/spambayes/resources >In directory sc8-pr-cvs1:/tmp/cvs-serv22926 > >Modified Files: > ui.html >Log Message: >Correct misspelling > >Index: ui.html >=================================================================== >RCS file: /cvsroot/spambayes/spambayes/spambayes/resources/ui.html,v >retrieving revision 1.3 >retrieving revision 1.4 >diff -C2 -d -r1.3 -r1.4 >*** ui.html 24 Jan 2003 23:56:11 -0000 1.3 >--- ui.html 14 Feb 2003 14:40:47 -0000 1.4 >*************** >*** 243,247 **** > Either upload a message or mbox file: >
    >! Or paste one whole message (incuding headers) here:
    >
    > >--- 243,247 ---- > Either upload a message or mbox file: >
    >! Or paste one whole message (including headers) here:
    >
    > > > > >_______________________________________________ >Spambayes-checkins mailing list >Spambayes-checkins@python.org >http://mail.python.org/mailman/listinfo/spambayes-checkins > > c'est moi - TimS http://www.fourstonesExpressions.com http://wecanstopspam.org From timstone4 at users.sourceforge.net Fri Feb 14 09:29:25 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Fri Feb 14 12:29:28 2003 Subject: [Spambayes-checkins] spambayes/spambayes/resources ui_html.py,1.3,1.4 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes/resources In directory sc8-pr-cvs1:/tmp/cvs-serv20752 Modified Files: ui_html.py Log Message: Correct misspelling Index: ui_html.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/resources/ui_html.py,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** ui_html.py 24 Jan 2003 23:56:27 -0000 1.3 --- ui_html.py 14 Feb 2003 17:29:21 -0000 1.4 *************** *** 127,171 **** \x8a\xb6\x94V\x89\x8as^(\xe2\xea8\xe2\x8a[H\xba\x8a\xa9\x98\xaeI3^\'(M\xea%\ \x8b\xcf)^u\x04\x10\xf3\xa4T\xef\xa5\xddP\xed\xbe}\xdb\xa7\x17\xc3v\xabj\xfa\ ! \xd9H\x12\x84\xe8\xc8\xfeSA\x17\x12\x82\x92\x8f\xedF&u\xda\t\x91U\xb6\xac\ ! \xa8\xa2o\xfc\xdc!\xe5KS\xb7\x14\x89a$$~\xdc,\xa4(9(\xe4\x16P\xf7LW\xaa\xe7>\ ! ^\xec\xc1\xb1\x98\x91\xee\xe2\xce\xf5\xa4>\xee\xcfZ\xd3\x87"oJ$\xdb\xea\x80\ ! \xae\xfc\xce5\x85>\xbd\xcdW\x92\xbb~\x96\x9a5\xd9}H^\x98\xb1\r9\xab\x8bu\xde\ ! |\x80e\x04s\xe4\xa4]k\xbc_\xad\xb0\x06\x80\x91m(\xf1\xfe\x029\xa3[\x95,\xb2Q\ ! \x0c\x955\x8f\x9djI\x9c\xb9\xb9rS\xb8f\x85H\xe5\x03}\x0eP\xdfI\x1f\xee]\r\ ! \xd8B"\xf7\xd7J\x14\xbba3\xa8\xbb\x1bK\xc0\xa6\xff\xc5&\xbf/B\xec\xf3\xac\ ! \xa4\xe0\xb9\xd1G\'#\xf1\xf5E\x9e\x7f6\xe9\x9e\xc20(@x\x08\xeaj.\xd6\xecMz4$\ ! \xf2\xbdv{\xa7\xb8*\x87+\x1fu\xb7\r\r>Vi\x08\x16\x0b\xa9=\xeal]{\xe9\x94\x08\ ! \xb1o)+\xfc\xe4\xe4\xe4\xf4\xec\x89BlCj\xf34\xa5MM\xe8\xd5\xeb7O\x10\xba)d\ ! 
\xc8\xc3\x18\xf2\x8f\x9d..4@\xb2\xd4\x17\xcc\xba\x84d\xe4\x84\xb7(\xc8\xa0\ ! \x85\x8f.\xe3\xa0R\xa1\x17LF\xff\xf5\xfd\x9f\x9eX\xf0k \xd5*\xef\xad(\x01\ ! \x07\x9f\x10sg\x90Sd\xa2\xeaJ\xdel\x10\xc5c/\x05l\x1b2Kb\xc6\xd0=\x0f\xfa\ ! \xc4+Q:\xd7t\xf8C6\xa8\x93\x13\x90?\x8d`ki\xaes\x8eX\x12\xdf\x0b-\xb0\x10\ ! \xf5\x180\x82\xd5\x97N\x87\xee\x87N\xcd\xad\n\xd1X\xb8%\xb2fP\xd1\x8c /\xda\ ! \xf7\xf86J\xc41\x9e-\x12w?\'\x04@\xbd2_\x91\xe09\xf6\x8b\x08-RC\x02\xeaMp\\\ ! \xe8\x90\x1bu\xeb\xd8yp+TUdM\x1d\xb2\xfd=\x17\xdec\xdbs\x04\xd8\xa2k\xe2\xe9\ ! \xf0y\xe8Ao\xf5\x18O\x03\xa2\x05N\x03\x89\xf6,\x0e\x87?\xfe\xdaG\xf1\xa8f\ ! \xfc\x03P{B\xe7\xea\xfeZ\xdd~\xf8t\xfb\xc1\xa2\x9aG\xd7\xe2B\xe6\xa0\x8e\xd6\ ! \xb3{\xe8\xd9\xbd\xde\xc7bXs\xc5\x82\x1f\xf8?*yR\x11\x10\'S=\x13(\x98k\x1c\ ! \xae\xec\xd5<~\x02d\xbe\xb4r\xcb\xc9\x80\xdf\xa5(uu\x9d\xaa\x9c#\xaa\x1c\x17\ ! 5\rN_\xc8\xc8\x82\xee#\xfb\xe6\xeb\x9bq7\\m\x10r\xfd`l\xee\xb9\xc6\xb9ijY\ ! \xf4o\xc8\xe5\xa2\xf9l\x8e\x86,\xde\xbe{\x7f\xe5\x8c\x19\xbeB\x1c\xb82\\\xdc\ ! :\xb3\xc0\x02NZ9\xfb\xe2\xd2\x84\x8a\x1e)\xba\xe7\xa6\xd5\x1dm\xad\x99Lx(\ ! \x12/x\x8f\x7f\xec\xf7\x87\xfd+\xe7\xc5e{\x9d.\xb9\x1f?\xde\xfc\xb4h}:\xa2?(\ ! 9}\xe3\xd1*\xe4\x83\x8c\xf7\xd2\t\x1c\x84q\xe9g\xc0\xd3\xcf)\xcf\xbdvX\xda]\ ! \x8c\x0ed#\x92\x1c\x8ex\x89\xd5h\x8c\x8f\xe8\x02\xffd2\xf1\x83\xe7\xefG\x9e!\ ! \xa7o\xf4[\xc4\x9e\xba!\xb2\x01\xc75Lc\xb8\xc1\xc6\xael@0\xc0?<\xde>ur\xa7\ ! \xdd\x93k\xce\xca\xc6\xfbt\x88~pQ\x15\x05~I\xfd7\xf4\xd9\xd3\x81\xfd8\xa7l\ ! \xf6\xdc\xa8\xa0\x9eKS\x01\x1e\xc8\xed\x1f\xd6\x9a\xfb\x1c\xa3N\xce\x03\xc3\ ! \x1cY\xbac\xc7\xa4\xc8}H\xd1_\x8bP\xbaR\xf3\xea\xc6!&\x1e\xb4\n\xb1\xc1Z\x84\ ! \xf9\xd6\xe8\x8e?\x08\xafq\x8f5\x11\xdb\x80\xc8\xd2\xbc\xb6p\xa6\x10\xda\xa2\ ! \xc5\x8a\x93\xef\xeb\x81\xcd\xef`\xebV\xd3\xc6\x1b\x0bM|\x80\xc5\x16\x83_\ ! \xfd\xa1\xc1\xa6R\x91\xdcf\xa6\xe26p\x8b\xd6\x19\xe1a\xa5\xec\xce\xb4\x11\n\ ! \xb1?K\x19\x85\xbb\xe6\xb3I\xb3$~(m\xbf\xce\x82\xe7:\x00]I\xa9\xb04`\xa3\xcf\ ! 
\x12D\xdf\xaf>\x1a\xcflnC\xc1H^\x0c\x95\x15gN\xaa\x1b\r\x7fp\xddT\x8c|\xbd2\ ! \xfa;\xbf\xe5\x01\x1b\x0c\xbf\xd1\xdfCs\xf7\x8b\x04\xe7\x82\x1e\x02\xf1R\x01\ ! \x1e{\xc1\x07\x99\xc1\xa1,\xd9\xd9\x84\x9d\xbc\x9aN^M\xcfN\xd9\xe9drjd?j\xad\ ! m\xd6R*\x9f\x8e\xc7\xdb\xedvT\xda\x05G\xb2X\x8f\x1d\x06\xf0\x1d\x97\xef\\ \ ! \xb7,\xb3\xb3\xcd\xd6!\x11\xa2#\xc0\xb4\xcc|(\x14Gy\xd8Y\xe6F\xc4D\xf9\xa0\ ! \x7f\xfd\x92\x86\x89\xdc\xd1L\xd8K\x90\xc09\xb3\x931/\xd5\xdc\x9aX\xa4\x89A\ ! \x9c8\x1e4EF;\xfd\xf1\xbc\xfe\xbfv\xfe\x05\xfb\x91lY' ) ### end --- 127,171 ---- \x8a\xb6\x94V\x89\x8as^(\xe2\xea8\xe2\x8a[H\xba\x8a\xa9\x98\xaeI3^\'(M\xea%\ \x8b\xcf)^u\x04\x10\xf3\xa4T\xef\xa5\xddP\xed\xbe}\xdb\xa7\x17\xc3v\xabj\xfa\ ! \xd9H\x12\x84\xe8\xc8\xfeSA\x17\x12\x82\x92\x8f\xedF&u\xda\t\x91U\xb6L**\xe9\ ! \x1bGwH\t\xd3\xd4\xadEb\x1c\t\x99\x1f7+)\xca\x0e\n\xb9\x05\xd8=\xd3\xa5\xea\ ! \xb9\x8f7{p.f\xa4\xbb\xbas?\xa9\xcf\xfb\xb3V\xf5\xa1\xd0\x9b2\xc9\xb6>\xa0/\ ! \xbfsm\xa1Oo\xf3\x95\xe4\xae\x9f\xa5fmv\x1f\x92\x17flC\xce*c\x9d8\x1f`\x1d\ ! \xc1\x9c9\xa9\xd7\x1a/X+,\x02`h\x1bJ\xbc\xc0@\xce\xe8Z%\x8bl\x18Cu\xcdc\xa7\ ! \\\x12gn\xb2\xdcT\xaeY!R\xf9@\xdf\x03\xd4\x97\xd2\x87{\x97\x03\xb6\x90\xc9\ ! \xfd\xb5\x12\xc5n\xd8\x0e\xea\xee\xc6\x14\xb0\xe9\x7f\xb1\xc9\xef\x8b\x10\ ! \xfb<+)xn\x14\xd2II|}\x93\xe7\x9fM\xba\xa70\x8c\n\x10\x1f\x82\xbe\x9a\x9b5{\ ! \x95\x1e\r\x89|\xaf\xdd\xde)\xae\xca\xe1\xd2G\xddmc\x83\x8fU\x1a\x82\xc9Bn\ ! \x8f:[\x17_:5B\xec[\xca\n\xbf999={\xa2\x12\xdb\x90\xda\xf1J\x94\xce=\x1d\xfe\x90\r\xea\xec\x04\xe4O#\xd8Z\x9a\ ! \xfb\x9c#\x96\xc4\xf7B\x0b,D=\x06\x8c`\xf5\xad\xd3\xa1\xfb\xa5Ss\xadB4\x16n\ ! \x8d\xac\x19T4#\xc8\x8d\xf6]\xbe\r\x13q\x8cg\xab\xc4\xdd\xef\t\x01P\xaf\xccg\ ! $x\x8e\xfd*B\x8b\xd4\x90\x80z\x13\x1c\x1f:\xe4G\xddBv\x1e\xdc\nU\x15YS\x88l\ ! \x7f\xd0\x85\x17\xd9\xf6\x1c\x01\xb6\xe8\x9ex:|\x1ez\xd0[=\xc6\xd3\x80h\x81\ ! \xd3@\xa2=\x8b\xc3\xe1\xaf\xbf\xf6Q<*\x1a\xff\x00\xd4\x9e\xd0\xb9\xba\xbfV\ ! 
\xb7\x1f>\xdd~\xb0\xa8\xe6\xd1\xbd\xb8\x909\xa8\xa3u\xed\x1e\xbav\xaf\xf7\ ! \xb5\x18\x16]\xb1\xe2\x07\xfe\x8fj\x9eT\x05\xc4\xc9T\xd0\x04\n\xe6\x1e\x87+{\ ! 7\x8f\xdf\x00\x99O\xad\xdcz2\xe0w)J]^\xa72\xe7\x88J\xc7EM\x83\xd3\'2\xb2\xa0\ ! \x0b\xc9\xbe\xf9\xfaf\xdc\rW\x1b\x84\\?\x18\x9b\x8b\xaeqn\x9aZ\x16\xfd\x1br\ ! \xb9h\xbe\x9b\xa3!\x8b\xb7\xef\xde_9c\x86\xef\x10\x07\xee\x0c\x17\xb7\xce,\ ! \xb0\x80\x93V\xd2\xbe\xb84\xb1\xa2G\x8a\xee\xb9yuG[k&\x13\x1e\x8a\xc4\x0b\ ! \xde\xe3\x1f\xfb\x01b\xff\xceyq\xd9^\xa7K\xee\xc7\x8f7?-Z\xdf\x8e\xe8/JN\xdf\ ! x\xb4\n\xf9 \xe3\xbdt\x06\x07q\\\xfa\x19\xf0\xf4s\xcas\xaf\x1d\x97v\x17\xa3\ ! \x03\xd9\x88$\x87#^b9\x1a\xe3#\xba\xc1?\x99L\xfc\xe0\xf9\x0b\x92g\xc8\xe9+\ ! \xfd\x16\xb1\xa7\xae\x88l\xc0q\r\xd3\x18n\xb0\xb1+\x1b\x10\x0c\xf0\x0f\x8f\ ! \xb7O\x9d\xdci\xf7\xe4\x9a\xb3\xb2\x01?\x1d\xa2\x1f\\TE\x81\x9fR\xff\r}\xf6t\ ! `?\xce)\x9b=7*\xa8\xe7\xd2T\x80\x07r\xfb\x87\xb5\xe6>\xc7\xa8\x93\xf4\xc00G\ ! \x96\xee\xd81)r\x1fR\xf4\xe7"\x94\xaf\xd4\xbc\xbaq\x88\x89\x07\xadBl\xb0\x18\ ! a>6\xba\xe3\x0f\xc2k\xdccM\xc46 \xb24\xaf-\x9c)\x84\xb6h\xb1\xe2\xe4\xfbz`\ ! \xf3;\xd8\xba\xd5\xb4\xf1\xcaB\x13\x1f`\xb1\xc5\xe0W\x7fi\xb0\xa9T$\xb7\x99)\ ! \xb9\r\\\xa3uFxX*\xbb3m\x84B\xec\xcfRF\xe1\xae\xf9n\xd2,\x89_J\xdb\xcf\xb3\ ! \xe0\xb9\x0e@WR*\xac\r\xd8\xe8\xb3\x04\xd1\xf7\xcb\x8f\xc63\x9b\xebP0\x92\ ! \x17Cu\xc5\x99\x93\xebF\xc3_\\7%#_\xaf\x8c\xfe\xceoy\xc0\x06\xc3o\xf4\x07\ ! \xd1\xdc\xfd$\xc1\xb9\xa1\x87@\xbcT\x80\xc7^\xf0Afp(Kv6a\'\xaf\xa6\x93W\xd3\ ! \xb3Sv:\x99\x9c\x1a\xd9\x8fZk\x9b\xb5\x94\xca\xa7\xe3\xf1v\xbb\x1d\x95v\xc1\ ! \x91,\xd6c\x87\x01|\xc7\xe5;7\xc8-\xcb\xecl\xb3uH\x84\xe8\x080-3\x1f\n\xc5Q\ ! \x1ev\x96\xb9\x121Q>\xe8_\xbf\xa6a"w4\x13\xf6\x12$p\xce\xecdLL5\xb7&\x16ib\ ! 
\x10\'\x8e\x07M\x91\xd1N\x7f=\xaf\xff\xb7\x9d\x7f\x01A?l\xc5' ) ### end From nascheme at users.sourceforge.net Sun Feb 16 09:05:09 2003 From: nascheme at users.sourceforge.net (Neil Schemenauer) Date: Sun Feb 16 12:05:13 2003 Subject: [Spambayes-checkins] spambayes mailsort.py,1.5,1.6 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv8332 Modified Files: mailsort.py Log Message: Add -c and -d options that allow the location of the DB file and options file to be specified. Index: mailsort.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/mailsort.py,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** mailsort.py 29 Jan 2003 03:23:34 -0000 1.5 --- mailsort.py 16 Feb 2003 17:05:07 -0000 1.6 *************** *** 17,21 **** RC_DIR = "~/.spambayes" DB_FILE = RC_DIR + "/wordprobs.cdb" ! OPTION_FILE = RC_DIR + "/bayescustomize.ini" import sys --- 17,21 ---- RC_DIR = "~/.spambayes" DB_FILE = RC_DIR + "/wordprobs.cdb" ! CONFIG_FILE = RC_DIR + "/bayescustomize.ini" import sys *************** *** 29,38 **** DB_FILE = os.path.expanduser(DB_FILE) - if not os.environ.has_key('BAYESCUSTOMIZE'): - os.environ['BAYESCUSTOMIZE'] = os.path.expanduser(OPTION_FILE) ! from spambayes import mboxutils ! from spambayes.cdb_classifier import CdbClassifer ! from spambayes.tokenizer import tokenize --- 29,40 ---- DB_FILE = os.path.expanduser(DB_FILE) ! def import_spambayes(): ! global mboxutils, CdbClassifer, tokenize ! if not os.environ.has_key('BAYESCUSTOMIZE'): ! os.environ['BAYESCUSTOMIZE'] = os.path.expanduser(CONFIG_FILE) ! from spambayes import mboxutils ! from spambayes.cdb_classifier import CdbClassifer ! from spambayes.tokenizer import tokenize *************** *** 135,140 **** raise ! def print_message_score(msg_name): ! 
msg = email.message_from_file(open(msg_name)) bayes = CdbClassifer(open(DB_FILE, 'rb')) prob, evidence = bayes.spamprob(tokenize(msg), evidence=True) --- 137,142 ---- raise ! def print_message_score(msg_name, msg_fp): ! msg = email.message_from_file(msg_fp) bayes = CdbClassifer(open(DB_FILE, 'rb')) prob, evidence = bayes.spamprob(tokenize(msg), evidence=True) *************** *** 144,168 **** def main(): try: ! opts, args = getopt.getopt(sys.argv[1:], 'ts') except getopt.error, msg: usage(2, msg) ! if len(opts) > 1: ! usage(2, 'conflicting options') ! if not opts: if len(args) != 2: usage(2, 'wrong number of arguments') filter_message(args[0], args[1]) ! elif opts[0][0] == '-t': if len(args) != 2: usage(2, 'wrong number of arguments') train_messages(args[0], args[1]) ! elif opts[0][0] == '-s': ! for msg in args: ! print_message_score(msg) ! else: ! raise RuntimeError # shouldn't get here --- 146,185 ---- def main(): + global DB_FILE, CONFIG_FILE + try: ! opts, args = getopt.getopt(sys.argv[1:], 'tsd:c:') except getopt.error, msg: usage(2, msg) ! mode = 'sort' ! for opt, val in opts: ! if opt == '-t': ! mode = 'train' ! elif opt == '-s': ! mode = 'score' ! elif opt == '-d': ! DB_FILE = val ! elif opt == '-c': ! CONFIG_FILE = val ! else: ! assert 0, 'invalid option' ! import_spambayes() ! ! if mode == 'sort': if len(args) != 2: usage(2, 'wrong number of arguments') filter_message(args[0], args[1]) ! elif mode == 'train': if len(args) != 2: usage(2, 'wrong number of arguments') train_messages(args[0], args[1]) ! elif mode == 'score': ! if args: ! for msg in args: ! print_message_score(msg, open(msg)) ! else: ! 
print_message_score('', sys.stdin) From nascheme at users.sourceforge.net Sun Feb 16 09:06:10 2003 From: nascheme at users.sourceforge.net (Neil Schemenauer) Date: Sun Feb 16 12:06:13 2003 Subject: [Spambayes-checkins] spambayes/utilities dump_cdb.py,NONE,1.1 Message-ID: Update of /cvsroot/spambayes/spambayes/utilities In directory sc8-pr-cvs1:/tmp/cvs-serv8772a Added Files: dump_cdb.py Log Message: Display the contents of a mailsort.py CDB database. --- NEW FILE: dump_cdb.py --- #! /usr/bin/env python RC_DIR = "~/.spambayes" DB_FILE = RC_DIR + "/wordprobs.cdb" import sys import os DB_FILE = os.path.expanduser(DB_FILE) from spambayes.cdb import Cdb def main(): if len(sys.argv) == 2: db_file = sys.argv[1] else: db_file = os.path.expanduser(DB_FILE) db = Cdb(open(db_file, 'rb')) items = [] for k, v in db.iteritems(): items.append((float(v), k)) items.sort() for v, k in items: print k, v if __name__ == "__main__": main() From timstone4 at users.sourceforge.net Tue Feb 18 18:01:58 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Tue Feb 18 21:02:02 2003 Subject: [Spambayes-checkins] spambayes/spambayes Options.py,1.13,1.14 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv7343 Modified Files: Options.py Log Message: Correct pop3proxy_notate_to default Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** Options.py 14 Feb 2003 02:29:03 -0000 1.13 --- Options.py 19 Feb 2003 02:01:54 -0000 1.14 *************** *** 388,392 **** pop3proxy_persistent_use_database: True pop3proxy_persistent_storage_file: hammie.db ! pop3proxy_notate_to: True # Deprecated - use pop3proxy_servers and pop3proxy_ports instead. --- 388,392 ---- pop3proxy_persistent_use_database: True pop3proxy_persistent_storage_file: hammie.db ! 
pop3proxy_notate_to: Falsee # Deprecated - use pop3proxy_servers and pop3proxy_ports instead. From timstone4 at users.sourceforge.net Tue Feb 18 18:03:23 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Tue Feb 18 21:03:26 2003 Subject: [Spambayes-checkins] spambayes/spambayes Options.py,1.14,1.15 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv7723 Modified Files: Options.py Log Message: Just exactly what is Falsee? Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** Options.py 19 Feb 2003 02:01:54 -0000 1.14 --- Options.py 19 Feb 2003 02:03:20 -0000 1.15 *************** *** 388,392 **** pop3proxy_persistent_use_database: True pop3proxy_persistent_storage_file: hammie.db ! pop3proxy_notate_to: Falsee # Deprecated - use pop3proxy_servers and pop3proxy_ports instead. --- 388,392 ---- pop3proxy_persistent_use_database: True pop3proxy_persistent_storage_file: hammie.db ! pop3proxy_notate_to: False # Deprecated - use pop3proxy_servers and pop3proxy_ports instead. From mhammond at users.sourceforge.net Tue Feb 18 20:18:03 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Tue Feb 18 23:18:06 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000 manager.py,1.47,1.48 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1:/tmp/cvs-serv13305 Modified Files: manager.py Log Message: Store our config files in the "correct" Windows directory, using the SHGetFolderPath function to locate it. If we can't locate this, or can't create our SpamBayes directory under this, we stick with the "application directory". Code also exists to migrate your existing databases to this directory. First time you run Outlook after this update, your .pck/.db files will be *moved* to the new directory. 
Thus, no re-training should be necessary. About ready to release a stand-alone SpamBayes Outlook Plugin binary :) Index: manager.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/manager.py,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** manager.py 14 Feb 2003 01:24:21 -0000 1.47 --- manager.py 19 Feb 2003 04:18:01 -0000 1.48 *************** *** 5,8 **** --- 5,9 ---- import sys import errno + import shutil import win32com.client *************** *** 138,145 **** self.verbose = verbose self.application_directory = os.path.dirname(this_filename) if not os.path.isabs(config_base): ! config_base = os.path.join(self.application_directory, config_base) config_base = os.path.abspath(config_base) self.ini_filename = config_base + "_bayes_customize.ini" self.config_filename = config_base + "_configuration.pck" --- 139,149 ---- self.verbose = verbose self.application_directory = os.path.dirname(this_filename) + self.data_directory = self.LocateDataDirectory() + self.MigrateDataDirectory() if not os.path.isabs(config_base): ! config_base = os.path.join(self.data_directory, config_base) config_base = os.path.abspath(config_base) + self.ini_filename = config_base + "_bayes_customize.ini" self.config_filename = config_base + "_configuration.pck" *************** *** 162,166 **** self.message_store = msgstore.MAPIMsgStore(outlook) ! # Outlook gives us thread grief :( def WorkerThreadStarting(self): pythoncom.CoInitialize() --- 166,171 ---- self.message_store = msgstore.MAPIMsgStore(outlook) ! # Outlook used to give us thread grief - now we avoid Outlook ! # from threads, but this remains a worthwhile abstraction. def WorkerThreadStarting(self): pythoncom.CoInitialize() *************** *** 169,172 **** --- 174,216 ---- pythoncom.CoUninitialize() + def LocateDataDirectory(self): + # Locate the best directory the our data files. 
+ from win32com.shell import shell, shellcon + try: + appdata = shell.SHGetFolderPath(0,shellcon.CSIDL_APPDATA,0,0) + path = os.path.join(appdata, "SpamBayes") + if not os.path.isdir(path): + os.makedirs(path) + return path + except pythoncom.com_error: + # Function doesn't exist on early win95, + # and it may just fail anyway! + return self.application_directory + except EnvironmentError: + # Can't make the directory. + return self.application_directory + + def MigrateDataDirectory(self): + # A bit of a nod to save people doing a full retrain. + # Try and locate our files in the old location, and move + # then to the new one. + # Also used first time SpamBayes is run - this will cause + # the ini file to be *copied* to the correct directory + self._MigrateFile("default_bayes_customize.ini", False) + self._MigrateFile("default_bayes_database.pck") + self._MigrateFile("default_bayes_database.db") + self._MigrateFile("default_message_database.pck") + self._MigrateFile("default_message_database.db") + self._MigrateFile("default_configuration.pck") + + def _MigrateFile(self, filename, do_move = True): + src = os.path.join(self.application_directory, filename) + dest = os.path.join(self.data_directory, filename) + if os.path.isfile(src) and not os.path.isfile(dest): + if do_move: + shutil.move(src, dest) + else: + shutil.copyfile(src, dest) + def FormatFolderNames(self, folder_ids, include_sub): names = [] From mhammond at users.sourceforge.net Thu Feb 20 00:14:01 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Thu Feb 20 03:14:04 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000/installer - New directory Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000/installer In directory sc8-pr-cvs1:/tmp/cvs-serv22686/installer Log Message: Directory /cvsroot/spambayes/spambayes/Outlook2000/installer added to the repository From mhammond at users.sourceforge.net Thu Feb 20 00:16:33 2003 From: mhammond at users.sourceforge.net (Mark Hammond) 
Date: Thu Feb 20 03:16:35 2003 Subject: [Spambayes-checkins] spambayes/Outlook2000/installer .cvsignore,NONE,1.1 README.txt,NONE,1.1 spambayes_addin.iss,NONE,1.1 spambayes_addin.py,NONE,1.1 spambayes_addin.spec,NONE,1.1 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000/installer In directory sc8-pr-cvs1:/tmp/cvs-serv23436 Added Files: .cvsignore README.txt spambayes_addin.iss spambayes_addin.py spambayes_addin.spec Log Message: First checkin of utilities for a stand-alone distribution. --- NEW FILE: .cvsignore --- SpamBayes-Outlook-Setup.exe buildspambayes_addin dist warnspambayes_addin.txt --- NEW FILE: README.txt --- This directory contains scripts for the McMillan Installer, and the Inno Setup Script. Executing: {Installer}\Build.py spambayes_addin.spec Will create 'dist' and 'build_spambayes' directories. Inno setup then is used to create the installation EXE. --- NEW FILE: spambayes_addin.iss --- ; ; Inno Setup 3.x setup file for the Spambayes Outlook Addin ; [Setup] AppName=Spambayes Outlook Addin AppVerName=Spambayes Outlook Addin 0.0.1 AppVersion=0.0.1 DefaultDirName={pf}\Spambayes Outlook Addin DefaultGroupName=Spambayes Outlook Addin OutputDir=. 
OutputBaseFilename=SpamBayes-Outlook-Setup [Files] Source: "dist\spambayes_addin.dll"; DestDir: "{app}"; Flags: ignoreversion regserver Source: "dist\*.*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs [UninstallDelete] Type: filesandordirs; Name: "{app}\support" --- NEW FILE: spambayes_addin.py --- import sys import string import os import pythoncom pythoncom.frozen = 1 inprocess = getattr(sys, 'frozen', None) import addin klasses = (addin.OutlookAddin,) def DllRegisterServer(): import win32com.server.register win32com.server.register.RegisterClasses(*klasses) addin.RegisterAddin(addin.OutlookAddin) return 0 def DllUnregisterServer(): import win32com.server.register win32com.server.register.UnregisterClasses(*klasses) addin.UnregisterAddin(addin.OutlookAddin) return 0 if sys.frozen!="dll": import win32com.server.localserver for i in range(1, len(sys.argv)): arg = string.lower(sys.argv[i]) if string.find(arg, "/reg") > -1 or string.find(arg, "--reg") > -1: DllRegisterServer() break if string.find(arg, "/unreg") > -1 or string.find(arg, "--unreg") > -1: DllUnregisterServer() break # MS seems to like /automate to run the class factories. if string.find(arg, "/automate") > -1: clsids = [] for k in klasses: clsids.append(k._reg_clsid_) win32com.server.localserver.serve(clsids) break else: # You could do something else useful here. import win32api win32api.MessageBox(0, "This program hosts a COM Object and\r\nis started automatically", "COM Object") --- NEW FILE: spambayes_addin.spec --- # # Specification file for Installer to construct an installable version of # the Spambayes Outlook Addin # from os.path import basename, abspath, join debug = 0 INSTALLER_ROOT = HOMEPATH PROJECT_ROOT=".." # Extra files we need - docs, images, etc. extras = [] # All files in the image directory. 
import glob for fname in glob.glob(PROJECT_ROOT + "/images/*"): if os.path.isfile(fname): extras.append( ("images/"+basename(fname), abspath(fname), 'DATA') ) # docs extras.append( ("about.html", join(PROJECT_ROOT, "about.html"), 'DATA') ) # config extras.append( ("default_bayes_customize.ini", join(PROJECT_ROOT, "default_bayes_customize.ini"), 'DATA') ) excludes = ['timer', 'dde', 'win32help'] a = Analysis([INSTALLER_ROOT+'/support/_mountzlib.py', INSTALLER_ROOT+'/support/useUnicode.py', 'spambayes_addin.py'], excludes = excludes, pathex=[PROJECT_ROOT,os.path.join(PROJECT_ROOT, '..')]) pyz = PYZ(a.pure) #exe = EXE(pyz, # a.scripts, # exclude_binaries=1, # name='buildspambayes_addin/spambayes_addin.exe', # debug=0, # strip=0, # console=0 ) dll = DLL(pyz, a.scripts, exclude_binaries=1, name='buildspambayes_addin/spambayes_addin.dll', debug=debug) coll = COLLECT(dll, a.binaries + extras - [('MAPI32.dll','','')], strip=0, debug=debug, name='dist') From timstone4 at users.sourceforge.net Tue Feb 25 10:25:16 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Tue Feb 25 13:25:20 2003 Subject: [Spambayes-checkins] spambayes notesfilter.py,NONE,1.1 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv21816 Added Files: notesfilter.py Log Message: This is an initial crack at a Lotus Notes "integration" of Spambayes. It is currently tested on Notes 5.0.11, and the term "tested" is used somewhat loosely. This is definitely alpha level software, so backup early, backup often. This is not a plugin as in the Outlook plugin. It is a command line program that interfaces with Notes, applying the Spambayes filter to mail in a Notes mailbox. It is working well for me at the moment, and I'm sure I'll find plenty of things I want to enhance. I have no plans at the moment for an Outlookish style plugin for Notes. --- NEW FILE: notesfilter.py --- #! /usr/bin/env python '''notesfilter.py - Lotus Notes Spambayes interface. 
Classes: Abstract: This module uses Spambayes as a filter against a Lotus Notes mail database. The Notes client must be running when this process is executed. It requires a Notes folder, named as a parameter, with four subfolders: Spam Ham Train as Spam Train as Ham It classifies mail that is in the inbox. Mail that is classified as spam is moved to the Spam folder. Mail that is to be trained as spam should be manually moved to that folder by the user. Likewise mail that is to be trained as ham. After training, spam is moved to the Spam folder and ham is moved to the Ham folder. Because there is no programmatic way to determine if a particular mail has been previously processed by this classification program, it keeps a pickled dictionary of notes mail ids, so that once a mail has been classified, it will not be classified again. The non-existence of this index file, named .'sbindex', indicates to the system that this is the first time it has been run. Rather than classify the inbox in this case, the contents of the inbox are placed in the index to note the 'starting point' of the system. After that, any new messages in the inbox are eligible for classification. Usage: notesfilter [options] note: option values with spaces in them must be enclosed in double quotes options: -d dbname : pickled training database filename -D dbname : dbm training database filename -l dbname : database filename of local mail replica e.g. localmail.nsf -r server : server address of the server mail database e.g. 
d27ml602/27/M/IBM if specified, will initiate a replication -f folder : Name of spambayes folder must have subfolders: Spam Ham Train as Spam Train as Ham -t : train contents of Train as Spam and Train as Ham -c : classify inbox -h : help Examples: Replicate and classify inbox notesfilter -c -d notesbayes -r mynoteserv -l mail.nsf -f Spambayes Train Spam and Ham, then classify inbox notesfilter -t -c -d notesbayes -l mail.nsf -f Spambayes Replicate, then classify inbox notesfilter -c -d test7 -l mail.nsf -r nynoteserv -f Spambayes To Do: o Dump/purge notesindex file o Show h:s ratio, make recommendations o Create correct folders if they do not exist o Options for some of this stuff? o pop3proxy style training/configuration interface? o Suggestions? ''' # This module is part of the spambayes project, which is Copyright 2002 # The Python Software Foundation and is covered by the Python Software # Foundation license. __author__ = "Tim Stone " __credits__ = "Mark Hammond, for his remarkable win32 module." 
from __future__ import generators try: True, False except NameError: # Maintain compatibility with Python 2.2 True, False = 1, 0 def bool(val): return not not val import sys from spambayes import tokenizer, storage from spambayes.Options import options import cPickle as pickle import errno import win32com.client import getopt def classifyInbox(v, vmoveto, bayes, ldbname): # the notesindex hash ensures that a message is looked at only once try: fp = open("%s.sbindex" % (ldbname), 'rb') except IOError, e: if e.errno != errno.ENOENT: raise notesindex = {} print "notesindex file not found, this is a first time run" print "No classification will be performed" firsttime = 1 else: notesindex = pickle.load(fp) fp.close() firsttime = 0 docstomove = [] numham = 0 numspam = 0 numuns = 0 numdocs = 0 doc = v.GetFirstDocument() while doc: nid = doc.NOTEID if firsttime: notesindex[nid] = 'never classified' else: if not notesindex.has_key(nid): numdocs += 1 try: subj = doc.GetItemValue('Subject')[0] except: subj = 'No Subject' try: body = doc.GetItemValue('Body')[0] except: body = 'No Body' message = "Subject: %s\r\n%s" % (subj, body) # generate_long_skips = True blows up on occ. 
options.generate_long_skips = False tokens = tokenizer.tokenize(message) prob, clues = bayes.spamprob(tokens, evidence=True) if prob < options.ham_cutoff: disposition = options.header_ham_string numham += 1 elif prob > options.spam_cutoff: disposition = options.header_spam_string docstomove += [doc] numspam += 1 else: disposition = options.header_unsure_string numuns += 1 notesindex[nid] = disposition doc = v.GetNextDocument(doc) for doc in docstomove: doc.RemoveFromFolder(v.Name) doc.PutInFolder(vmoveto.Name) print "%s documents processed" % (numdocs) print " %s classified as spam" % (numspam) print " %s classified as ham" % (numham) print " %s classified as unsure" % (numuns) fp = open("timstone.nsf.sbindex", 'wb') pickle.dump(notesindex, fp) fp.close() def processAndTrain(v, vmoveto, bayes, is_spam): if is_spam: str = "spam" else: str = "ham" print "Training %s" % (str) docstomove = [] doc = v.GetFirstDocument() while doc: try: subj = doc.GetItemValue('Subject')[0] except: subj = 'No Subject' try: body = doc.GetItemValue('Body')[0] except: body = 'No Body' message = "Subject: %s\r\n%s" % (subj, body) options.generate_long_skips = False tokens = tokenizer.tokenize(message) bayes.learn(tokens, is_spam) docstomove += [doc] doc = v.GetNextDocument(doc) for doc in docstomove: doc.RemoveFromFolder(v.Name) doc.PutInFolder(vmoveto.Name) print "%s documents trained" % (len(docstomove)) def run(bdbname, useDBM, ldbname, rdbname, foldname, doTrain, doClassify): if useDBM: bayes = storage.DBDictClassifier(bdbname) else: bayes = storage.PickledClassifier(bdbname) sess = win32com.client.Dispatch("Lotus.NotesSession") sess.initialize() db = sess.GetDatabase("",ldbname) vinbox = db.getView('($Inbox)') vspam = db.getView("%s\Spam" % (foldname)) vham = db.getView("%s\Ham" % (foldname)) vtrainspam = db.getView("%s\Train as Spam" % (foldname)) vtrainham = db.getView("%s\Train as Ham" % (foldname)) if rdbname: print "Replicating..." 
db.Replicate(rdbname) print "Done" if doTrain: processAndTrain(vtrainspam, vspam, bayes, True) # for some reason, using inbox as a target here loses the mail processAndTrain(vtrainham, vham, bayes, False) if doClassify: classifyInbox(vinbox, vspam, bayes, ldbname) bayes.store() if __name__ == '__main__': try: opts, args = getopt.getopt(sys.argv[1:], 'htcd:D:l:r:f:') except getopt.error, msg: print >>sys.stderr, str(msg) + '\n\n' + __doc__ sys.exit() bdbname = None # bayes database name ldbname = None # local notes database name rdbname = None # remote notes database location sbfname = None # spambayes folder name doTrain = False doClassify = False for opt, arg in opts: if opt == '-h': print >>sys.stderr, __doc__ sys.exit() elif opt == '-d': useDBM = False bdbname = arg elif opt == '-D': useDBM = True bdbname = arg elif opt == '-l': ldbname = arg elif opt == '-r': rdbname = arg elif opt == '-f': sbfname = arg elif opt == '-t': doTrain = True elif opt == '-c': doClassify = True if (bdbname and ldbname and sbfname and (doTrain or doClassify)): run(bdbname, useDBM, ldbname, rdbname, \ sbfname, doTrain, doClassify) else: print >>sys.stderr, __doc__ From anadelonbrin at users.sourceforge.net Tue Feb 25 15:57:49 2003 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Feb 25 18:57:53 2003 Subject: [Spambayes-checkins] spambayes pop3proxy.py,1.52,1.53 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv14752 Modified Files: pop3proxy.py Log Message: Minor typo. Index: pop3proxy.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v retrieving revision 1.52 retrieving revision 1.53 diff -C2 -d -r1.52 -r1.53 *** pop3proxy.py 9 Feb 2003 02:25:45 -0000 1.52 --- pop3proxy.py 25 Feb 2003 23:57:47 -0000 1.53 *************** *** 1072,1076 **** state.createWorkers() ! # Close the exsiting listeners and create new ones. 
This won't # affect any running proxies - once a listener has created a proxy, # that proxy is then independent of it. --- 1072,1076 ---- state.createWorkers() ! # Close the existing listeners and create new ones. This won't # affect any running proxies - once a listener has created a proxy, # that proxy is then independent of it. From timstone4 at users.sourceforge.net Tue Feb 25 16:02:08 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Tue Feb 25 19:02:11 2003 Subject: [Spambayes-checkins] spambayes/spambayes Options.py,1.15,1.16 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv16196 Modified Files: Options.py Log Message: Add option for pop3proxy notation of Subject: header Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** Options.py 19 Feb 2003 02:03:20 -0000 1.15 --- Options.py 26 Feb 2003 00:02:06 -0000 1.16 *************** *** 389,392 **** --- 389,393 ---- pop3proxy_persistent_storage_file: hammie.db pop3proxy_notate_to: False + pop3proxy_notate_subject: False # Deprecated - use pop3proxy_servers and pop3proxy_ports instead. 
*************** *** 495,498 **** --- 496,500 ---- 'pop3proxy_persistent_storage_file': string_cracker, 'pop3proxy_notate_to': boolean_cracker, + 'pop3proxy_notate_subject': boolean_cracker, }, 'html_ui': {'html_ui_port': int_cracker, From timstone4 at users.sourceforge.net Tue Feb 25 16:11:42 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Tue Feb 25 19:11:45 2003 Subject: [Spambayes-checkins] spambayes pop3proxy.py,1.53,1.54 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv19573 Modified Files: pop3proxy.py Log Message: Add option for pop3proxy notation of Subject: header Index: pop3proxy.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v retrieving revision 1.53 retrieving revision 1.54 diff -C2 -d -r1.53 -r1.54 *** pop3proxy.py 25 Feb 2003 23:57:47 -0000 1.53 --- pop3proxy.py 26 Feb 2003 00:11:38 -0000 1.54 *************** *** 500,509 **** headers = headers + "\n" + header + "\r\n" ! if options.pop3proxy_notate_to: ! # add 'spam' as recip tore = re.compile("^To: ", re.IGNORECASE | re.MULTILINE) headers = re.sub(tore,"To: %s," % (disposition), headers) messageText = headers + body --- 500,517 ---- headers = headers + "\n" + header + "\r\n" ! if options.pop3proxy_notate_to \ ! and disposition == options.header_spam_string: ! 
# add 'spam' as recip only if spam tore = re.compile("^To: ", re.IGNORECASE | re.MULTILINE) headers = re.sub(tore,"To: %s," % (disposition), headers) + if options.pop3proxy_notate_subject \ + and disposition == options.header_spam_string: + # add 'spam' to subject if spam + tore = re.compile("^Subject: ", re.IGNORECASE | re.MULTILINE) + headers = re.sub(tore,"Subject: %s " % (disposition), + headers) + messageText = headers + body *************** *** 979,984 **** message = file or text message = message.replace('\r\n', '\n').replace('\r', '\n') # For Macs - tokens = tokenizer.tokenize(message) - probability, clues = state.bayes.spamprob(tokens, evidence=True) cluesTable = self.html.cluesTable.clone() --- 987,990 ---- From anadelonbrin at users.sourceforge.net Tue Feb 25 20:36:23 2003 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Feb 25 23:36:26 2003 Subject: [Spambayes-checkins] spambayes/spambayes Corpus.py,1.5,1.6 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv24373/spambayes Modified Files: Corpus.py Log Message: Minor bug in Corpus.get() which meant that it would never return the default value, but would instead create a non-existant message. Index: Corpus.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Corpus.py,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** Corpus.py 28 Jan 2003 07:39:31 -0000 1.5 --- Corpus.py 26 Feb 2003 04:36:21 -0000 1.6 *************** *** 202,209 **** def get(self, key, default=None): ! try: ! return self[key] ! except KeyError: return default def __getitem__(self, key): --- 202,214 ---- def get(self, key, default=None): ! # the old version would never return the default, ! # it would just create a new message, even if that ! # message did not exist in the cache ! # we need to check for the key in our msgs, but we can't check ! 
# for None, because that signifies a non-cached message ! if self.msgs.get(key, "") is "": return default + else: + return self[key] def __getitem__(self, key): From mhammond at users.sourceforge.net Tue Feb 25 21:12:42 2003 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Wed Feb 26 00:12:46 2003 Subject: [Spambayes-checkins] spambayes/spambayes dbmstorage.py,1.3,1.4 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv1668 Modified Files: dbmstorage.py Log Message: Python 2.2 and earlier ended up trying bsddb3 twice if it fails. Index: dbmstorage.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/dbmstorage.py,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** dbmstorage.py 3 Feb 2003 22:46:26 -0000 1.3 --- dbmstorage.py 26 Feb 2003 05:12:40 -0000 1.4 *************** *** 35,40 **** if sys.version_info >= (2,3): funcs.insert(0, open_dbhash) - else: - funcs.insert(0, open_db3hash) else: funcs = [open_db3hash, open_dbhash, open_gdbm, open_dumbdbm] --- 35,38 ---- From anadelonbrin at users.sourceforge.net Wed Feb 26 20:13:34 2003 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Thu Feb 27 00:13:39 2003 Subject: [Spambayes-checkins] spambayes/spambayes Options.py,1.16,1.17 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv30319/spambayes Modified Files: Options.py Log Message: Provides options to (a) add an id (as a header or in the body) to incoming mail, (b) strip such ids from incoming mail, (c) find (via the ui) a message based on id, (d) train messages by forwarding/bouncing them to a smtpproxy. 
Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** Options.py 26 Feb 2003 00:02:06 -0000 1.16 --- Options.py 27 Feb 2003 04:13:32 -0000 1.17 *************** *** 389,392 **** --- 389,397 ---- pop3proxy_persistent_storage_file: hammie.db pop3proxy_notate_to: False + pop3proxy_add_mailid_header: False + pop3proxy_mailid_header_name: X-Spambayes-MailId + pop3proxy_mailid_as_header: False + pop3proxy_mailid_in_msgbody: False + pop3proxy_strip_incoming_mailids: False pop3proxy_notate_subject: False *************** *** 396,399 **** --- 401,411 ---- pop3proxy_port: 110 + [smtpproxy] + smtpproxy_servers: + smtpproxy_ports: + smtpproxy_ham_address = spambayes_ham@localhost + smtpproxy_spam_address = spambayes_spam@localhost + smtpproxy_shutdown_address = spambayes_shutdown@localhost + [html_ui] html_ui_port: 8880 *************** *** 496,501 **** --- 508,524 ---- 'pop3proxy_persistent_storage_file': string_cracker, 'pop3proxy_notate_to': boolean_cracker, + 'pop3proxy_add_mailid_header' : boolean_cracker, + 'pop3proxy_mailid_header_name' : string_cracker, + 'pop3proxy_mailid_as_header' : boolean_cracker, + 'pop3proxy_mailid_in_msgbody' : boolean_cracker, + 'pop3proxy_strip_incoming_mailids' : boolean_cracker, 'pop3proxy_notate_subject': boolean_cracker, }, + 'smtpproxy': {'smtpproxy_ham_address' : string_cracker, + 'smtpproxy_spam_address' : string_cracker, + 'smtpproxy_shutdown_address' : string_cracker, + 'smtpproxy_servers' : string_cracker, + 'smtpproxy_ports' : string_cracker, + }, 'html_ui': {'html_ui_port': int_cracker, 'html_ui_launch_browser': boolean_cracker, *************** *** 581,583 **** if not optionsPathname: ! optionsPathname = os.path.abspath('bayescustomize.ini') --- 604,606 ---- if not optionsPathname: ! 
optionsPathname = os.path.abspath('bayescustomize.ini') \ No newline at end of file From anadelonbrin at users.sourceforge.net Wed Feb 26 20:13:34 2003 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Thu Feb 27 00:13:41 2003 Subject: [Spambayes-checkins] spambayes pop3proxy.py,1.54,1.55 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv30319 Modified Files: pop3proxy.py Log Message: Provides options to (a) add an id (as a header or in the body) to incoming mail, (b) strip such ids from incoming mail, (c) find (via the ui) a message based on id, (d) train messages by forwarding/bouncing them to a smtpproxy. Index: pop3proxy.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v retrieving revision 1.54 retrieving revision 1.55 diff -C2 -d -r1.54 -r1.55 *** pop3proxy.py 26 Feb 2003 00:11:38 -0000 1.54 --- pop3proxy.py 27 Feb 2003 04:13:31 -0000 1.55 *************** *** 23,26 **** --- 23,27 ---- (default 8880; Browse http://localhost:8880/) -b : Launch a web browser showing the user interface. + -s : Start a SMTPProxy server for training use. All command line arguments and switches take their default *************** *** 496,502 **** state.numUnsure += 1 ! header = '%s: %s\r\n' % (options.hammie_header_name, disposition) headers, body = re.split(r'\n\r?\n', messageText, 1) ! headers = headers + "\n" + header + "\r\n" if options.pop3proxy_notate_to \ --- 497,520 ---- state.numUnsure += 1 ! if options.pop3proxy_strip_incoming_mailids == True: ! s = re.compile(options.pop3proxy_mailid_header_name + ': [\d-]+[\\r]?[\\n]?') ! messageText = s.sub('', messageText) ! headers, body = re.split(r'\n\r?\n', messageText, 1) ! messageName = state.getNewMessageName() ! if command == 'RETR' and not state.isTest \ ! and options.pop3proxy_add_mailid_header == True: ! if options.pop3proxy_mailid_as_header == True: ! 
id_header = options.pop3proxy_mailid_header_name + ": " \ ! + messageName + "\r\n" ! if options.pop3proxy_mailid_in_msgbody == True: ! body = body[:len(body)-3] + \ ! options.pop3proxy_mailid_header_name + ": " \ ! + messageName + "\r\n.\r\n" ! else: ! id_header = options.hammie_header_name + "-ID: Test\r\n" ! ! header = '%s: %s\r\n' % (options.hammie_header_name, disposition) ! headers = headers + "\n" + header + id_header + "\r\n" if options.pop3proxy_notate_to \ *************** *** 519,523 **** if command == 'RETR' and not state.isTest: # Write the message into the Unknown cache. - messageName = state.getNewMessageName() message = state.unknownCorpus.makeMessage(messageName) message.setSubstance(messageText) --- 537,540 ---- *************** *** 650,654 **** self._buildClassifyBox() + self._buildBox('Word query', 'query.gif', ! self.html.wordQuery)) self._writePreamble("Home") self.write(content) --- 667,674 ---- self._buildClassifyBox() + self._buildBox('Word query', 'query.gif', ! self.html.wordQuery) + ! self._buildBox('Find message', 'query.gif', ! self.html.findMessage) ! ) self._writePreamble("Home") self.write(content) *************** *** 877,888 **** numDeferred += 1 if targetCorpus: ! try: ! targetCorpus.takeMessage(id, state.unknownCorpus) ! if numTrained == 0: ! self.write("

    Training... ") ! self.flush() ! numTrained += 1 ! except KeyError: ! pass # Must be a reload. # Report on any training, and save the database if there was any. --- 897,916 ---- numDeferred += 1 if targetCorpus: ! sourceCorpus = None ! if state.unknownCorpus.get(id) is not None: ! sourceCorpus = state.unknownCorpus ! elif state.hamCorpus.get(id) is not None: ! sourceCorpus = state.hamCorpus ! elif state.spamCorpus.get(id) is not None: ! sourceCorpus = state.spamCorpus ! if sourceCorpus is not None: ! try: ! targetCorpus.takeMessage(id, sourceCorpus) ! if numTrained == 0: ! self.write("

    Training... ") ! self.flush() ! numTrained += 1 ! except KeyError: ! pass # Must be a reload. # Report on any training, and save the database if there was any. *************** *** 895,898 **** --- 923,929 ---- self.write("
     ") + title = "" + keys = [] + sourceCorpus = state.unknownCorpus # If any messages were deferred, show the same page again. if numDeferred > 0: *************** *** 916,919 **** --- 947,980 ---- start = self._keyToTimestamp(params['prior']) + # Else if an id has been specified, just show that message + elif params.get('find') is not None: + key = params['find'] + error = False + if key == "": + error = True + page = "

    You must enter an id to find.

    " + elif state.unknownCorpus.get(key) == None: + # maybe this message has been moved to the spam + # or ham corpus + if state.hamCorpus.get(key) != None: + sourceCorpus = state.hamCorpus + elif state.spamCorpus.get(key) != None: + sourceCorpus = state.spamCorpus + else: + error = True + page = "

    Could not find message with id '" + page += key + "' - maybe it expired.

    " + if error == True: + title = "Did not find message" + box = self._buildBox(title, 'status.gif', page) + self.write(box) + self.write(self._buildBox('Find message', 'query.gif', + self.html.findMessage)) + self._writePostamble() + return + keys.append(params['find']) + prior = this = next = 0 + title = "Found message" + # Else show the most recent day's page, as decided by _buildReviewKeys. else: *************** *** 921,925 **** # Build the lists of messages: spams, hams and unsure. ! keys, date, prior, this, next = self._buildReviewKeys(start) keyedMessageInfo = {options.header_spam_string: [], options.header_ham_string: [], --- 982,987 ---- # Build the lists of messages: spams, hams and unsure. ! if len(keys) == 0: ! keys, date, prior, this, next = self._buildReviewKeys(start) keyedMessageInfo = {options.header_spam_string: [], options.header_ham_string: [], *************** *** 928,932 **** # Parse the message, get the judgement header and build a message # info object for each message. ! cachedMessage = state.unknownCorpus[key] message = mboxutils.get_message(cachedMessage.getSubstance()) judgement = message[options.hammie_header_name] --- 990,994 ---- # Parse the message, get the judgement header and build a message # info object for each message. ! cachedMessage = sourceCorpus[key] message = mboxutils.get_message(cachedMessage.getSubstance()) judgement = message[options.hammie_header_name] *************** *** 962,966 **** page.table += self.html.trainRow ! title = "Untrained messages received on %s" % date box = self._buildBox(title, None, page) # No icon, to save space. else: --- 1024,1029 ---- page.table += self.html.trainRow ! if title == "": ! title = "Untrained messages received on %s" % date box = self._buildBox(title, None, page) # No icon, to save space. else: *************** *** 1003,1015 **** def onWordquery(self, word): ! word = word.lower() ! wordinfo = state.bayes._wordinfoget(word) ! if wordinfo: ! stats = self.html.wordStats.clone() ! 
stats.spamcount = wordinfo.spamcount ! stats.hamcount = wordinfo.hamcount ! stats.spamprob = state.bayes.probability(wordinfo) else: ! stats = "%r does not exist in the database." % cgi.escape(word) query = self.html.wordQuery.clone() --- 1066,1081 ---- def onWordquery(self, word): ! if word == "": ! stats = "You must enter a word." else: ! word = word.lower() ! wordinfo = state.bayes._wordinfoget(word) ! if wordinfo: ! stats = self.html.wordStats.clone() ! stats.spamcount = wordinfo.spamcount ! stats.hamcount = wordinfo.hamcount ! stats.spamprob = state.bayes.probability(wordinfo) ! else: ! stats = "%r does not exist in the database." % cgi.escape(word) query = self.html.wordQuery.clone() *************** *** 1266,1270 **** - # =================================================================== # Test code. --- 1332,1335 ---- *************** *** 1528,1532 **** # Read the arguments. try: ! opts, args = getopt.getopt(sys.argv[1:], 'htbzpd:D:l:u:') except getopt.error, msg: print >>sys.stderr, str(msg) + '\n\n' + __doc__ --- 1593,1597 ---- # Read the arguments. try: ! opts, args = getopt.getopt(sys.argv[1:], 'htbzpsd:D:l:u:') except getopt.error, msg: print >>sys.stderr, str(msg) + '\n\n' + __doc__ *************** *** 1534,1537 **** --- 1599,1603 ---- runSelfTest = False + launchSMTPProxy = False for opt, arg in opts: if opt == '-h': *************** *** 1543,1546 **** --- 1609,1614 ---- elif opt == '-b': state.launchUI = True + elif opt == '-s': + launchSMTPProxy = True elif opt == '-d': # dbm file state.useDB = True *************** *** 1563,1566 **** --- 1631,1641 ---- # Do whatever we've been asked to do... 
state.createWorkers() + + if launchSMTPProxy: + from smtproxy import LoadServerInfo, CreateProxies + servers, proxyPorts = LoadServerInfo() + CreateProxies(servers, proxyPorts, state) + LoadServerInfo() + if runSelfTest: print "\nRunning self-test...\n" From anadelonbrin at users.sourceforge.net Wed Feb 26 20:13:36 2003 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Thu Feb 27 00:13:45 2003 Subject: [Spambayes-checkins] spambayes/spambayes/resources ui.html,1.4,1.5 ui_html.py,1.4,1.5 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes/resources In directory sc8-pr-cvs1:/tmp/cvs-serv30319/spambayes/resources Modified Files: ui.html ui_html.py Log Message: Provides options to (a) add an id (as a header or in the body) to incoming mail, (b) strip such ids from incoming mail, (c) find (via the ui) a message based on id, (d) train messages by forwarding/bouncing them to a smtpproxy. Index: ui.html =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/resources/ui.html,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** ui.html 14 Feb 2003 14:40:47 -0000 1.4 --- ui.html 27 Feb 2003 04:13:32 -0000 1.5 *************** *** 155,159 **** !

    These are untrained emails, which you can use to train the classifier. Check the appropriate button for each email, then click 'Train' below. 'Defer' leaves the --- 155,159 ---- !

    These are emails that you can use to train the classifier. Check the appropriate button for each email, then click 'Train' below. 'Defer' leaves the *************** *** 260,263 **** --- 260,272 ---- + + + +


    +

    findMessage

    + +
    + +
    Index: ui_html.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/resources/ui_html.py,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** ui_html.py 14 Feb 2003 17:29:21 -0000 1.4 --- ui_html.py 27 Feb 2003 04:13:32 -0000 1.5 *************** *** 5,171 **** import zlib ! data = zlib.decompress('x\xda\xc5[{s\xe36\x92\xff?U\xf9\x0e\x08\xb7fh\xd7\xda\x92l\xcf\xccee\x89u3\ ! \xb6\xb3N\xed<\xbc\xb6\xb2W\xa9Tj\n\x14!\x89k\x92\xe0\x91\xa0e\xed\xd4~\xf7\ ! \xedn\x00$\xf8\xb0\xa3\xb9\xd4\xeeyR6\x89G\xa3\xd1\xe8\xfe\xf5\x03\xcc\xec\ ! \xbb\xcbO\x17\x8b\x9fo\xae\xd8\xf5\xe2\xc3{v\xf3\xd3\xbb\xf7?^0\xefx<\xfe\ ! \x9f\xb3\x8b\xf1\xf8rq\xa9;^\x8d&\'lQ\xf0\xac\x8cU,3\x9e\x8c\xc7W\x1f\xbd\ ! \xe0\xdbof\x1b\x95&\xf4W\xf0\x08\xff\xaaX%\x82\xc5\xd1\xdc\xa3\'/\xb8\xcby\ ! \x1a\xf2\x9d(\xd9O\xa5(\xd8\x8f\x99\x12\xc5\x8a/\xc5lL\x03pN\xa9v\xf4\x10\ ! \xcah\xc7\xbe\xb0\x95\xcc\xd4\x94\xfdi\xf2\x82\xf1"\xe6\xc9\x11+\xb7qY\x1e\ ! \xb1\x8dH\x1e\x84\x8a\x97\xfc\x9c\xa5\xbcX\xc7\xd9\x94M\xd8?\xbf\xfdF\xf1\ ! \x10\x96\xdcc\x1e\x0e^\xc9"\x85\xb1-\x02\xdf~\xc3\xa7I\x9c\xddC\xfbR&\xb2\ ! \x98\xb2?L\xe0gI\x9d|\xfa\x10\xc3\xaeE\xf4D\xefF>\xc0\xb6\x9a\xbe7\x93W\x93\ ! \xd5\n\xfbFK`Hd\x8aq\xc3\xdc\xf1V\xc4\xeb\r\xf0\x18\xca$\xa2\x11\x0f\xb1\xd8\ ! \xd2\xca\xbd!\x19\xf0\xc9\x13\xcd\xdd(\xe4YF\x8b\x84|y\xbf.d\x95E\xb0\xd2r"`\ ! \xa5s\x96\xf3(\x8a\xb3\xf5\xfcu\xfdx\x9c\x88\x15\xd08y}\xfe\xed7\xac\xfe\te\ ! \x11\x89\xe2X\xc9\x1c\xba\xf2GV\xca$\x8eX\x98\x00\xcd\xa1q\xa1TJ\xa6\xbd\xa1\ ! \xc47\x9e61D<\x97\xf1?\x04\x8c;;{\xd1\xdav#\xe5\x93\xd7\xd4\x91\x8a\xb2\xe4k\ ! \xa1\xcfK\xa1@;\xfc\x8a\xc7f\x0b\x85\x96\x03\xb4\xd1\xdcR,Q\xf3\xecYk\x1e]\ ! \xe6\xfe\xf0\xfd\x04\xff\x9d\xb3m\x1c\xa9\r\xe8\xc1\xeb\x17\xeeD\xe4\x18\xc8\ ! vd\xb8Z\xad\xb8\x98\x9c\x0f\xf0\xe1\n\xe47\x05c\xd7\x1e\x9a4|\xf0\x86+\xa3\ ! \xf1fy 9\xc9\x1f\xcd\x99\x17\x02\x95C\x0b\xbad\x83\n:!\x15\xec\x8d\x1c\xd6W3\ ! 
\xb8\xd1\xb8g(:\x83\x9e#\x86\xfbPE\x9c\x8b\xcf2\xd3\xe7\xd9\xd2O\xb1\xc4\x7f\ ! f`\x15_\x03R\\\x0f\xe9\xcd\xe9\xe4\xc5yM\xfd{\xa2~>,\xb6\xd9\xd8B\xc5ll\x00\ ! \x07\x1eI\x88\x888\xf8\xe0Q\x1b\n~\x16\xc5\x0fl\x99\xf0\xb2\x9c\xfb\xda\x80\ ! \xfc\xc0\xf4\x949\xcfh\x06@C*\x94\x17\xcc\xe2t\xcd\xcab9\xf7u\xcbh\x1d\xaf|\ ! \xc6\x93x\x9d\xcd}\x1e\x96i\x1cE\x89\xf0\xc7\xc1\xcb,,\xf3s\xe0\x03(\xb4\xa8\ ! \x99\x85\xf4)\xd8\x85\xda\x8b\xe5\x85\x8c\xaa\xa5\xfa\xc8\xd3\x16,B\xf3\xe3\ ! \xceP\x9c:\xf3\xb8\xe6P\xa6\x02\x8f\xc2c\x9bB\xac`\x01x\xf7\x83k\xf8=\x1b\ ! \xf3\xe1e\xc0\xc42Z\xe3\xe5Z\x9d\xb3*\x1e!F\xb7Yn\xbf\x80\xa0\xb4$QbH"\xe5qv\ ! \xa1\xad\xd83;\xf3\x8cUk\xf9\xce6\xaf\x02@r\xbd!\xd0c8\x8eW\x8d\xe0\xf3`\xb1\ ! \x89K\xb6\x8a\x13q\xc4fK\x19\x89\xa0\xe6\x82\xde\x8eX$Vq\x06\x9bW\x1b\xc1\ ! \x12)\xef\x8fy\x16\x1d\xaf\x84H4\t\xb9\xa2\x9e\n=Fl=\x86mmDw\'\n\x00\xdf\x11\ ! c\x0bh~\x00\xd4\x97U\xa9\t\xe41\xe8^\x893\xc8\x7f\xe9\xe5"p\x05\x85\x00\xef \ ! \x98xT\x05_\xa2b\xc3\xc2\x80SY\x9c\'U\xc2\xa9Ai\x12E\x95\xa98\x05\xa0\x92,\ ! \xda\x81@\xc1\x81$\xc9\x8e\xe9S\x14\xc4\t\xd1V\x1b\xae\xda|\xe9\xf9\x9a9V\ ! \xe2\x9f\x92U9;\x86QF,\x0c\xd6\x06S\x85\xff\xc0\xf6\x13\xa1\x94\xb0\xac\xea\ ! \xb9K\x99\xe62\x03q\x97\xb09-\xc1\x9b\xdd\x07\x91D\xef\xc1\x18\x8d\x10Y\xac\ ! \xc5\x97\x02?\x88\xa7\xc8\x060\xf7\x10G\x96\x03\xcb"@\xfe\xb1\x0c\xff\x0e\ ! \x90\x03;\xcds\x84\xc1\x04\xf8,\xf4\x9c\x94\xdf\x03\x7f\xb07\xcd^.\xcb2\x06\ ! \x90\x1di\x1aW|\xb9\xd1\xd2\xac\x85I\xb32!\xa2\x12e\x13\n->#\xbd\rn*3\x8a@|\ ! \xc6\x91eX\xf1\xf5\x11\xc9;\x14\xb0A\\\x94\xdd\xec\xd4\x06\xc0\xc3p\x07t\x8d\ ! \xd4G\xb3q\xee*\xd4[\xa4\n\xa7\xc6\xd3[\x8a$\x81\x99KLv\xfc\x06\xb3T\x11\xcc \xfa\xb4\xe4\xa0\xd7\xeb\xcc7\ ! \xcd>Z\x0f.=\xf7O!\x86\xa2\xa5\x19\x01\xbf\x8a\xc0\x8a\x80N\x8f\xa2\x1f\x83"\ ! \\\xc0\xba>Z\x04\x85\x83\x90?\xf9\x1d\xfa\x18x\xfa:\xf1\x00\xd2\x137\xfc\xd3\ ! \xe6\x07\xd1\xa5%\xe6\xeb8\xb3T\\U\xa5\x8e3\xcd\xc4W\x13\x1f\xdc\x18F\xbc\ ! 
\xf4l\x83M\'\xc6C>\x9dW\xb3i\xd8A\'Xk\xf1\xe5\xcc\xa0\xb0\x0c\x9d$s\x04\x0fV\ ! \xf0A\x96j8\xe6*m$\x05\xda\xd3\xd0\x01\xe3/a9\x90\x1eX\x12\x9a\x82\xd5I\x13\ ! \x8a5s\xc0\x9a\xdb\xa3\x1d\xd5\xd5)\xd5#\x0c\xd4\xce;\x1b\x8c\x15\x11\x02\ ! \xf8\x83\x04W\x17Uy\x02\xfe\x117fc\x91\x91\x85 {\x80\xf0\x0bu\xc5\xec\xd9D\ ! \xeba1\xee\x06\xbe{h\xae>\xa1\x96\xdaj\xddD\x99\xeb\xce\x05\xbe{&/\x9c{\x00\ ! \xa1\xae\x8a\xdaw\x9b\x9f\xc3{[\xc1\x06\xc3wM\xd9{*\x83x\xdc]B\x02\x1d\'\xad\ ! \x117\x9fn\xcet\x1e\x81\xf1S\x86H\x022\x9a\x85Ag\xea\r\xf8\xee\xf2\x8e\xc0\ ! \xd3\x0bNNN&F\x1e\xb3q\x18\x1c\xb9\xc7\x0bc\t\xccd\x9b\x08\xc5\xb2EM\xc1x\ ! \x11H\xf8S\x87\xd0\xc8UX\xdd\xda\x1c\x00\xfe\xbc\x05\xdd|\x10\x9ai0\x08\xa4H\ ! \x87ZN;f\xe3.\xcdi\xd2\x9d\x80\xe0\x14Fz\x81\xcb\xfa\xa8\xb3@\x9f\xb2\x06\ ! \xedR\xcf~n\x19%\x15O\xf6[\xe5*\xc5c\xd0\xba\x13\xafbtV{.\x92U)\xe6\n\x1d\ ! \xfa\x0c\x9e\xd2\xa3\xe7\xa7]\xf7gm~s\xd2OYY\x15\xa23\xad\xa2\xc6\xee\x9e\ ! \x16\xb8}&\xf4\xce\xc0ua\xd2\xd4\xd9\tr>\xed\xac\x81\x9c\xb7\xe9\xb7\xe7\\\ ! \xf7\xa7l\xba3:\x9ctR\xd4\xdaX\x10\xf4L|k\n=o\xc4\xa3o\xa0\xd2\xfd\xdd\x99\ ! \xe3\xa2\xb7\xbbL\x9d\xf8\x83\xbe\xac\xe2\xf5s\x80l\xeb\x01X$\xf2]^\xfb\x8b\ ! \xf7\x7f\xff\x0c\xa1\xff\x12\xeb\x04\xb4\x0c\x88\x1e\x93\x81\xa2\xc9\x19;\ ! \xdb\xdf\x87d\xb9+\x95H\x01\xb0M\xdc\xd1\xd1\x02n*\x06zE?\xb80+k\x00\xc5\x80\ ! \x0b+\x08#WN\x8e\x0b\xacq\xf47qR\x97\x9f\x16\x105\xb6\xc3\x0e:\xe7\xa6\xd3\ ! \xe2\xd5\xa2\x95\xc1k\xd0*\x95,\xea\x1c\x14\xf2I]\'\x04W\xa5\xc0\x9cDi\x10\ ! \xc5\x8a\x90\xf4\x92\x06\xd6\xb6\x07\xd1\x1f/\xc1\x02\xb1\x0e\xb5\x91eCB\xcf\ ! \xacE\xd4HE3\xe6\x07\xb7\xf4\xb7\x1e\x8f"!\xd9\xd4\x9ee\xcf8\xc7l\x14\x856\ ! \x1c\xe9\xe8\x01$\xd5\xda\r`\xa1\x04\xdc!\x85f\x90\xf7j{3\xe6w\xc4\xb6\x9b\ ! \x18b\xd5\x9d\xd95\xf8ef\xc3\xc7!\t\x80\xeb\xbd\xd8\x88\xe5=5C\x96_H\xc8\x07\ ! 0\xcb\x0c+\xa5@.+ib9\x81\x19=\xadq\x84c\xb1t\x15\xc34\x7f\x81D}\xc8\xcd!X\ ! \x04b\xfe\xa5X\x89\xc2\x87\xc4\x95?\xe8B\x8d\x9en$E>\xfb\xc8\x14\x00,\xe7\ ! 
\xb0\x0c&\xb6\xc4\x0b\x11mb\x03=\xf92.\x97\xbc\x88\xd8\x98\x11u\xf8\x0b\xd0\ ! \x00\xbfQ!\x98\xad`\x02\xcd%\xed\xa4\x89v\xf5l\xbd\x93R\x87\x11\x1cu\x83\xa2\ ! \x1c\x1b\x84\xace;\xf0\xa5R?\xa7!\xf5y\x03\xfb\xa0 \x10\x8a\xfd\xf9j\xe1\xe0\ ! \x80I\xf6)\x01\xf27q\x14A\xe0\xc2\xb0\x886\xf7A\x8a\x12\xc4\x80\xe1\x9by\xd4\ ! \xa9\x91?q1\xe0\x19\x02\x19(\xbf\x9e\xaf\x9f\x06\xa7\xeb\xc8\xc2\x04\x12\xd0\ ! \xd7\n\x1c\xec{?\xf6ub\x89\x16\x07e\x15\xa6\xb1\xb2\x1c\xac\xa5\xc3\xff;\x12\ ! \xa2?T\xaa\xc6\x1f\xc3\xdd\r\xcaKV\x90\x92q\x88l\xa3\xb8D\xfe\x80\xc4\x89S\ ! \x03m\x050\x1af\xf7\xe1\x02\xa5\xb0\x1f\x13\x1fa\xe4\xd72\xf0\x14Xvc\xe7~\ ! \xb0\x88it\x91\xb6"\xc7\xc6\x8a\x97E\x9c\xdbma\x82<\xfe;\x7f\xe0\xba\xd5\x0f\ ! \xc6\xe3\xd9w\xbf\\\\\xbe]\xbc\xfd\xc5$\x9bU\xa65Sf\x1a\x1f\x0ep&]HA\xfa\xb9\ ! \x90\xa6H\xf1\xa5\xe1\'^\xb1\x03\xc8\xdc\xab\x14\x02\xe5\x11\xf2Q\xb2\x97/Y\ ! \xbbe\x94\x88l\ryY0g\xa7\x87\xcd\xd4/m)\x92\xd6\xcf;S\x7f9\xf9\xf5\xbc7\x8c\ ! \x1d\xc40pr\xceb6\xa3if\x01h\xf8\xe3\x1f\x0f\xdb\xc3\xbf\xf4\x8f\xaa\x84h\ ! \x9c\x8a\xd9@\x04g\xff\x12\xff:\xc2\x93\x1eQ\xc7\x81?\xf5\x0f\x07.Dp\xa3\xf5\ ! L\xbb\xa3\xf9\x9c\x9d\xe1~\xeb\x0e\xe0\x17\x1bQj\xd0>\xac&vMS.\x99\xd7\xd2\ ! \x1d)\xf9^nEq\x01.\xe1\xe0\xf0\xb0?\xfb\xcb\xf3\x04\t|\x00\xcd`\xfd\xa2\x12\ ! \x03{\xf8g\xbb\xc9y5\x8f\xe6\xcfx\xfc\xeb\xafM\xe2A\xca\xb2\':\xdd|\xba[\xf8\ ! A7\xeb\xf0m\x1e\xac\x93<\xf7:m_\xc0\x00\xb4p\xbc\xd1]\x15Z\xff\xd5J+!\x18\ ! \x0b>X\'\xec\x04\xb8\xbcdxY<\xa5x\xad\x97\x8c\xe2\xac\x1f\n\x99>\xd1m\xd9n]W\ ! \x018\xc8m\xc1s4l\x9c?\x1c\xc3x\x8d\xb5M\x1b\x9b\xf2\x91\x17\xff\x08<\x95\ ! \xf6*\xa0o^`\x9e\xc9\x8f\x8f\xffo\xd4\xc8\xef\x11-|\xfa\x1d\x94\xc0\xb7\x11\ ! \x1d\xf8\xfb;\xa8\xa0o$2\xf8\xd0\xbe\xf4\xd1\x9a\x15\x06\x83%\x82vA\xc39\xf5\ ! [\xb9m\n\x05\xf6\xfe\xce\x1f>,{\x13\xe8\xf7O\x86\x12\xc2\x8a\n\xf7\xe6r\xca\ ! \xc3\xd1^\xd07\x98[1ub>\xbc\x01\xd0\xf5[V\xc8\xe5\xfdw\x8c}\x7f|\xd8\xb9\xcd\ ! \x1a\xaet`\x11\xef\xb3\x17\xdcB\\\x14\x0bv\x1dg\x11\x18\x06V6\x0bj\xf9o\xc0;\ ! 
\x88y2LHu\xbd\xb2G\xc5\x84\xf0K\xacG\x14~\xaf\xc2\xd2s\xe5\x05\x8fbi=\x98\ ! \xb1\x84\xdd\x94\x8c\xe0/W?\x0f;0\xe44\xd2jh\xeb\xa8\xbey\xaf\x9dW\xff\xf7\ ! \xbf\x83\t\xd4_\xaf\xf1\xa7\xf4\xfe\x9fe\x01\xd3<\xd7\xa5\xc3\xfb\x7f\x96\ ! \x01JM\x1d\x06\xf0\xdd\x1f\xefm1a\xc2\xb3{4\x98\xa0\t.t\xb5\xe9\xf9\xd7>%\ ! \x8a\x91\xf7\xa0\xb4\x87\xbevr\xc5\x9e\xacl\xdce\xb6\xac\x03\xfb=\xf6\xfct8\ ! \xf45u\xb3*O$\x8f\xfa\x918\nA\xf7y\xb5\xdf#\xa1\xb4\x0e\xaf\xe5\x01\x99\x808\ ! \x8a\xb6\x94V\x89\x8as^(\xe2\xea8\xe2\x8a[H\xba\x8a\xa9\x98\xaeI3^\'(M\xea%\ ! \x8b\xcf)^u\x04\x10\xf3\xa4T\xef\xa5\xddP\xed\xbe}\xdb\xa7\x17\xc3v\xabj\xfa\ ! \xd9H\x12\x84\xe8\xc8\xfeSA\x17\x12\x82\x92\x8f\xedF&u\xda\t\x91U\xb6L**\xe9\ ! \x1bGwH\t\xd3\xd4\xadEb\x1c\t\x99\x1f7+)\xca\x0e\n\xb9\x05\xd8=\xd3\xa5\xea\ ! \xb9\x8f7{p.f\xa4\xbb\xbas?\xa9\xcf\xfb\xb3V\xf5\xa1\xd0\x9b2\xc9\xb6>\xa0/\ ! \xbfsm\xa1Oo\xf3\x95\xe4\xae\x9f\xa5fmv\x1f\x92\x17flC\xce*c\x9d8\x1f`\x1d\ ! \xc1\x9c9\xa9\xd7\x1a/X+,\x02`h\x1bJ\xbc\xc0@\xce\xe8Z%\x8bl\x18Cu\xcdc\xa7\ ! \\\x12gn\xb2\xdcT\xaeY!R\xf9@\xdf\x03\xd4\x97\xd2\x87{\x97\x03\xb6\x90\xc9\ ! \xfd\xb5\x12\xc5n\xd8\x0e\xea\xee\xc6\x14\xb0\xe9\x7f\xb1\xc9\xef\x8b\x10\ ! \xfb<+)xn\x14\xd2II|}\x93\xe7\x9fM\xba\xa70\x8c\n\x10\x1f\x82\xbe\x9a\x9b5{\ ! \x95\x1e\r\x89|\xaf\xdd\xde)\xae\xca\xe1\xd2G\xddmc\x83\x8fU\x1a\x82\xc9Bn\ ! \x8f:[\x17_:5B\xec[\xca\n\xbf999={\xa2\x12\xdb\x90\xda\xf1J\x94\xce=\x1d\xfe\x90\r\xea\xec\x04\xe4O#\xd8Z\x9a\ ! \xfb\x9c#\x96\xc4\xf7B\x0b,D=\x06\x8c`\xf5\xad\xd3\xa1\xfb\xa5Ss\xadB4\x16n\ ! \x8d\xac\x19T4#\xc8\x8d\xf6]\xbe\r\x13q\x8cg\xab\xc4\xdd\xef\t\x01P\xaf\xccg\ ! $x\x8e\xfd*B\x8b\xd4\x90\x80z\x13\x1c\x1f:\xe4G\xddBv\x1e\xdc\nU\x15YS\x88l\ ! \x7f\xd0\x85\x17\xd9\xf6\x1c\x01\xb6\xe8\x9ex:|\x1ez\xd0[=\xc6\xd3\x80h\x81\ ! \xd3@\xa2=\x8b\xc3\xe1\xaf\xbf\xf6Q<*\x1a\xff\x00\xd4\x9e\xd0\xb9\xba\xbfV\ ! \xb7\x1f>\xdd~\xb0\xa8\xe6\xd1\xbd\xb8\x909\xa8\xa3u\xed\x1e\xbav\xaf\xf7\ ! \xb5\x18\x16]\xb1\xe2\x07\xfe\x8fj\x9eT\x05\xc4\xc9T\xd0\x04\n\xe6\x1e\x87+{\ ! 
7\x8f\xdf\x00\x99O\xad\xdcz2\xe0w)J]^\xa72\xe7\x88J\xc7EM\x83\xd3\'2\xb2\xa0\ ! \x0b\xc9\xbe\xf9\xfaf\xdc\rW\x1b\x84\\?\x18\x9b\x8b\xaeqn\x9aZ\x16\xfd\x1br\ ! \xb9h\xbe\x9b\xa3!\x8b\xb7\xef\xde_9c\x86\xef\x10\x07\xee\x0c\x17\xb7\xce,\ ! \xb0\x80\x93V\xd2\xbe\xb84\xb1\xa2G\x8a\xee\xb9yuG[k&\x13\x1e\x8a\xc4\x0b\ ! \xde\xe3\x1f\xfb\x01b\xff\xceyq\xd9^\xa7K\xee\xc7\x8f7?-Z\xdf\x8e\xe8/JN\xdf\ ! x\xb4\n\xf9 \xe3\xbdt\x06\x07q\\\xfa\x19\xf0\xf4s\xcas\xaf\x1d\x97v\x17\xa3\ ! \x03\xd9\x88$\x87#^b9\x1a\xe3#\xba\xc1?\x99L\xfc\xe0\xf9\x0b\x92g\xc8\xe9+\ ! \xfd\x16\xb1\xa7\xae\x88l\xc0q\r\xd3\x18n\xb0\xb1+\x1b\x10\x0c\xf0\x0f\x8f\ ! \xb7O\x9d\xdci\xf7\xe4\x9a\xb3\xb2\x01?\x1d\xa2\x1f\\TE\x81\x9fR\xff\r}\xf6t\ ! `?\xce)\x9b=7*\xa8\xe7\xd2T\x80\x07r\xfb\x87\xb5\xe6>\xc7\xa8\x93\xf4\xc00G\ ! \x96\xee\xd81)r\x1fR\xf4\xe7"\x94\xaf\xd4\xbc\xbaq\x88\x89\x07\xadBl\xb0\x18\ ! a>6\xba\xe3\x0f\xc2k\xdccM\xc46 \xb24\xaf-\x9c)\x84\xb6h\xb1\xe2\xe4\xfbz`\ ! \xf3;\xd8\xba\xd5\xb4\xf1\xcaB\x13\x1f`\xb1\xc5\xe0W\x7fi\xb0\xa9T$\xb7\x99)\ ! \xb9\r\\\xa3uFxX*\xbb3m\x84B\xec\xcfRF\xe1\xae\xf9n\xd2,\x89_J\xdb\xcf\xb3\ ! \xe0\xb9\x0e@WR*\xac\r\xd8\xe8\xb3\x04\xd1\xf7\xcb\x8f\xc63\x9b\xebP0\x92\ ! \x17Cu\xc5\x99\x93\xebF\xc3_\\7%#_\xaf\x8c\xfe\xceoy\xc0\x06\xc3o\xf4\x07\ ! \xd1\xdc\xfd$\xc1\xb9\xa1\x87@\xbcT\x80\xc7^\xf0Afp(Kv6a\'\xaf\xa6\x93W\xd3\ ! \xb3Sv:\x99\x9c\x1a\xd9\x8fZk\x9b\xb5\x94\xca\xa7\xe3\xf1v\xbb\x1d\x95v\xc1\ ! \x91,\xd6c\x87\x01|\xc7\xe5;7\xc8-\xcb\xecl\xb3uH\x84\xe8\x080-3\x1f\n\xc5Q\ ! \x1ev\x96\xb9\x121Q>\xe8_\xbf\xa6a"w4\x13\xf6\x12$p\xce\xecdLL5\xb7&\x16ib\ ! \x10\'\x8e\x07M\x91\xd1N\x7f=\xaf\xff\xb7\x9d\x7f\x01A?l\xc5' ) ### end --- 5,79 ---- import zlib ! data = zlib.decompress("xÚÅ[{oã8’ÿ€ù\016\034-º•`\023ÛIºûf\035[Øî$³\031\\?r‰ç\016ƒÁ AK´­$ê$*Ž¯±ßýªŠ¤D=\ ! ’vïàöÒƒD\022Éb±X_\0259³\037.?],~½¹b׋\017ïÙÍ/ïÞÿ|Á¼ãñø¿Î.ÆãËÅ¥nx5šœ°EÁ³\ ! 2V±Ìx2\036_}ô‚ï¿›mTšÐ_Á#ü«b•\010\026Gsž¼à.çé’ïDÉ~)EÁ~Δ(V<\024³1uÀ1¥ÚÑÃ\ ! 
RF;ö…­d¦¦ì/“\027Œ\0271OŽX¹ËòˆmDò T\034òs–òb\035gS6aÿøþ;Å—0å\036ã°óJ\026)ôm\021øþ\ ! ;>Mâì\036¾‡2‘Å”ýi\002?!5òéC\014«\026Ñ\023­\033ù\000ËjÚÞL^MV+l\033…ÀÈ\024ㆹ㭈×\033àq)“ˆz\ ! <ÄbK3÷ºdÀ'O4w£%Ï2šdÉÃûu!«,‚™Â‰€™ÎYΣ(ÎÖó×õãq\"V@ãäõù÷ß±úg)‹H\024\ ! ÇJæД?²R&qÄ–\011Ð\034ê·”JɴוøÆÝ&†ˆç2þ\037\001ýÎÎ^´–ÝHùä55¤¢,ùZèýR(Ð\016¿â±\ ! YB¡å\000ßhl)BÔ<»×šG—¹?ý8Áçl\033Gj\003zðú…;\0209\006²\035\031®V+.&ç\003|¸\002ùª`ìÜCƒ†7Þ\ ! pe4ÞL\017$'ù£ÙóB rhA—lPA'¤‚½žÃúj:7\032÷\014E§ÓsÄp\035ªˆsñYfz?[ú)Büg:Vñ5x\ ! Šë!½9¼8¯©ÿHÔχÅ6\033[W1\033\033‡\003$Dô8øàÑ7\024ü,Š\037X˜ð²œûÚ€üÀ´”9Ïh\004¸†T(/\ ! ˜Åéš•E8÷õ—Ñ:^ùŒ'ñ:›û|Y¦q\024%Â\037\007/³e™Ÿ\003\037@¡EÍL¤wÁNÔž,/dT…ê#O[n\021>?\ ! î\014Å©3Žk\016e*p+<¶)Ä\012&€w?¸†ß³1\037ž\006L,£9^®Õ9«â\021úè6Ëí\027\020”–$J\014I¤<Î.´\025{\ ! fež±j-ßÙæU\000ž\\/\010ô\030¶ãU#ø#½\015.*3Š@|Æ‘eXñõ\021É{)`8)»\ ! Ù©\0158\017Ã\035Ð5R\037Íƹ«Po‘*ì\032Oó„8ÙÈ­æw+‹{\012ò°«±î\025Åj)\037!\020¨&\000\037»¼\000\011ýÉЄ\001\ ! ¦ÉØà,ÎòJ‘94]<¦v¹\000l\003úãÆ%•¨›=†þlîL¼±1­fÏ\030lrÉv²r„Õ¨\020lÛºàiÊ•V²\ ! ikõh•ä÷ÀõÃÞ›\011þ«][\016b°]gl#]\015|ÎݨS\017G·«}ñÜ#O1ñ\002ø™±\005t\016ô²PÎÂö\034\ ! ‡ÆÏæLæ\";ðŒ3ðŽ˜W(ï\020\002\030\016\016÷¤\024É\020\0105\014ŒÐQ¿\022Å\001\022Ý—L^€_Ab£fwíÆ\007/\023u>¼ý\ ! õ\026?§\010ct¼³q\034ì¿$‡‹\021Qƒ5z\027\033ž­Eäý?.ɲÐ]Ò\030ô¬\027?Œ­ÞIÒlcÚhSä'ª\030‚x­ò\035×\ ! ŽA«\004ÞE™ùÆÿ¢‡Á+\016pš/%°žóR¡÷BÔ‘­Ñý\000š\004Ë?b²Ð\023`+Ño<¨¦¶*dÊÊ°à\012üY¬Ù\ ! \002ßú6‹ú1\010ý\010†»B&\011ð\000†&Àñ/wÖ«B3Öó7?f!Ê\011'GbdåF\010uÄ\024&M`íØ\006Ë•&¾!×\ ! à„³u…\013\007¯H¦\010\0161\023ÛÆÝv„tL^ZGÁRÔÜ $0¦A´Ù1ÁË]×y¾\023\011xK좽|/°6‚\036\022ø@×\ ! ‰\035ãbГ—õ\014\010\005\012tw³Ü\002\005\027éy\001\001¢è\035zW‡'\007€™fÏj“Fø\006N¹¨ßg¡H\022\030\031b²ã7>K\025Á\014Ð\ ! §%\007­^g¼ùì£õàÔsÿ\0240\024MÍÈñ«\010¬\010èô(ú1(Â\005Ìë£E\020\034„üÉïÐGàéëÄ\003HO\\ø§Í\017Ð¥\ ! %ækœY*®ªRãL3ðÕć0†ˆ—ž-Øt0\036ò鼚EÃ\012:`­Å—3‚`\031\006Iæ\010\036¬àƒ,Õ0æ*-’\002íi\ ! è€ñ—0\035H\017,\011MÁê¤bÍ\030°ævoGuuJõ\010\035uðÎ\006±\"º\000þ !ÔEUž@|Ä…Y,2².Èn üB]1\ ! k6h}YŒ»Àw\017ÍÕ;ÔR[­›(sݸÀwÏä…s\017\\¨«¢öÝæçðÞV°Aø®){Oe\020»KH ã¤ÕãæÓ\ ! 
Í™Î#\020?eèI@F³eÐ\031z\003±»¼#çé\005'''\023#Ùx\031\034¹Û\013}É™É6\021²EMÁD\021HøS‡ÐÈUXýµ\ ! Ù\000üy\013ºù 4Ó`\020H‘6µœvÌÆšÓ ;\001à\024zzËú¨3AŸ²vÚ¥\036ýÜ4J*žì7ËUŠÛ u'^Å\030\ ! ¬öœ$«RÌ\025:ô\031<¥GÏ\017»îÚ|uÐ/YY\025¢3¬¢Ý5-pùLè•A褩³\022ä|Ú™\0039oÓo¹î\017\ ! ÙtGt8館µ± Ó3øÖ\024zÞˆG߸J÷wgŒë½ÝiêÄ\037ôe\025¯Ÿsȶ\036€E\"ßåµ?yÿ÷¯\000ýC¬\023Ð\ ! 4 zL\006Š&gì,\037’å®T\"\005‡mpGG\013¸©\030è\031ýàÂ̬\035(\002.¬ Œ\\99!°ö£_õ“ºü´\000Ô؆\035´\ ! ÏM£õW‹V\006¯V©dQç Oê:!„*\005æ$JãQ¬\010I/©cm{€þx\011\026ˆu¨,\033\022zd-¢F*š1?¸¥\ ! ¿u\024\011ɦŽ,{â\034³P\024Ú0ÒÑ\035Hªu\030ÀB\011„C]‘0F‡Éõά\024b1³qhÕ\020n/6\"¼§ÏÙ\027\022r\000\ ! Ì,—•R ‹•4øM`\026O3\034a_,WÅ0Ì_ Q\037òq\000ˆ@Ì¿\024+Qø¬ò\007]œÑÃt(N\037™¤ßx\007\0249&³\ ! Ä\013\021mð€\036|\031—!/\"6fD\035þ‚;€ß¨\004ÌV-fH+i\020®\036­WRjèÀQ\037\010ÙXà±–m°Kå}N]ê=\006ö\ ! A)\000~ýíjáؾIð)éñ7q\024\001XaX8›û E\011b@Èf\036u:äO\\»†@\006\012¯Çë§Áá\032M\030ð\000m-°`ß\ ! ûx×Á\017-\016Êj™ÆÊr°–\016ÿïHˆþPy\032\014w7(/YA\032Æ\001ÍFq‰ü\001‰\023§îÙ\002-ÚµîÃ\005Ja?&>BÏ\ ! oeà)\007ÙÅË}€ˆ©s‘¶Ðbc¹a\021çvY˜\024ÿÎ\037¸þê\007ãñì‡ß..ß.Þþf\022Ì*Óš)3í\023\016p$\035B\ ! Aʹ¦0ñ¥á'^±\003ÈÖ«\024Àñ\010ù(ÙË—¬ýe”ˆl\015¹X0g§‡ÍÐ/m)’ÖÏ;C;ùý¼×\035ÄÐqr\ ! Îb6£af\002øðç?\037¶»éoU\011\010œ\012Ø@\004Gÿ\026ÿ>Â\036QÃ?õ\017\007\016Ap¡õH»¢ùœázë\006à\027?¢Ô\ ! àû°šØ9M‰d^Kw¤ä{¹\025Å\005„ƒÃÃþè/Ï\023$ç\003Þ\014æ/*1°†´?9¯æÑü\031ÿ½I6HYöôN\ ! 7Ÿî\026~ÐÍ4|›ûêÄÎ=BÛ×a€·p\"Ð]µ´1«•J\002\000\013>ØÀë€Z^2< ž\022Fë% 8ê§B¦O4[¶[\ ! GTà\034ä¶à9\0326Ž\037Æ-^cmÓƦ|äÅ?‚H¥£\012è›\027˜gŠÝãŽ\032Å=¢…O€\022Ä6¢\003ÿ\000\025ŒD\006\ ! \037Ú\007=Z³–Á`Y ]ÄpvýVn›â€=³ó‡7Ëžþùý¡$°¢b½9ò°·\027ô\015æVL\035œ‡U]³e…\014ï\ ! `ìÇãÃÎ\011Öpu\003\013wŸ½à6\0167±`×q\026a`5³ /\005\007˜'Ã$T×({T\014l\017±\006Qø½ªJ/”\027<Š\ ! ¥`Æ\022vS2‚¿úu8€!§‘VC[;õÍ{\035¼ú¿ÿ/˜@ýõšxJïÿZ\0260µsC:¼ÿk\031 tÔa\000ßýñÞ\ ! \026³Lxv\006\0234àBW˜žíS\"Œ¼\007¥=ôµ“\037ödeq—Y²\006ö{¬ùi8ô-µ²*O$úH\034… Û¼:î‘P\ ! Z›×Š€L\000Ž¢%¥U¢âœ\027Š¸:Ž¸âÖ%]ÅT@פ\031¯\023”&Ý’Åç\0247\002À<)Õxi5T¯oŸðéÉð»U\ ! 5ýl$\011Btdÿ© C\010AÉÇv#“:Õ\004d•…IEe|\023è\016)ašºõGÄ‘íq3“¢ì [p»gº<=÷ñ4\017\ ! 
öÅôtgwÎ$õ~Öª>\004½·\033ðm}ÀX~çÚBŸÞæ\033É]?KÍÚì>$/L߆œUÆ:Y>ÀÚÙsR¯5\036\ ! ªV˜ø#´]J<´@Îè(%‹,Œ¡Zæ±S\"‰37YnªÕ¬\020©| ;\000õAôáÞ%€-drÿQ‰b7l\007usc\012ø\ ! é¿ñ“ß\027!¶yVRðÜ(¤“’øúôÎ?›twaØ+\000>\004}5§iöø<\032\022ùWW»‚Hlðâðz\016Þ~ ·Y<Ž\ ! õjkÌž_üÉ~‹¿BwJ•’8²‡\035Övñ˜\021§ùg\004\022¼S\\•ÃuŸºÙ‚¤Uº\004F€\0034ÞºòÔ)b[(+\ ! ¼psrzöD\031º!µyšÒ¦&ôêõ›'\010Ý\024rÉ—1db;]fi\\j¨×u\001Í(\014ž!!ƒÖ‘v\031\007ÛZzÁdôo\ ! ?þå‰\011¿%¸X+¾\025%D„'ÄÜéä”بΔ7\013DñØ#\021û\015™%1c\022“\007}â•(CJü!g¤Ó4?õ`ki\ ! \016³ŽX\022ß\013-°%ê48KV\037¹\035º×¼š3%¢±p\013„M§¢éAx¢},^Æ>ž-‘w/SBd¹2whp\037ûå”\026\ ! ©!\001õ\0068`b\010P¸Uü<¸\025ª*²¦\012۾͆§øv\037ÁÓ!ùtx?t§·º§#ƒ &6ؽ8\034¾ú¶âQÅ\ ! ü' ö„ÎÕíµºýôéöƒuv\036]\012\0202\007u´\016ÏC‡çõ®ÊaÅ\031KŸ\000\004¨øKåP\034L^\012(˜C,®ìÅ\004¼\000e\ ! Åt\010d¥(õÙ\002Õ{GT7/j\032œî\007É‚Ncûæë›~7\\mÐýúÁØœòsó©eÑ_‘ËEsiº,Þ¾{\ ! åô\031>@\03580]Ü:£À\002NZÕ‹Å¥\001Í\036)ºç\026\030:ÚZ3™ð¥H¼à=þ±·/û\007î‹Ëö<]r?¼ùeѺ\ ! 8£¯Óœ¾ñh\026ŠG&’éT\026\000mú\031üéç”ç^\033 w'£\015Ùˆ$‡-\016±.@‘®/œL&~ðüéÐ3äô}†\026±\ ! §ÎÇ,òº†a\014\027ØØ•EF\003üÃãíS;wÚݹf¯læC›è\007\027UQà=òÿÄø=\035X³ËfÍ\012ê±4\024Ü\003A\ ! €ÃZsŸcÔÉþ ›#K·ï˜\024¹ïRô]\031JÜj^]Lb€±Uˆ\015VeÌM«;þ ¼&<ÖDì\007ô,ÍkËÏ\024B[´\ ! XqŠ}=gó\007غմñìF\023\037`±Åà7_³ØT*’Ûl\010Kê3ÄN\017\017k†wæ\033y!ö7)£å®¹4j¦Äkâön\ ! \032<7ÈTJ…E\022\013JK\020}\037’šÈl΂ÁH^\014\025XgNÒ\037\015_7ojg¾ž\031ãߊ€\017¿Ñ·Á¹{\037ùž\000\031I\ ! ©À\037{Á\007™Á¦„ìlÂN^M'¯¦g§ìt295²\037µæ6s)•OÇãív;*í„#Y¬Ç\016\003øŽÓwŽÏ[–ÙYf\ ! k“È££ƒi™ù\020,GyØQælÈ ~п~qÇ x4\023ö\022$pÎì`ÌÐ5·\006‹4\030ÄÁñ )2Úéÿu@ÿ?Kÿ\013\ ! ‹t¹ª") ### end From anadelonbrin at users.sourceforge.net Wed Feb 26 20:15:06 2003 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Thu Feb 27 00:15:18 2003 Subject: [Spambayes-checkins] spambayes smtpproxy.py,NONE,1.1 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv31042 Added Files: smtpproxy.py Log Message: SMTP Proxy, similar to pop3proxy, using asyncore. Launch via pop3proxy with the 's' switch. 
--- NEW FILE: smtpproxy.py --- #!/usr/bin/env python """A SMTP proxy that works with pop3proxy.py, and monitors mail sent to two particular addresses. Mail to these addresses is blocked, a spambayes id is extracted from them, and the original messages are trained on (from the Corpus cache). You point smtpproxy at your SMTP server, and configure your email client to send mail through the proxy then forward/bounce any incorrectly classified messages to the ham/spam training address. To use, run pop3proxy.py with the switch '-s'. All options are found in the [pop3proxy] and [smtpproxy] sections of the .ini file. """ # This module is part of the spambayes project, which is Copyright 2002-3 # The Python Software Foundation and is covered by the Python Software # Foundation license. __author__ = "Tony Meyer " __credits__ = "Tim Stone, all the Spambayes folk." try: True, False except NameError: # Maintain compatibility with Python 2.2 True, False = 1, 0 todo = """ Testing: o Test with as many clients as possible to check that the id is correctly extracted from the forwarded/bounced message. MUA information: A '*' in the Header column signifies that the smtpproxy can extract the id from the headers only. A '*' in the Body column signifies that the smtpproxy can extract the id from the body of the message, if it is there. 
Header Body *** Windows 2000 MUAs *** Eudora 5.2 Forward * * Eudora 5.2 Redirect * Netscape Messenger (4.7) Forward (inline) * * Netscape Messenger (4.7) Forward (quoted) Plain * Netscape Messenger (4.7) Forward (quoted) HTML * Netscape Messenger (4.7) Forward (quoted) Plain & HTML * Netscape Messenger (4.7) Forward (attachment) Plain * * Netscape Messenger (4.7) Forward (attachment) HTML * * Netscape Messenger (4.7) Forward (attachment) Plain & HTML * * Outlook Express 6 Forward HTML (Base64) * Outlook Express 6 Forward HTML (None) * Outlook Express 6 Forward HTML (QP) * Outlook Express 6 Forward Plain (Base64) * Outlook Express 6 Forward Plain (None) * Outlook Express 6 Forward Plain (QP) * Outlook Express 6 Forward Plain (uuencoded) * http://www.endymion.com/products/mailman Forward * M2 (Opera Mailer 7.01) Forward * M2 (Opera Mailer 7.01) Redirect * * The Bat! 1.62i Forward (RFC Headers not visible) * The Bat! 1.62i Forward (RFC Headers visible) * * The Bat! 1.62i Redirect * The Bat! 1.62i Alternative Forward * * The Bat! 1.62i Custom Template * * AllegroMail 2.5.0.2 Forward * AllegroMail 2.5.0.2 Redirect * PocoMail 2.6.3 Bounce * PocoMail 2.6.3 Bounce * Pegasus Mail 4.02 Forward (all headers option set) * * Pegasus Mail 4.02 Forward (all headers option not set) * Calypso 3 Forward * Calypso 3 Redirect * * Becky! 2.05.10 Forward * Becky! 2.05.10 Redirect * Becky! 2.05.10 Redirect as attachment * * """ from spambayes import Dibbler from spambayes.tokenizer import get_message, textparts from spambayes.tokenizer import try_to_repair_damaged_base64 from spambayes.Options import options from pop3proxy import _addressPortStr, ServerLineReader from pop3proxy import _addressAndPort, proxyListeners import string import socket, asyncore, asynchat class SMTPProxyBase(Dibbler.BrighterAsyncChat): """An async dispatcher that understands SMTP and proxies to a SMTP server, calling `self.onTransaction(command, args)` for each transaction. 
self.onTransaction() should return the command to pass to the proxied server - the command can be the verbatim command or a processed version of it. The special command 'KILL' kills it (passing a 'QUIT' command to the server). """ def __init__(self, clientSocket, serverName, serverPort): Dibbler.BrighterAsyncChat.__init__(self, clientSocket) self.request = '' self.set_terminator('\r\n') self.command = '' # The SMTP command being processed... self.args = '' # ...and its arguments self.isClosing = False # Has the server closed the socket? self.inData = False self.data = "" self.blockData = False self.serverSocket = ServerLineReader(serverName, serverPort, self.onServerLine) def onTransaction(self, command, args): """Overide this. Takes the raw command and returns the (possibly processed) command to pass to the email client. """ raise NotImplementedError def onProcessData(self, data): """Overide this. Takes the raw data and returns the (possibly processed) data to pass back to the email client. """ raise NotImplementedError def onServerLine(self, line): """A line of response has been received from the SMTP server.""" # Has the server closed its end of the socket? if not line: self.isClosing = True # We don't process the return, just echo the response. self.push(line) self.onResponse() def collect_incoming_data(self, data): """Asynchat override.""" self.request = self.request + data def found_terminator(self): """Asynchat override.""" verb = self.request.strip().upper() if verb == 'KILL': self.socket.shutdown(2) self.close() raise SystemExit if self.request.strip() == '': # Someone just hit the Enter key. self.command = self.args = '' else: # A proper command. # some commands (MAIL FROM and RCPT TO) split on ':' # others (HELO, RSET, ...) split on ' ' # is there a nicer way of doing this? 
if self.request[:10].upper() == "MAIL FROM:": splitCommand = self.request.strip().split(":", 1) elif self.request[:8].upper() == "RCPT TO:": splitCommand = self.request.strip().split(":", 1) else: splitCommand = self.request.strip().split(None, 1) self.command = splitCommand[0].upper() self.args = splitCommand[1:] if self.inData == True: self.data += self.request + '\r\n' if self.request == ".": self.inData = False cooked = self.onProcessData(self.data) self.data = "" if self.blockData == False: self.serverSocket.push(cooked + '\r\n') else: self.push("250 OK\r\n") else: cooked = self.onTransaction(self.command, self.args) if cooked is not None: self.serverSocket.push(cooked + '\r\n') self.command = self.args = self.request = '' def onResponse(self): # If onServerLine() decided that the server has closed its # socket, close this one when the response has been sent. if self.isClosing: self.close_when_done() # Reset. self.command = '' self.args = '' self.isClosing = False class BayesSMTPProxyListener(Dibbler.Listener): """Listens for incoming email client connections and spins off BayesSMTPProxy objects to serve them. """ def __init__(self, serverName, serverPort, proxyPort, state): proxyArgs = (serverName, serverPort, state) Dibbler.Listener.__init__(self, proxyPort, BayesSMTPProxy, proxyArgs) print 'SMTP Listener on port %s is proxying %s:%d' % \ (_addressPortStr(proxyPort), serverName, serverPort) class BayesSMTPProxy(SMTPProxyBase): """Proxies between an email client and a SMTP server, inserting judgement headers. It acts on the following SMTP commands: o HELO: o MAIL FROM: o RSET: o QUIT: o These all just forward the verbatim command to the proxied server for processing. o RCPT TO: o Checks if the recipient address matches the key ham, spam or shutdown addresses, and if so notes this and does not forward a command to the proxied server. In all other cases simply passes on the verbatim command. o DATA: o Notes that we are in the data section. 
If (from the RCPT TO information) we are receiving a ham/spam message to train on, then do not forward the command on. Otherwise forward verbatim. """ def __init__(self, clientSocket, serverName, serverPort, state): SMTPProxyBase.__init__(self, clientSocket, serverName, serverPort) self.handlers = {'HELO': self.onHelo, 'RCPT TO': self.onRcptTo, 'MAIL FROM': self.onMailFrom, 'RSET': self.onRset, 'QUIT': self.onQuit, 'DATA': self.onData} self.state = state self.state.totalSessions += 1 self.state.activeSessions += 1 self.isClosed = False self.train_as_ham = False self.train_as_spam = False def send(self, data): try: return SMTPProxyBase.send(self, data) except socket.error: # The email client has closed the connection - 40tude Dialog # does this immediately after issuing a QUIT command, # without waiting for the response. self.close() def recv(self, size): data = SMTPProxyBase.recv(self, size) return data def close(self): # This can be called multiple times by async. if not self.isClosed: self.isClosed = True self.state.activeSessions -= 1 SMTPProxyBase.close(self) def stripAddress(self, address): """ Strip the leading & trailing <> from an address. Handy for getting FROM: addresses. """ start = string.index(address, '<') + 1 end = string.index(address, '>') return address[start:end] def splitTo(self, address): """ Return 'address' as undressed (host, fulladdress) tuple. Handy for use with TO: addresses. 
""" start = string.index(address, '<') + 1 sep = string.index(address, '@') + 1 end = string.index(address, '>') return (address[sep:end], address[start:end],) def onTransaction(self, command, args): handler = self.handlers.get(command, self.onUnknown) return handler(command, args) def onProcessData(self, data): if self.train_as_spam: self.train(data, True) return "" elif self.train_as_ham: self.train(data, False) return "" return data def onHelo(self, command, args): rv = command for arg in args: rv += ' ' + arg return rv def onMailFrom(self, command, args): rv = command + ':' for arg in args: rv += ' ' + arg return rv def onRcptTo(self, command, args): toHost, toFull = self.splitTo(args[0]) if toFull == options.smtpproxy_shutdown_address: self.push("421 Closing on user request\r\n") self.socket.shutdown(2) self.close() raise SystemExit return None elif toFull == options.smtpproxy_spam_address: self.train_as_spam = True self.blockData = True self.push("250 OK\r\n") return None elif toFull == options.smtpproxy_ham_address: self.train_as_ham = True self.blockData = True self.push("250 OK\r\n") return None else: self.blockData = False rv = command + ':' for arg in args: rv += ' ' + arg return rv def onData(self, command, args): self.inData = True if self.train_as_ham == True or self.train_as_spam == True: self.push("250 OK\r\n") return None rv = command for arg in args: rv += ' ' + arg return rv def onRset(self, command, args): rv = command for arg in args: rv += ' ' + arg return rv def onQuit(self, command, args): rv = command for arg in args: rv += ' ' + arg return rv def onUnknown(self, command, args): """Default handler.""" rv = command for arg in args: rv += ' ' + arg return rv def extractSpambayesID(self, data): msg = get_message(data) # the nicest MUA is one that forwards the header intact id = msg.get(options.pop3proxy_mailid_header_name) if id is not None: return id # some MUAs will put it in the body somewhere # other MUAs will put it in an attached MIME 
message id = self._find_id_in_text(str(msg)) if id is not None: return id # the message might be encoded for part in textparts(msg): # Decode, or take it as-is if decoding fails. try: text = part.get_payload(decode=True) except: text = part.get_payload(decode=False) if text is not None: text = try_to_repair_damaged_base64(text) if text is not None: id = self._find_id_in_text(text) return id return None def _find_id_in_text(self, text): id_location = text.find(options.pop3proxy_mailid_header_name) if id_location == -1: return None else: id_location += len(options.pop3proxy_mailid_header_name) + 2 id_end = text.find('\r\n', id_location) id = text[id_location:id_end] return id def train(self, msg, isSpam): id = self.extractSpambayesID(msg) if id is None: print "Could not extract id" return if options.verbose: if isSpam == True: print "Training %s as spam" % id else: print "Training %s as ham" % id if self.state.unknownCorpus.get(id) is not None: sourceCorpus = self.state.unknownCorpus elif self.state.hamCorpus.get(id) is not None: sourceCorpus = self.state.hamCorpus elif self.state.spamCorpus.get(id) is not None: sourceCorpus = self.state.spamCorpus else: # message doesn't exist in any corpus print "Non-existant message" return if isSpam == True: targetCorpus = self.state.spamCorpus else: targetCorpus = self.state.hamCorpus targetCorpus.takeMessage(id, sourceCorpus) self.state.bayes.store() def LoadServerInfo(): # Load the proxy settings servers = [] proxyPorts = [] if options.smtpproxy_servers: for server in options.smtpproxy_servers.split(','): server = server.strip() if server.find(':') > -1: server, port = server.split(':', 1) else: port = '25' servers.append((server, int(port))) if options.smtpproxy_ports: splitPorts = options.smtpproxy_ports.split(',') proxyPorts = map(_addressAndPort, splitPorts) if len(servers) != len(proxyPorts): print "smtpproxy_servers & smtpproxy_ports are different lengths!" 
sys.exit() return servers, proxyPorts def CreateProxies(servers, proxyPorts, state): """Create BayesSMTPProxyListeners for all the given servers.""" for (server, serverPort), proxyPort in zip(servers, proxyPorts): listener = BayesSMTPProxyListener(server, serverPort, proxyPort, state) proxyListeners.append(listener) def main(): """Runs the proxy forever or until a 'KILL' command is received or someone hits Ctrl+Break.""" from pop3proxy import state servers, proxyPorts = LoadServerInfo() CreateProxies(servers, proxyPorts, state) Dibbler.run() if __name__ == '__main__': main() From popiel at users.sourceforge.net Thu Feb 27 16:02:47 2003 From: popiel at users.sourceforge.net (T. Alexander Popiel) Date: Thu Feb 27 19:02:53 2003 Subject: [Spambayes-checkins] spambayes/testtools corrected.py,NONE,1.1 dotest.sh,NONE,1.1 es2hs.py,NONE,1.1 incremental.HOWTO.txt,NONE,1.1 incremental.TODO.txt,NONE,1.1 incremental.py,NONE,1.1 mkgraph.py,NONE,1.1 perfect.py,NONE,1.1 sort+group.py,NONE,1.1 Message-ID: Update of /cvsroot/spambayes/spambayes/testtools In directory sc8-pr-cvs1:/tmp/cvs-serv1837 Added Files: corrected.py dotest.sh es2hs.py incremental.HOWTO.txt incremental.TODO.txt incremental.py mkgraph.py perfect.py sort+group.py Log Message: Adding a bunch of files relating to testing of incremental training regimes. The harness isn't really done, but I don't seem to have time to do all I want with it, so I'm releasing it to the world anyway. See incremental.HOWTO.txt for info. --- NEW FILE: corrected.py --- ### ### This is a training regime for the incremental.py harness. ### It does guess-based training on all messages, followed by ### correction to perfect at the end of each group. 
### class Regime: def __init__(self): self.spam_to_ham = [] self.ham_to_spam = [] self.unsure_to_ham = [] self.unsure_to_spam = [] def group_action(self, which, test): test.untrain(self.ham_to_spam, self.spam_to_ham) test.train(self.spam_to_ham, self.ham_to_spam) test.train(self.unsure_to_ham, self.unsure_to_spam) self.spam_to_ham = [] self.ham_to_spam = [] self.unsure_to_ham = [] self.unsure_to_spam = [] def guess_action(self, which, test, guess, actual, msg): if guess[0] != actual: if actual < 0: if guess[0] == 0: self.unsure_to_spam.append(msg) else: self.ham_to_spam.append(msg) else: if guess[0] == 0: self.unsure_to_ham.append(msg) else: self.spam_to_ham.append(msg) return guess[0] --- NEW FILE: dotest.sh --- for j in 1 2 3 4 5; do python2.2 incremental.py -s $j -r $1 > output/$1$j.out python2.2 mkgraph.py < output/$1$j.out > output/$1$j.mtv plotmtv -colorps -o output/$1$j.ps -noxplot output/$1$j.mtv gs -sDEVICE=png256 -sOutputFile=output/$1$j.png output/$1$j.ps < /dev/null done --- NEW FILE: es2hs.py --- #! /usr/bin/env python ### Set up the classic Data/Ham/reservior and Data/Spam/reservior ### directories based from MH mailboxes ~/Mail/everything and ### ~/Mail/spam. """Usage: %(program)s [OPTIONS] ... Where OPTIONS is one or more of: -h show usage and exit -e PATH directory of all messages (ham and spam). -s PATH directory of known spam messages. These should be duplicates of messages in the everything folder. Can be specified more than once. 
""" import getopt import sys import os import filecmp import shutil program = sys.argv[0] loud = True day = 24 * 60 * 60 # The following are in days expire = 4 * 30 grouping = 2 hamdir = "Data/Ham/reservoir" spamdir = "Data/Spam/reservoir" def usage(code, msg=''): """Print usage message and sys.exit(code).""" if msg: print >> sys.stderr, msg print >> sys.stderr print >> sys.stderr, __doc__ % globals() sys.exit(code) def main(): """Main program; parse options and go.""" global loud everything = None spam = [] try: opts, args = getopt.getopt(sys.argv[1:], 'hs:e:') except getopt.error, msg: usage(2, msg) if opts: for opt, arg in opts: if opt == '-h': usage(0) elif opt == '-e': everything = arg elif opt == '-s': spam.append(arg) if args: usage(2, "Positional arguments not allowed") else: everything = os.path.expanduser("~/Mail/everything") spam = [os.path.expanduser("~/Mail/spam"), os.path.expanduser("~/Mail/newspam")] spamsizes = {} for s in spam: if loud: print "Scanning spamdir (%s):" % s files = os.listdir(s) for f in files: if f[0] in ('1', '2', '3', '4', '5', '6', '7', '8', '9'): name = os.path.join(s, f) size = os.stat(name).st_size try: spamsizes[size].append(name) except KeyError: spamsizes[size] = [name] os.makedirs(spamdir) os.makedirs(hamdir) if loud: print "Scanning everything" for f in os.listdir(everything): if f[0] in ('1', '2', '3', '4', '5', '6', '7', '8', '9'): name = os.path.join(everything, f) size = os.stat(name).st_size isspam = False try: for s in spamsizes[size]: if filecmp.cmp(name, s): isspam = True except KeyError: pass if isspam: shutil.copyfile(name, os.path.join(spamdir, f)) else: shutil.copyfile(name, os.path.join(hamdir, f)) if __name__ == "__main__": main() --- NEW FILE: incremental.HOWTO.txt --- Yes, this is a lame attempt at explaining what I've built, in the vain hope that someone will read it and improve it. I'm writing this with only about 4 hours sleep, so my coherency may not be particularly high. 
There are a few steps to doing incremental training tests: 1. Get your corpora. It's best if they're contemporaneous and single source, because that makes it much easier to sequence and group them. The corpora need to be in the good old familiar Data/{Ham,Spam}/{reservoir,Set*} tree. For my purposes, I wrote the es2hs.py tool to grab stuff out of my real MH mail archive folders; other people may want some other method of getting the corpora into the tree. 2. Sort and group the corpora. When testing, messages will be processed in sorted order. The messages should all have unique names with a group number and an id number separated by a dash (e.g. 0123-004556). I wrote sort+group.py for this. sort+group.py sorts the messages into chronological order (by topmost Received header) and then groups them by 24-hour period. 3. Distribute the corpora into multiple sets so you can do multiple similar runs to gauge validity of the results (similar to a cross-validation, but not really). When testing, all but one set will be used for a particular run. I personally use 5 sets. Distribution is done with mksets.py. It will evenly distribute the corpora across the sets, keeping the groups evenly distributed, too. You can specify the number of sets, limit the number of groups used (to make short runs), and limit the number of messages per group*set distributed (to simulate less mail per group, and thus get more fine-grained results). 4. Run incremental.py to actually process the messages in a training and testing run. How training is done is determined by what regime you specify (regimes are defined in helper .py files; see perfect.py and corrected.py for examples). For large corpora, you may want to do the various set runs separately (by specifying the -s option), instead of building nsets classifiers all in parallel (memory usage can get high). Make sure to save the output of incremental.py into a file... by itself it's ugly, but postprocessing can make it useful. 5.
Postprocess the incremental.py output. I made mkgraph.py to do this, outputting datasets for plotmtv. plotmtv is a really neat data visualization tool. Use it. Love it. Gods, I need more sleep. See dotest.sh for a sample of automating steps 4 & 5. Please, somebody rewrite this file. --- NEW FILE: incremental.TODO.txt --- -r for rules: import named file, get functions from it -- DONE -t for transitions: import named file, control switching regimes init called for both -r and -t, with unrecognized args between it and next record db size? interesting rules: perfect -- DONE corrected -- DONE fpfnunsure fnunsure aging (separate from rules, so can mix it with others?) interesting transitions: specific group number error rate < n fp rate < n fn rate < n unsure rate < n graphing: separate files for each graph cumulative vs span totals --- NEW FILE: incremental.py --- ### ### This is a test harness for doing testing of incremental ### training regimes. The individual regimes used should ### be specified in separate files; see perfect.py and ### corrected.py for examples. ### import getopt import glob import os import sys sys.path.insert(-1, os.getcwd()) sys.path.insert(-1, os.path.dirname(os.getcwd())) from spambayes.Options import options from spambayes import classifier from spambayes import msgs import email from email import Message try: True, False except NameError: # Maintain compatibility with Python 2.2 True, False = 1, 0 class Test: # Pass a classifier instance (an instance of Bayes). # Loop: # # Train the classifer with new ham and spam. # train(ham, spam) # this implies reset_test_results # Loop: # Optional: # # Possibly fiddle the classifier. # set_classifier() # # Forget smessages the classifier was trained on. # untrain(ham, spam) # this implies reset_test_results # Optional: # reset_test_results() # # Predict against (presumably new) examples. 
# predict(ham, spam) # Optional: # suck out the results, via instance vrbls and # false_negative_rate(), false_positive_rate(), # false_negatives(), and false_positives() def __init__(self, classifier): self.set_classifier(classifier) self.reset_test_results() # Tell the tester which classifier to use. def set_classifier(self, classifier): self.classifier = classifier def reset_test_results(self): # The number of ham and spam instances tested. self.nham_tested = self.nspam_tested = 0 # The number of ham and spam instances trained. self.nham_trained = self.nspam_trained = 0 # The number of test instances correctly and incorrectly classified. self.nham_right = 0 self.nham_wrong = 0 self.nham_unsure = 0; self.nspam_right = 0 self.nspam_wrong = 0 self.nspam_unsure = 0; # Lists of bad predictions. self.ham_wrong_examples = [] # False positives: ham called spam. self.spam_wrong_examples = [] # False negatives: spam called ham. self.unsure_examples = [] # ham and spam in middle ground # Train the classifier on streams of ham and spam. def train(self, hamstream=None, spamstream=None): learn = self.classifier.learn if hamstream is not None: for example in hamstream: learn(example, False) self.nham_trained += 1 if spamstream is not None: for example in spamstream: learn(example, True) self.nspam_trained += 1 # Untrain the classifier on streams of ham and spam. def untrain(self, hamstream=None, spamstream=None): unlearn = self.classifier.unlearn if hamstream is not None: for example in hamstream: unlearn(example, False) self.nham_trained -= 1 if spamstream is not None: for example in spamstream: unlearn(example, True) self.nspam_trained -= 1 # Run prediction on each sample in stream. You're swearing that stream # is entirely composed of spam (is_spam True), or of ham (is_spam False). # Note that mispredictions are saved, and can be retrieved later via # false_negatives (spam mistakenly called ham) and false_positives (ham # mistakenly called spam). 
For this reason, you may wish to wrap examples # in a little class that identifies the example in a useful way, and whose # __iter__ produces a token stream for the classifier. # def predict(self, stream, is_spam): guess = self.classifier.spamprob for example in stream: prob = guess(example) is_ham_guessed = prob < options.ham_cutoff is_spam_guessed = prob >= options.spam_cutoff if is_spam: self.nspam_tested += 1 if is_spam_guessed: self.nspam_right += 1 elif is_ham_guessed: self.nspam_wrong += 1 self.spam_wrong_examples.append(example) else: self.nspam_unsure += 1 self.unsure_examples.append(example) else: self.nham_tested += 1 if is_ham_guessed: self.nham_right += 1 elif is_spam_guessed: self.nham_wrong += 1 self.ham_wrong_examples.append(example) else: self.nham_unsure += 1 self.unsure_examples.append(example) assert (self.nham_right + self.nham_wrong + self.nham_unsure == self.nham_tested) assert (self.nspam_right + self.nspam_wrong + self.nspam_unsure == self.nspam_tested) num = 0 if is_ham_guessed: num = 1 if is_spam_guessed: num = -1 return (num, prob) def false_positive_rate(self): """Percentage of ham mistakenly identified as spam, in 0.0..100.0.""" return self.nham_wrong * 1e2 / (self.nham_tested or 1) def false_negative_rate(self): """Percentage of spam mistakenly identified as ham, in 0.0..100.0.""" return self.nspam_wrong * 1e2 / (self.nspam_tested or 1) def unsure_rate(self): return ((self.nham_unsure + self.nspam_unsure) * 1e2 / ((self.nham_tested + self.nspam_tested) or 1)) def false_positives(self): return self.ham_wrong_examples def false_negatives(self): return self.spam_wrong_examples def unsures(self): return self.unsure_examples class _Example: def __init__(self, name, words): self.name = name self.words = words def __iter__(self): return iter(self.words) _easy_test = """ >>> from spambayes.classifier import Bayes >>> from spambayes.Options import options >>> options.ham_cutoff = options.spam_cutoff = 0.5 >>> good1 = _Example('', ['a', 'b', 
'c']) >>> good2 = _Example('', ['a', 'b']) >>> bad1 = _Example('', ['c', 'd']) >>> t = Test(Bayes()) >>> t.train([good1, good2], [bad1]) >>> t.predict([_Example('goodham', ['a', 'b']), ... _Example('badham', ['d']) # FP ... ], False) >>> t.predict([_Example('goodspam', ['d']), ... _Example('badspam1', ['a']), # FN ... _Example('badspam2', ['a', 'b']), # FN ... _Example('badspam3', ['d', 'a', 'b']) # FN ... ], True) >>> t.nham_tested 2 >>> t.nham_right, t.nham_wrong (1, 1) >>> t.false_positive_rate() 50.0 >>> [e.name for e in t.false_positives()] ['badham'] >>> t.nspam_tested 4 >>> t.nspam_right, t.nspam_wrong (1, 3) >>> t.false_negative_rate() 75.0 >>> [e.name for e in t.false_negatives()] ['badspam1', 'badspam2', 'badspam3'] >>> [e.name for e in t.unsures()] [] >>> t.unsure_rate() 0.0 """ __test__ = {'easy': _easy_test} def _test(): import doctest, Tester doctest.testmod(Tester) def group_perfect(which, test): pass def guess_perfect(which, test, guess, actual, msg): return actual spam_to_ham = [] ham_to_spam = [] unsure_to_ham = [] unsure_to_spam = [] def group_corrected(which, test): global spam_to_ham global ham_to_spam global unsure_to_ham global unsure_to_spam test.untrain(ham_to_spam[which], spam_to_ham[which]) test.train(spam_to_ham[which], ham_to_spam[which]) test.train(unsure_to_ham[which], unsure_to_spam[which]) def guess_corrected(which, test, guess, actual, msg): global spam_to_ham global ham_to_spam global unsure_to_ham global unsure_to_spam if guess[0] != actual: if actual < 0: if guess == 0: try: unsure_to_spam[which].append(msg) except: unsure_to_spam[which] = [msg] else: try: ham_to_spam[which].append(msg) except: ham_to_spam[which] = [msg] else: if guess == 0: try: unsure_to_ham[which].append(msg) except: unsure_to_ham[which] = [msg] else: try: spam_to_ham[which].append(msg) except: spam_to_ham[which] = [msg] return guess[0] def main(): group_action = None guess_action = None regime = "perfect" which = None opts, args = getopt.getopt(sys.argv[1:], 
's:r:', ['help', 'examples']) for opt, arg in opts: if opt == '-s': which = int(arg) - 1 if opt == '-r': regime = arg nsets = len(glob.glob("Data/Ham/Set*")) files = glob.glob("Data/*/Set*/*") files.sort(lambda a,b: cmp(os.path.basename(a), os.path.basename(b))) tests = [] regimes = [] nham_tested = [] nham_trained = [] nham_right = [] nham_wrong = [] nham_unsure = [] nspam_tested = [] nspam_trained = [] nspam_right = [] nspam_wrong = [] nspam_unsure = [] for j in range(0, nsets): # if which is not None and j != which: # continue tests.append(Test(classifier.Bayes())) exec """ import %s regimes.append(%s.Regime()) """ % (regime, regime) in globals(), locals() nham_tested.append([]) nham_trained.append([]) nham_right.append([]) nham_wrong.append([]) nham_unsure.append([]) nspam_tested.append([]) nspam_trained.append([]) nspam_right.append([]) nspam_wrong.append([]) nspam_unsure.append([]) oldgroup = 0 for f in files: base = os.path.basename(f) group = int(base.split('-')[0]); dir = os.path.dirname(f) set = os.path.basename(dir) set = int(set[3:]) - 1 isspam = (dir.find('Spam') >= 0) sys.stderr.write("%-78s\r" % ("%s : %d" % (base, set))) sys.stderr.flush() msg = msgs.Msg(dir, base) for j in range(0, nsets): if which is not None and j != which: continue if group != oldgroup: nham_tested[j].append(tests[j].nham_tested) nham_trained[j].append(tests[j].nham_trained) nham_right[j].append(tests[j].nham_right) nham_wrong[j].append(tests[j].nham_wrong) nham_unsure[j].append(tests[j].nham_unsure) nspam_tested[j].append(tests[j].nspam_tested) nspam_trained[j].append(tests[j].nspam_trained) nspam_right[j].append(tests[j].nspam_right) nspam_wrong[j].append(tests[j].nspam_wrong) nspam_unsure[j].append(tests[j].nspam_unsure) # tests[j].reset_test_results() regimes[j].group_action(j, tests[j]) if j != set: guess = tests[j].predict([msg], isspam) if isspam: actual = -1 else: actual = 1 todo = regimes[j].guess_action(j, tests[j], guess, actual, msg) if todo == -1: 
tests[j].train(None, [msg]) elif todo == 1: tests[j].train([msg], None) oldgroup = group sys.stderr.write("\n") sys.stderr.flush() for j in range(0, nsets): if which is not None and j != which: continue nham_tested[j].append(tests[j].nham_tested) nham_trained[j].append(tests[j].nham_trained) nham_right[j].append(tests[j].nham_right) nham_wrong[j].append(tests[j].nham_wrong) nham_unsure[j].append(tests[j].nham_unsure) nspam_tested[j].append(tests[j].nspam_tested) nspam_trained[j].append(tests[j].nspam_trained) nspam_right[j].append(tests[j].nspam_right) nspam_wrong[j].append(tests[j].nspam_wrong) nspam_unsure[j].append(tests[j].nspam_unsure) for j in range(0, nsets): if which is not None and j != which: continue print 'Set %d' % (j + 1) for k in range(0, len(nham_tested[j])): print '%d %d %d %d %d %d %d %d %d %d' % ( nham_tested[j][k], nham_trained[j][k], nham_right[j][k], nham_wrong[j][k], nham_unsure[j][k], nspam_tested[j][k], nspam_trained[j][k], nspam_right[j][k], nspam_wrong[j][k], nspam_unsure[j][k] ) print print '$ end' if __name__ == '__main__': main() --- NEW FILE: mkgraph.py --- import sys set = "" nham_tested = [] nham_trained = [] nham_right = [] nham_wrong = [] nham_unsure = [] nspam_tested = [] nspam_trained = [] nspam_right = [] nspam_wrong = [] nspam_unsure = [] def outputset(): global set global nham_tested global nham_trained global nham_right global nham_wrong global nham_unsure global nspam_tested global nspam_trained global nspam_right global nspam_wrong global nspam_unsure if set == "": return print '$ Data=Curve2d name="Set %s Cumulative"' % set print '% linetype=1 linelabel="ham_tested" markertype=0 linecolor=0' for k in range(0, len(nham_tested)): print '%d %d' % (k, nham_tested[k]) print print '% linetype=1 linelabel="ham_trained" markertype=0 linecolor=1' for k in range(0, len(nham_trained)): print '%d %d' % (k, nham_trained[k]) print print '% linetype=1 linelabel="ham_right" markertype=0 linecolor=2' for k in range(0, len(nham_right)): 
print '%d %d' % (k, nham_right[k]) print print '% linetype=1 linelabel="ham_wrong" markertype=0 linecolor=3' for k in range(0, len(nham_wrong)): print '%d %d' % (k, nham_wrong[k]) print print '% linetype=1 linelabel="ham_unsure" markertype=0 linecolor=4' for k in range(0, len(nham_unsure)): print '%d %d' % (k, nham_unsure[k]) print print '% linetype=1 linelabel="spam_tested" markertype=0 linecolor=5' for k in range(0, len(nspam_tested)): print '%d %d' % (k, nspam_tested[k]) print print '% linetype=1 linelabel="spam_trained" markertype=0 linecolor=6' for k in range(0, len(nspam_trained)): print '%d %d' % (k, nspam_trained[k]) print print '% linetype=1 linelabel="spam_right" markertype=0 linecolor=7' for k in range(0, len(nspam_right)): print '%d %d' % (k, nspam_right[k]) print print '% linetype=1 linelabel="spam_wrong" markertype=0 linecolor=8' for k in range(0, len(nspam_wrong)): print '%d %d' % (k, nspam_wrong[k]) print print '% linetype=1 linelabel="spam_unsure" markertype=0 linecolor=9' for k in range(0, len(nspam_unsure)): print '%d %d' % (k, nspam_unsure[k]) print print '$ Data=Curve2d name="Set %s Cumulative Error Rates"' % set print '% linetype=1 linelabel="fp" markertype=0 linecolor=0' for k in range(0, len(nham_wrong)): print '%d %f' % (k, (nham_wrong[k] * 1.0 / (nham_tested[k] or 1))) print print '% linetype=1 linelabel="fn" markertype=0 linecolor=1' for k in range(0, len(nspam_wrong)): print '%d %f' % (k, (nspam_wrong[k] * 1.0 / (nspam_tested[k] or 1))) print print '% linetype=1 linelabel="fn" markertype=0 linecolor=2' for k in range(0, len(nspam_unsure)): print '%d %f' % (k, ((nspam_unsure[k] + nham_unsure[k]) * 1.0 / ((nspam_tested[k] + nham_tested[k]) or 1))) print set = "" nham_tested = [] nham_trained = [] nham_right = [] nham_wrong = [] nham_unsure = [] nspam_tested = [] nspam_trained = [] nspam_right = [] nspam_wrong = [] nspam_unsure = [] def main(): global set global nham_tested global nham_trained global nham_right global nham_wrong global 
nham_unsure global nspam_tested global nspam_trained global nspam_right global nspam_wrong global nspam_unsure while 1: line = sys.stdin.readline() if line == "": break if line.endswith("\n"): line = line[:-1] print "# " + line if line.startswith("Set "): outputset() set = line[4:] if len(line) > 0 and (line[0] in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')): vals = line.split(" ") nham_tested.append(int(vals[0])) nham_trained.append(int(vals[1])) nham_right.append(int(vals[2])) nham_wrong.append(int(vals[3])) nham_unsure.append(int(vals[4])) nspam_tested.append(int(vals[5])) nspam_trained.append(int(vals[6])) nspam_right.append(int(vals[7])) nspam_wrong.append(int(vals[8])) nspam_unsure.append(int(vals[9])) outputset() if __name__ == "__main__": main() --- NEW FILE: perfect.py --- ### ### This is a training regime for the incremental.py harness. ### It does perfect training on all messages. ### class Regime: def __init__(self): pass def group_action(self, which, test): pass def guess_action(self, which, test, guess, actual, msg): return actual --- NEW FILE: sort+group.py --- #! /usr/bin/env python ### Sort and group the messages in the Data hierarchy. ### Run this prior to mksets.py for setting stuff up for ### testing of chronological incremental training. """Usage: %(program)s This program has no options! Muahahahaha! 
""" import sys import os import getopt import glob import re import time import filecmp program = sys.argv[0] loud = True day = 24 * 60 * 60 dates = {} def usage(code, msg=''): """Print usage message and sys.exit(code).""" if msg: print >> sys.stderr, msg print >> sys.stderr print >> sys.stderr, __doc__ % globals() sys.exit(code) def bydate(name1, name2): return cmp(dates[name1], dates[name2]) def main(): """Main program; parse options and go.""" global dates dates = {} names = [] date_re = re.compile( r";[^0]* (\d{1,2} (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d{2,4})") now = time.mktime(time.strptime(time.strftime("%d %b %Y"), "%d %b %Y")) if loud: print "Scanning everything" for name in glob.glob('Data/*/*/*'): if loud: sys.stdout.write("%-78s\r" % name) sys.stdout.flush() fh = file(name, "rb") received = "" line = fh.readline() while line != "\r\n" and line != "\n" and line != "": if line.lower().startswith("received:"): received = line line = fh.readline() while line != "" and (line[0] == " " or line[0] == "\t"): received += line line = fh.readline() break line = fh.readline() fh.close() # Figure out how old the message is date = now try: log = str(received) received = date_re.search(received).group(1) log = "\n" + str(received) date = time.mktime(time.strptime(received, "%d %b %Y")) except: print "Couldn't parse " + name + ":" print log pass dates[name] = date names.append(name) if loud: print "" if loud: print "Sorting" names.sort(bydate) if loud: print "Renaming first pass" for name in names: dir = os.path.dirname(name) base = os.path.basename(name) os.rename(name, os.path.join(dir, "-"+base)) if loud: print "Renaming second pass" first = dates[names[0]] for num in range(0, len(names)): name = names[num] dir = os.path.dirname(name) base = os.path.basename(name) group = int((dates[name] - first) // day) os.rename(os.path.join(dir, "-"+base), os.path.join(dir, "%04d-%06d" % (group, num))) if __name__ == "__main__": main() From timstone4 at 
users.sourceforge.net Thu Feb 27 20:19:22 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Thu Feb 27 23:19:26 2003 Subject: [Spambayes-checkins] spambayes pop3proxy.py,1.55,1.56 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv17118 Modified Files: pop3proxy.py Log Message: Fix for bug 693423, digest message crashes review page creation Index: pop3proxy.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v retrieving revision 1.55 retrieving revision 1.56 diff -C2 -d -r1.55 -r1.56 *** pop3proxy.py 27 Feb 2003 04:13:31 -0000 1.55 --- pop3proxy.py 28 Feb 2003 04:19:20 -0000 1.56 *************** *** 841,847 **** except StopIteration: text = '(this message has no text body)' ! text = text.replace(' ', ' ') # Else they'll be quoted ! text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines ! text = text.strip() class _MessageInfo: --- 841,850 ---- except StopIteration: text = '(this message has no text body)' ! if type(text) == type([]): # gotta be a 'right' way to do this ! text = "(this message is a digest of %s messages)" % (len(text)) ! else: ! text = text.replace(' ', ' ') # Else they'll be quoted ! text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines ! 
text = text.strip() class _MessageInfo: From timstone4 at users.sourceforge.net Thu Feb 27 20:25:05 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Thu Feb 27 23:25:08 2003 Subject: [Spambayes-checkins] spambayes pop3proxy.py,1.56,1.57 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv18728 Modified Files: pop3proxy.py Log Message: Corrected typo on smtpproxy import Index: pop3proxy.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v retrieving revision 1.56 retrieving revision 1.57 diff -C2 -d -r1.56 -r1.57 *** pop3proxy.py 28 Feb 2003 04:19:20 -0000 1.56 --- pop3proxy.py 28 Feb 2003 04:25:02 -0000 1.57 *************** *** 1636,1640 **** if launchSMTPProxy: ! from smtproxy import LoadServerInfo, CreateProxies servers, proxyPorts = LoadServerInfo() CreateProxies(servers, proxyPorts, state) --- 1636,1640 ---- if launchSMTPProxy: ! from smtpproxy import LoadServerInfo, CreateProxies servers, proxyPorts = LoadServerInfo() CreateProxies(servers, proxyPorts, state) From popiel at users.sourceforge.net Fri Feb 28 09:35:32 2003 From: popiel at users.sourceforge.net (T. Alexander Popiel) Date: Fri Feb 28 12:35:36 2003 Subject: [Spambayes-checkins] spambayes/testtools regimes.py,NONE,1.1 incremental.HOWTO.txt,1.1,1.2 incremental.TODO.txt,1.1,1.2 incremental.py,1.1,1.2 corrected.py,1.1,NONE perfect.py,1.1,NONE Message-ID: Update of /cvsroot/spambayes/spambayes/testtools In directory sc8-pr-cvs1:/tmp/cvs-serv20756 Modified Files: incremental.HOWTO.txt incremental.TODO.txt incremental.py Added Files: regimes.py Removed Files: corrected.py perfect.py Log Message: Put all regimes into regimes.py, instead of spreading them out over multiple files. Also, define fpfnunsure and fnunsure regimes. --- NEW FILE: regimes.py --- ### ### This is a training regime for the incremental.py harness. ### It does perfect training on all messages. 
### class perfect: def __init__(self): pass def group_action(self, which, test): pass def guess_action(self, which, test, guess, actual, msg): return actual ### ### This is a training regime for the incremental.py harness. ### It does guess-based training on all messages, followed by ### correction to perfect at the end of each group. ### class corrected: def __init__(self): self.spam_to_ham = [] self.ham_to_spam = [] self.unsure_to_ham = [] self.unsure_to_spam = [] def group_action(self, which, test): test.untrain(self.ham_to_spam, self.spam_to_ham) test.train(self.spam_to_ham, self.ham_to_spam) test.train(self.unsure_to_ham, self.unsure_to_spam) self.spam_to_ham = [] self.ham_to_spam = [] self.unsure_to_ham = [] self.unsure_to_spam = [] def guess_action(self, which, test, guess, actual, msg): if guess[0] != actual: if actual < 0: if guess[0] == 0: self.unsure_to_spam.append(msg) else: self.ham_to_spam.append(msg) else: if guess[0] == 0: self.unsure_to_ham.append(msg) else: self.spam_to_ham.append(msg) return guess[0] ### ### This is a training regime for the incremental.py harness. ### It does perfect training for fp, fn, and unsures. ### class fpfnunsure: def __init__(self): pass def group_action(self, which, test): pass def guess_action(self, which, test, guess, actual, msg): if guess[0] != actual: return actual return 0 ### ### This is a training regime for the incremental.py harness. ### It does perfect training for fn, and unsures, leaving ### false positives broken. 
### class fnunsure: def __init__(self): pass def group_action(self, which, test): pass def guess_action(self, which, test, guess, actual, msg): if guess[0] != actual and guess[0] >= 0: return actual return 0 Index: incremental.HOWTO.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/testtools/incremental.HOWTO.txt,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** incremental.HOWTO.txt 28 Feb 2003 00:02:45 -0000 1.1 --- incremental.HOWTO.txt 28 Feb 2003 17:35:28 -0000 1.2 *************** *** 42,50 **** a training and testing run. How training is done is determined by what regime you specify (regimes are ! defined in helper .py files; see perfect.py and corrected.py ! for examples). For large corpora, you may want to do ! the various set runs separately (by specifying the -s ! option), instead of building nsets classifiers all ! in parallel (memory usage can get high). Make sure to save the output of incremental.py into --- 42,51 ---- a training and testing run. How training is done is determined by what regime you specify (regimes are ! defined in the regimes.py file; see the perfect and ! corrected classes for examples). For large corpora, ! you may want to do the various set runs separately ! (by specifying the -s option), instead of building ! nsets classifiers all in parallel (memory usage can ! get high). Make sure to save the output of incremental.py into Index: incremental.TODO.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/testtools/incremental.TODO.txt,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** incremental.TODO.txt 28 Feb 2003 00:02:45 -0000 1.1 --- incremental.TODO.txt 28 Feb 2003 17:35:28 -0000 1.2 *************** *** 7,12 **** perfect -- DONE corrected -- DONE ! fpfnunsure ! fnunsure aging (separate from rules, so can mix it with others?) 
--- 7,12 ---- perfect -- DONE corrected -- DONE ! fpfnunsure -- DONE ! fnunsure -- DONE aging (separate from rules, so can mix it with others?) Index: incremental.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/testtools/incremental.py,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** incremental.py 28 Feb 2003 00:02:45 -0000 1.1 --- incremental.py 28 Feb 2003 17:35:29 -0000 1.2 *************** *** 2,7 **** ### This is a test harness for doing testing of incremental ### training regimes. The individual regimes used should ! ### be specified in separate files; see perfect.py and ! ### corrected.py for examples. ### --- 2,7 ---- ### This is a test harness for doing testing of incremental ### training regimes. The individual regimes used should ! ### be specified in regime.py; see the perfect and ! ### corrected classes for examples. ### *************** *** 19,22 **** --- 19,23 ---- import email from email import Message + from testtools import regimes try: *************** *** 312,319 **** # continue tests.append(Test(classifier.Bayes())) ! exec """ ! import %s ! regimes.append(%s.Regime()) ! """ % (regime, regime) in globals(), locals() nham_tested.append([]) nham_trained.append([]) --- 313,317 ---- # continue tests.append(Test(classifier.Bayes())) ! exec """regimes.append(regimes.%s())""" % (regime) in globals(), locals() nham_tested.append([]) nham_trained.append([]) --- corrected.py DELETED --- --- perfect.py DELETED --- From popiel at users.sourceforge.net Fri Feb 28 09:52:04 2003 From: popiel at users.sourceforge.net (T. Alexander Popiel) Date: Fri Feb 28 12:52:08 2003 Subject: [Spambayes-checkins] spambayes/testtools incremental.py,1.2,1.3 Message-ID: Update of /cvsroot/spambayes/spambayes/testtools In directory sc8-pr-cvs1:/tmp/cvs-serv30172 Modified Files: incremental.py Log Message: Silly me. testtools doesn't work as a module, messing up the import. 
Index: incremental.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/testtools/incremental.py,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** incremental.py 28 Feb 2003 17:35:29 -0000 1.2 --- incremental.py 28 Feb 2003 17:52:01 -0000 1.3 *************** *** 19,23 **** import email from email import Message ! from testtools import regimes try: --- 19,23 ---- import email from email import Message ! import regimes try: From popiel at users.sourceforge.net Fri Feb 28 09:57:57 2003 From: popiel at users.sourceforge.net (T. Alexander Popiel) Date: Fri Feb 28 12:58:00 2003 Subject: [Spambayes-checkins] spambayes/testtools incremental.py,1.3,1.4 Message-ID: Update of /cvsroot/spambayes/spambayes/testtools In directory sc8-pr-cvs1:/tmp/cvs-serv32368 Modified Files: incremental.py Log Message: Fix name conflict between regimes list and regimes source file. Reduce the amount of progress output, speeding it up... Index: incremental.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/testtools/incremental.py,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** incremental.py 28 Feb 2003 17:52:01 -0000 1.3 --- incremental.py 28 Feb 2003 17:57:53 -0000 1.4 *************** *** 298,302 **** tests = [] ! regimes = [] nham_tested = [] nham_trained = [] --- 298,302 ---- tests = [] ! rules = [] nham_tested = [] nham_trained = [] *************** *** 313,317 **** # continue tests.append(Test(classifier.Bayes())) ! exec """regimes.append(regimes.%s())""" % (regime) in globals(), locals() nham_tested.append([]) nham_trained.append([]) --- 313,317 ---- # continue tests.append(Test(classifier.Bayes())) ! 
exec """rules.append(regimes.%s())""" % (regime) in globals(), locals() nham_tested.append([]) nham_trained.append([]) *************** *** 334,340 **** isspam = (dir.find('Spam') >= 0) - sys.stderr.write("%-78s\r" % ("%s : %d" % (base, set))) - sys.stderr.flush() - msg = msgs.Msg(dir, base) --- 334,337 ---- *************** *** 343,346 **** --- 340,346 ---- continue if group != oldgroup: + sys.stderr.write("%-78s\r" % ("%s : %d" % (base, set))) + sys.stderr.flush() + nham_tested[j].append(tests[j].nham_tested) nham_trained[j].append(tests[j].nham_trained) *************** *** 354,358 **** nspam_unsure[j].append(tests[j].nspam_unsure) # tests[j].reset_test_results() ! regimes[j].group_action(j, tests[j]) if j != set: --- 354,358 ---- nspam_unsure[j].append(tests[j].nspam_unsure) # tests[j].reset_test_results() ! rules[j].group_action(j, tests[j]) if j != set: *************** *** 362,366 **** else: actual = 1 ! todo = regimes[j].guess_action(j, tests[j], guess, actual, msg) if todo == -1: tests[j].train(None, [msg]) --- 362,366 ---- else: actual = 1 ! 
todo = rules[j].guess_action(j, tests[j], guess, actual, msg) if todo == -1: tests[j].train(None, [msg]) From timstone4 at users.sourceforge.net Fri Feb 28 10:06:03 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Fri Feb 28 13:06:07 2003 Subject: [Spambayes-checkins] spambayes dbExpImp.py,1.4,1.5 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv3520 Modified Files: dbExpImp.py Log Message: Added Python 2.2 compat code Index: dbExpImp.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/dbExpImp.py,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** dbExpImp.py 8 Feb 2003 17:17:02 -0000 1.4 --- dbExpImp.py 28 Feb 2003 18:05:58 -0000 1.5 *************** *** 90,93 **** --- 90,99 ---- __author__ = "Tim Stone " + try: + True, False + except NameError: + # Maintain compatibility with Python 2.2 + True, False = 1, 0 + from __future__ import generators From timstone4 at users.sourceforge.net Fri Feb 28 10:07:22 2003 From: timstone4 at users.sourceforge.net (Tim Stone) Date: Fri Feb 28 13:07:25 2003 Subject: [Spambayes-checkins] spambayes mboxtrain.py,1.4,1.5 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv3865 Modified Files: mboxtrain.py Log Message: Added Python 2.2 compat code Index: mboxtrain.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/mboxtrain.py,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** mboxtrain.py 29 Jan 2003 03:23:34 -0000 1.4 --- mboxtrain.py 28 Feb 2003 18:06:45 -0000 1.5 *************** *** 35,38 **** --- 35,44 ---- """ + try: + True, False + except NameError: + # Maintain compatibility with Python 2.2 + True, False = 1, 0 + import sys, os, getopt from spambayes import hammie, mboxutils From timstone4 at users.sourceforge.net Fri Feb 28 10:08:55 2003 From: timstone4 at 
users.sourceforge.net (Tim Stone) Date: Fri Feb 28 13:09:02 2003 Subject: [Spambayes-checkins] spambayes setup.py,1.16,1.17 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1:/tmp/cvs-serv4753 Modified Files: setup.py Log Message: Added installation for smtpproxy and mboxtrain Index: setup.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/setup.py,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** setup.py 3 Feb 2003 07:54:13 -0000 1.16 --- setup.py 28 Feb 2003 18:08:51 -0000 1.17 *************** *** 38,43 **** --- 38,45 ---- 'hammiefilter.py', 'pop3proxy.py', + 'smtpproxy.py', 'proxytee.py', 'dbExpImp.py', + 'mboxtrain.py', ], packages = [ From popiel at users.sourceforge.net Fri Feb 28 21:13:10 2003 From: popiel at users.sourceforge.net (T. Alexander Popiel) Date: Sat Mar 1 00:13:18 2003 Subject: [Spambayes-checkins] spambayes/testtools mksets.py,NONE,1.1 incremental.TODO.txt,1.2,1.3 mkgraph.py,1.1,1.2 Message-ID: Update of /cvsroot/spambayes/spambayes/testtools In directory sc8-pr-cvs1:/tmp/cvs-serv24670 Modified Files: incremental.TODO.txt mkgraph.py Added Files: mksets.py Log Message: Actually include mksets.py. Doh. Give mkgraph.py a few options, so it spits out either counts or error rates, but not both. Also, it can do n-day averages instead of cumulative. --- NEW FILE: mksets.py --- #! /usr/bin/env python ### Redistribute messages among the classic Data/*/Set* directories ### based on desired set count, desired with messages ### directories based from MH mailboxes ~/Mail/everything and ### ~/Mail/spam. """Usage: %(program)s [OPTIONS] ... 
Where OPTIONS is one or more of: -h show usage and exit -s num random number seed -n num number of sets -g num number of groups -m num number of messages per {ham,spam}*group*set """ import getopt import sys import os import os.path import glob import shutil import random program = sys.argv[0] loud = True hamdir = "Data/Ham" spamdir = "Data/Spam" nsets = 5 ngroups = None nmess = None def usage(code, msg=''): """Print usage message and sys.exit(code).""" if msg: print >> sys.stderr, msg print >> sys.stderr print >> sys.stderr, __doc__ % globals() sys.exit(code) def bybasename(a, b): return cmp(os.path.basename(a).split("-", 2)[0], os.path.basename(b).split("-", 2)[0]) def distribute(dir): files = glob.glob(os.path.join(dir, "*", "*")) random.shuffle(files) files.sort(bybasename) trash = glob.glob(os.path.join(dir, "Set*")) for set in range(1, nsets + 1): name = os.path.join(dir, "Set%d" % set) try: os.makedirs(name) except: pass try: trash.remove(name) except: pass try: os.makedirs(os.path.join(dir, "reservoir")) except: pass oldgroup = "" cgroups = 0 cmess = 0 cset = 1 for f in files: newgroup = (f.split('-'))[0] if newgroup != oldgroup: oldgroup = newgroup cgroups = cgroups + 1 cmess = 0 cmess = cmess + 1 if ((ngroups is not None and cgroups > ngroups) or (nmess is not None and cmess > (nmess * nsets))): newname = os.path.join(dir, "reservoir", os.path.basename(f)) else: newname = os.path.join(dir, "Set%d" % cset, os.path.basename(f)) cset = (cset % nsets) + 1 sys.stdout.write("%-78s\r" % ("Moving %s to %s" % (f, newname))) sys.stdout.flush() if f != newname: os.rename(f, newname) for f in trash: os.rmdir(f) def main(): """Main program; parse options and go.""" global loud global nsets global ngroups global nmess try: opts, args = getopt.getopt(sys.argv[1:], 'hs:n:g:m:') except getopt.error, msg: usage(2, msg) if opts: for opt, arg in opts: if opt == '-h': usage(0) elif opt == '-s': random.seed(int(arg)) elif opt == '-n': nsets = int(arg) elif opt == '-g': ngroups 
= int(arg) elif opt == '-m': nmess = int(arg) if args: usage(2, "Positional arguments not allowed") distribute(hamdir) distribute(spamdir) print if __name__ == "__main__": main() Index: incremental.TODO.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/testtools/incremental.TODO.txt,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** incremental.TODO.txt 28 Feb 2003 17:35:28 -0000 1.2 --- incremental.TODO.txt 1 Mar 2003 05:13:07 -0000 1.3 *************** *** 20,23 **** graphing: ! separate files for each graph ! cumulative vs span totals --- 20,23 ---- graphing: ! separate files for each graph -- DONE ! cumulative vs span totals -- DONE Index: mkgraph.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/testtools/mkgraph.py,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** mkgraph.py 28 Feb 2003 00:02:45 -0000 1.1 --- mkgraph.py 1 Mar 2003 05:13:08 -0000 1.2 *************** *** 1,3 **** --- 1,7 ---- import sys + import getopt + + report = "error" + span = None set = "" *************** *** 13,17 **** --- 17,33 ---- nspam_unsure = [] + def line(vals): + global span + for k in range(0, len(vals)): + n = vals[k] + if span and k - span >= 0: + n -= vals[k - span] + print '%d %d' % (k, n) + print + + def outputset(): + global report + global span global set global nham_tested *************** *** 29,88 **** return ! print '$ Data=Curve2d name="Set %s Cumulative"' % set ! print '% linetype=1 linelabel="ham_tested" markertype=0 linecolor=0' ! for k in range(0, len(nham_tested)): ! print '%d %d' % (k, nham_tested[k]) ! print ! print '% linetype=1 linelabel="ham_trained" markertype=0 linecolor=1' ! for k in range(0, len(nham_trained)): ! print '%d %d' % (k, nham_trained[k]) ! print ! print '% linetype=1 linelabel="ham_right" markertype=0 linecolor=2' ! for k in range(0, len(nham_right)): ! 
print '%d %d' % (k, nham_right[k]) ! print ! print '% linetype=1 linelabel="ham_wrong" markertype=0 linecolor=3' ! for k in range(0, len(nham_wrong)): ! print '%d %d' % (k, nham_wrong[k]) ! print ! print '% linetype=1 linelabel="ham_unsure" markertype=0 linecolor=4' ! for k in range(0, len(nham_unsure)): ! print '%d %d' % (k, nham_unsure[k]) ! print ! print '% linetype=1 linelabel="spam_tested" markertype=0 linecolor=5' ! for k in range(0, len(nspam_tested)): ! print '%d %d' % (k, nspam_tested[k]) ! print ! print '% linetype=1 linelabel="spam_trained" markertype=0 linecolor=6' ! for k in range(0, len(nspam_trained)): ! print '%d %d' % (k, nspam_trained[k]) ! print ! print '% linetype=1 linelabel="spam_right" markertype=0 linecolor=7' ! for k in range(0, len(nspam_right)): ! print '%d %d' % (k, nspam_right[k]) ! print ! print '% linetype=1 linelabel="spam_wrong" markertype=0 linecolor=8' ! for k in range(0, len(nspam_wrong)): ! print '%d %d' % (k, nspam_wrong[k]) ! print ! print '% linetype=1 linelabel="spam_unsure" markertype=0 linecolor=9' ! for k in range(0, len(nspam_unsure)): ! print '%d %d' % (k, nspam_unsure[k]) ! print ! ! print '$ Data=Curve2d name="Set %s Cumulative Error Rates"' % set ! print '% linetype=1 linelabel="fp" markertype=0 linecolor=0' ! for k in range(0, len(nham_wrong)): ! print '%d %f' % (k, (nham_wrong[k] * 1.0 / (nham_tested[k] or 1))) ! print ! print '% linetype=1 linelabel="fn" markertype=0 linecolor=1' ! for k in range(0, len(nspam_wrong)): ! print '%d %f' % (k, (nspam_wrong[k] * 1.0 / (nspam_tested[k] or 1))) ! print ! print '% linetype=1 linelabel="fn" markertype=0 linecolor=2' ! for k in range(0, len(nspam_unsure)): ! print '%d %f' % (k, ((nspam_unsure[k] + nham_unsure[k]) * 1.0 / ! ((nspam_tested[k] + nham_tested[k]) or 1))) ! print set = "" --- 45,109 ---- return ! if span: ! title = "%d-Day Average" % span ! else: ! title = "Cumulative" ! ! if report == "counts": ! print '$ Data=Curve2d name="%s Counts"' % (title) ! 
print '% linetype=1 linelabel="ham_tested" markertype=0 linecolor=0' ! line(nham_tested) ! print '% linetype=1 linelabel="ham_trained" markertype=0 linecolor=1' ! line(nham_trained) ! print '% linetype=1 linelabel="ham_right" markertype=0 linecolor=2' ! line(nham_right) ! print '% linetype=1 linelabel="ham_wrong" markertype=0 linecolor=3' ! line(nham_wrong) ! print '% linetype=1 linelabel="ham_unsure" markertype=0 linecolor=4' ! line(nham_unsure) ! print '% linetype=1 linelabel="spam_tested" markertype=0 linecolor=5' ! line(nspam_tested) ! print '% linetype=1 linelabel="spam_trained" markertype=0 linecolor=6' ! line(nspam_trained) ! print '% linetype=1 linelabel="spam_right" markertype=0 linecolor=7' ! line(nspam_right) ! print '% linetype=1 linelabel="spam_wrong" markertype=0 linecolor=8' ! line(nspam_wrong) ! print '% linetype=1 linelabel="spam_unsure" markertype=0 linecolor=9' ! line(nspam_unsure) ! ! if report == "error": ! print '$ Data=Curve2d' ! print '% toplabel="%s Error Rates"' % (title) ! print '% ymax=5' ! print '% xlabel="Days"' ! print '% ylabel="Percent"' ! print '% linetype=1 linelabel="fp" markertype=0 linecolor=0' ! for k in range(0, len(nham_wrong)): ! n = nham_wrong[k] ! d = nham_tested[k] ! if span and k - span >= 0: ! n -= nham_wrong[k - span] ! d -= nham_tested[k - span] ! print '%d %f' % (k, (n * 100.0 / (d or 1))) ! print ! print '% linetype=1 linelabel="fn" markertype=0 linecolor=1' ! for k in range(0, len(nspam_wrong)): ! n = nspam_wrong[k] ! d = nspam_tested[k] ! if span and k - span >= 0: ! n -= nspam_wrong[k - span] ! d -= nspam_tested[k - span] ! print '%d %f' % (k, (n * 100.0 / (d or 1))) ! print ! print '% linetype=1 linelabel="unsure" markertype=0 linecolor=2' ! for k in range(0, len(nspam_unsure)): ! n = nham_unsure[k] + nspam_unsure[k] ! d = nham_tested[k] + nspam_tested[k] ! if span and k - span >= 0: ! n -= nham_unsure[k - span] + nspam_unsure[k - span] ! d -= nham_tested[k - span] + nspam_tested[k - span] ! 
print '%d %f' % (k, (n * 100.0 / (d or 1))) ! print set = "" *************** *** 99,102 **** --- 120,125 ---- def main(): + global report + global span global set global nham_tested *************** *** 111,114 **** --- 134,148 ---- global nspam_unsure + opts, args = getopt.getopt(sys.argv[1:], 's:r:') + for opt, arg in opts: + if opt == '-s': + span = int(arg) + if opt == '-r': + report = arg + + if report not in ("error", "counts"): + print >> sys.stderr, "Unrecognized report type" + sys.exit(1) + while 1: line = sys.stdin.readline() *************** *** 117,121 **** if line.endswith("\n"): line = line[:-1] - print "# " + line if line.startswith("Set "): outputset() --- 151,154 ----