From montanaro at users.sourceforge.net Sat Jun 2 23:42:10 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sat, 02 Jun 2007 14:42:10 -0700 Subject: [Spambayes-checkins] spambayes/scripts core_server.py, 1.1.2.3, 1.1.2.4 Message-ID: <20070602214215.6AC421E4004@bag.python.org> Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv8063/scripts Modified Files: Tag: CORESVR core_server.py Log Message: a step closer - actually scored a message! Index: core_server.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/Attic/core_server.py,v retrieving revision 1.1.2.3 retrieving revision 1.1.2.4 diff -C2 -d -r1.1.2.3 -r1.1.2.4 *** core_server.py 24 May 2007 03:19:34 -0000 1.1.2.3 --- core_server.py 2 Jun 2007 21:42:08 -0000 1.1.2.4 *************** *** 19,23 **** options: -h : Displays this help message. ! -m module : Identify plugin module to use (required) -d FILE : use the named DBM database file --- 19,23 ---- options: -h : Displays this help message. ! -P module : Identify plugin module to use (required) -d FILE : use the named DBM database file *************** *** 135,145 **** return '%s:%d' % (addr, port) ! def load_plugin(name): try: ! plugin = __import__(name) except ImportError: ! plugin = __import__("spambayes.%s" % name) ! plugin = getattr(plugin, name) ! return plugin.register() def main(state): --- 135,147 ---- return '%s:%d' % (addr, port) ! def load_plugin(name, state): try: ! plugin_module = __import__(name) except ImportError: ! plugin_module = __import__("spambayes.%s" % name) ! plugin_module = getattr(plugin_module, name) ! plugin = plugin_module.register() ! plugin.state = state ! return plugin def main(state): *************** *** 157,161 **** # Read the arguments. try: ! 
opts, args = getopt.getopt(sys.argv[1:], 'hbd:p:l:u:o:m:') except getopt.error, msg: print >> sys.stderr, str(msg) + '\n\n' + __doc__ --- 159,163 ---- # Read the arguments. try: ! opts, args = getopt.getopt(sys.argv[1:], 'hbd:p:l:u:o:P:') except getopt.error, msg: print >> sys.stderr, str(msg) + '\n\n' + __doc__ *************** *** 179,191 **** elif opt == '-o': options.set_from_cmdline(arg, sys.stderr) ! elif opt == '-m': ! state.plugin = load_plugin(arg) if state.plugin is None: print >> sys.stderr, __doc__ sys.exit() - state.db_name, state.use_db = storage.database_type(opts) - # Let the user know what they are using... v = get_current_version() --- 181,192 ---- elif opt == '-o': options.set_from_cmdline(arg, sys.stderr) ! elif opt == '-P': ! state.plugin = load_plugin(arg, state) if state.plugin is None: + print >> sys.stderr, "No plugin argument (-P) was given." print >> sys.stderr, __doc__ sys.exit() # Let the user know what they are using... v = get_current_version() From montanaro at users.sourceforge.net Sat Jun 2 23:42:10 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sat, 02 Jun 2007 14:42:10 -0700 Subject: [Spambayes-checkins] spambayes/spambayes CorePlugin.py, 1.1.2.2, 1.1.2.3 CoreUI.py, 1.1.2.5, 1.1.2.6 XMLRPCPlugin.py, 1.1.2.2, 1.1.2.3 Message-ID: <20070602214216.685791E4004@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv8063/spambayes Modified Files: Tag: CORESVR CorePlugin.py CoreUI.py XMLRPCPlugin.py Log Message: a step closer - actually scored a message! 
Index: CorePlugin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CorePlugin.py,v retrieving revision 1.1.2.2 retrieving revision 1.1.2.3 diff -C2 -d -r1.1.2.2 -r1.1.2.3 *** CorePlugin.py 29 May 2007 01:27:17 -0000 1.1.2.2 --- CorePlugin.py 2 Jun 2007 21:42:08 -0000 1.1.2.3 *************** *** 12,19 **** self.name = name self.ui = ui - self.hammie = None - - def set_hammie(self, hammie): - self.hammie = hammie class PluginUI: --- 12,15 ---- Index: CoreUI.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CoreUI.py,v retrieving revision 1.1.2.5 retrieving revision 1.1.2.6 diff -C2 -d -r1.1.2.5 -r1.1.2.6 *** CoreUI.py 24 May 2007 03:19:34 -0000 1.1.2.5 --- CoreUI.py 2 Jun 2007 21:42:08 -0000 1.1.2.6 *************** *** 949,953 **** possibly overridden by the driver code, create the Bayes object, the Corpuses, the Trainers and so on.""" - print "Loading database...", if self.is_test: self.use_db = "pickle" --- 949,952 ---- Index: XMLRPCPlugin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v retrieving revision 1.1.2.2 retrieving revision 1.1.2.3 diff -C2 -d -r1.1.2.2 -r1.1.2.3 *** XMLRPCPlugin.py 29 May 2007 01:27:17 -0000 1.1.2.2 --- XMLRPCPlugin.py 2 Jun 2007 21:42:08 -0000 1.1.2.3 *************** *** 8,11 **** --- 8,12 ---- from spambayes.OptionsClass import * from spambayes.Options import _, options + from spambayes.tokenizer import tokenize class XMLRPCUI(PluginUI): *************** *** 36,48 **** return self.score_mime(mime_message) ! def score_mime(self, msg): ! try: ! if isinstance(msg, (str, unicode)): ! msg = message_from_string(msg) ! tokens = tokenizer.tokenize(msg) ! return self.state.bayes.spamprob(tokens, evidence=True) ! except: ! import traceback ! 
traceback.print_exc() def form_to_mime(form, attachments, extra_tokens): --- 37,47 ---- return self.score_mime(mime_message) ! def score_mime(self, msg, mime_type): ! if self.state.bayes is None: ! self.state.create_workers() ! msg = unicode(msg, mime_type) ! msg = message_from_string(msg) ! tokens = tokenize(msg) ! return self.state.bayes.spamprob(tokens, evidence=True) def form_to_mime(form, attachments, extra_tokens): From montanaro at users.sourceforge.net Mon Jun 4 14:28:35 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Mon, 04 Jun 2007 05:28:35 -0700 Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.3, 1.1.2.4 Message-ID: <20070604122840.94D081E4005@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv23471/spambayes Modified Files: Tag: CORESVR XMLRPCPlugin.py Log Message: + docstring, refine API a bit Index: XMLRPCPlugin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v retrieving revision 1.1.2.3 retrieving revision 1.1.2.4 diff -C2 -d -r1.1.2.3 -r1.1.2.4 *** XMLRPCPlugin.py 2 Jun 2007 21:42:08 -0000 1.1.2.3 --- XMLRPCPlugin.py 4 Jun 2007 12:28:33 -0000 1.1.2.4 *************** *** 1,6 **** import threading from email import Message, message_from_string - from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler --- 1,61 ---- + """ + XML-RPC plugin for SpamBayes core server. + + This plugin opens an XML-RPC server in a separate thread listening to the + given host and port (default localhost:5001). In Python 2.5 and later it + also enforces a path (default /sbrpc). + + SECURITY NOTE: The XML-RPC plugin provide *NO SECURITY*. It would be + unwise to listen to anything besides 'localhost'. 
Similarly, when + running the core_server configured with the XML-RPC plugin it's quite + likely that the main core_server interface will have to listen to + something other than localhost to allow administrators to administer + it remotely. Access to that URL should only be available to a set of + trusted administrators, probably by proxy through some other webserver + which provides the necessary authentication support. + + The XML-RPC server exposes the following two methods: + + score(form_dict, extra_tokens) -> (score, evidence) + Scores a dictionary representing the contents of a web + submission form and a list of any extra tokens provided + by the caller. The return value is a list containing + the spam probability of the input and a set of (token, + probability) pairs for the most significant tokens. + + score_mime(msg, encoding) -> (score, evidence) + Scores a MIME message (a string encoded using encoding). + The return value is as for the score method. + + train(form_dict, extra_tokens, is_spam) -> '' + Trains the given form and tokens as ham or spam. + + train_mime(msg, encoding, is_spam) -> '' + Trains the given MIME message as ham or spam. + + retrain() -> (nham, nspam) + Retrain from scratch on all saved MIME messages. + + get_corpus(is_spam) -> string + Retrieve the current ham or spam corpus (in Unix mbox format). + + set_corpus(string, is_spam) -> '' + Set the current ham or spam corpus (string in Unix mbox format). + Should normally be followed by a call to retrain(). + + The following options are available in the Plugin section of the options. + + xmlrpc_host - host to listen to (default: localhost) + xmlrpc_port - port to listen to (default: 5001) + xmlrpc_path - path to support (default: /sbrpc) + hambox - path on server to ham corpus (default: TBD...) + spambox - path on server to spam corpus (default: TBD...) 
+ + """ + import threading + import xmlrpclib from email import Message, message_from_string from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler *************** *** 16,19 **** --- 71,76 ---- ('Plugin', 'xmlrpc_host'), ('Plugin', 'xmlrpc_port'), + ('Plugin', 'hambox'), + ('Plugin', 'spambox'), ) *************** *** 27,53 **** # Path is only enforced in Python 2.5 and later but we set it anyway. self.server.RequestHandlerClass.rpc_paths = (path,) ! self.server.register_function(self.score) ! self.server.register_function(self.score_mime) self.thread = threading.Thread(target=self.server.serve_forever) self.thread.start() ! # placeholders ! def score(self, form, attachments, extra_tokens): ! mime_message = form_to_mime(form, attachments, tokens) return self.score_mime(mime_message) ! def score_mime(self, msg, mime_type): if self.state.bayes is None: self.state.create_workers() ! msg = unicode(msg, mime_type) msg = message_from_string(msg) ! tokens = tokenize(msg) ! return self.state.bayes.spamprob(tokens, evidence=True) ! def form_to_mime(form, attachments, extra_tokens): msg = Message.Message() msg.set_type("multipart/digest") main = Message.Message() ! main.set_payload(" ".join([str(v) for v in form.values()])) msg.attach(main) for msg_type, content in attachments: --- 84,145 ---- # Path is only enforced in Python 2.5 and later but we set it anyway. self.server.RequestHandlerClass.rpc_paths = (path,) ! self.server.register_instance(self) self.thread = threading.Thread(target=self.server.serve_forever) self.thread.start() ! def _dispatch(self, method, params): ! if method in ("score", "score_mime", "train", "train_mime"): ! return getattr(self, method)(*params) ! elif method in ("retrain", "get_corpus", "set_corpus"): ! return "%s not yet implemented" % method ! else: ! raise xmlrpclib.Fault(404, '"%s" is not supported' % method) ! ! def score(self, form_dict, extra_tokens): ! """Score a dictionary + extra tokens.""" ! 
mime_message = form_to_mime(form_dict, extra_tokens) return self.score_mime(mime_message) ! def score_mime(self, msg, encoding): ! """Score a message representing a MIME document. ! ! The msg argument will be a string in the given encoding. ! """ ! tokens = self.tokenize(msg, encoding) ! return self.state.bayes.spamprob(tokens, evidence=True) ! ! def train(self, form_dict, extra_tokens, is_spam): ! """Train the form and extra tokens.""" ! mime_message = form_to_mime(form_dict, extra_tokens) ! return self.train_mime(mime_message, is_spam) ! ! def train_mime(self, msg, is_spam): ! """Train the message.""" ! tokens = self.tokenize(msg, encoding) ! return self.state.bayes.learn(tokens, is_spam) ! ! def tokenize(self, msg, encoding): ! """Tokenize the message. Make sure the bayes instance is available.""" if self.state.bayes is None: self.state.create_workers() ! msg = unicode(msg, encoding) msg = message_from_string(msg) ! return tokenize(msg) ! def form_to_mime(form, mime_type, extra_tokens): ! """Encode submission form bits as a MIME message. ! ! form - a dictionary of key/value pairs representing the form's contents ! extra_tokens - a sequence of synthetic tokens generated by the caller. ! For example, if you include a honeypot hidden field in your form, you ! might generate a synthetic token which tells if it was filled in or not. ! You might also generate tokens which indicate how long a submitting ! username has existed or how many successful posts that username has ! submitted. ! """ msg = Message.Message() msg.set_type("multipart/digest") main = Message.Message() ! 
main.set_payload(" ".join(["%s:%s" % (k, v) for (k, v) in form.items()])) msg.attach(main) for msg_type, content in attachments: From montanaro at users.sourceforge.net Tue Jun 5 04:18:15 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Mon, 04 Jun 2007 19:18:15 -0700 Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.4, 1.1.2.5 Message-ID: <20070605021821.B13B01E4002@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30513 Modified Files: Tag: CORESVR XMLRPCPlugin.py Log Message: Couple minor tweaks Index: XMLRPCPlugin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v retrieving revision 1.1.2.4 retrieving revision 1.1.2.5 diff -C2 -d -r1.1.2.4 -r1.1.2.5 *** XMLRPCPlugin.py 4 Jun 2007 12:28:33 -0000 1.1.2.4 --- XMLRPCPlugin.py 5 Jun 2007 02:18:13 -0000 1.1.2.5 *************** *** 16,20 **** which provides the necessary authentication support. ! The XML-RPC server exposes the following two methods: score(form_dict, extra_tokens) -> (score, evidence) --- 16,20 ---- which provides the necessary authentication support. ! The XML-RPC server exposes the following methods: score(form_dict, extra_tokens) -> (score, evidence) *************** *** 42,46 **** set_corpus(string, is_spam) -> '' ! Set the current ham or spam corpus (string in Unix mbox format). Should normally be followed by a call to retrain(). --- 42,46 ---- set_corpus(string, is_spam) -> '' ! Set the current ham or spam corpus (a string in Unix mbox format). Should normally be followed by a call to retrain(). *************** *** 112,121 **** """Train the form and extra tokens.""" mime_message = form_to_mime(form_dict, extra_tokens) ! return self.train_mime(mime_message, is_spam) ! def train_mime(self, msg, is_spam): """Train the message.""" tokens = self.tokenize(msg, encoding) ! 
return self.state.bayes.learn(tokens, is_spam) def tokenize(self, msg, encoding): --- 112,122 ---- """Train the form and extra tokens.""" mime_message = form_to_mime(form_dict, extra_tokens) ! return self.train_mime(mime_message, "ascii", is_spam) ! def train_mime(self, msg, encoding, is_spam): """Train the message.""" tokens = self.tokenize(msg, encoding) ! self.state.bayes.learn(tokens, is_spam) ! return "" def tokenize(self, msg, encoding): From montanaro at users.sourceforge.net Tue Jun 5 04:43:44 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Mon, 04 Jun 2007 19:43:44 -0700 Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.5, 1.1.2.6 Message-ID: <20070605024348.487331E4002@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7990 Modified Files: Tag: CORESVR XMLRPCPlugin.py Log Message: Dump the training-related methods. I think the core server will manage training. Index: XMLRPCPlugin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v retrieving revision 1.1.2.5 retrieving revision 1.1.2.6 diff -C2 -d -r1.1.2.5 -r1.1.2.6 *** XMLRPCPlugin.py 5 Jun 2007 02:18:13 -0000 1.1.2.5 --- XMLRPCPlugin.py 5 Jun 2007 02:43:41 -0000 1.1.2.6 *************** *** 29,48 **** The return value is as for the score method. - train(form_dict, extra_tokens, is_spam) -> '' - Trains the given form and tokens as ham or spam. - - train_mime(msg, encoding, is_spam) -> '' - Trains the given MIME message as ham or spam. - - retrain() -> (nham, nspam) - Retrain from scratch on all saved MIME messages. - - get_corpus(is_spam) -> string - Retrieve the current ham or spam corpus (in Unix mbox format). - - set_corpus(string, is_spam) -> '' - Set the current ham or spam corpus (a string in Unix mbox format). - Should normally be followed by a call to retrain(). 
- The following options are available in the Plugin section of the options. --- 29,32 ---- *************** *** 50,55 **** xmlrpc_port - port to listen to (default: 5001) xmlrpc_path - path to support (default: /sbrpc) - hambox - path on server to ham corpus (default: TBD...) - spambox - path on server to spam corpus (default: TBD...) """ --- 34,37 ---- *************** *** 71,76 **** ('Plugin', 'xmlrpc_host'), ('Plugin', 'xmlrpc_port'), - ('Plugin', 'hambox'), - ('Plugin', 'spambox'), ) --- 53,56 ---- *************** *** 89,96 **** def _dispatch(self, method, params): ! if method in ("score", "score_mime", "train", "train_mime"): return getattr(self, method)(*params) - elif method in ("retrain", "get_corpus", "set_corpus"): - return "%s not yet implemented" % method else: raise xmlrpclib.Fault(404, '"%s" is not supported' % method) --- 69,74 ---- def _dispatch(self, method, params): ! if method in ("score", "score_mime"): return getattr(self, method)(*params) else: raise xmlrpclib.Fault(404, '"%s" is not supported' % method) *************** *** 109,123 **** return self.state.bayes.spamprob(tokens, evidence=True) - def train(self, form_dict, extra_tokens, is_spam): - """Train the form and extra tokens.""" - mime_message = form_to_mime(form_dict, extra_tokens) - return self.train_mime(mime_message, "ascii", is_spam) - - def train_mime(self, msg, encoding, is_spam): - """Train the message.""" - tokens = self.tokenize(msg, encoding) - self.state.bayes.learn(tokens, is_spam) - return "" - def tokenize(self, msg, encoding): """Tokenize the message. 
Make sure the bayes instance is available.""" --- 87,90 ---- From montanaro at users.sourceforge.net Wed Jun 6 05:30:49 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Tue, 05 Jun 2007 20:30:49 -0700 Subject: [Spambayes-checkins] spambayes/spambayes CoreUI.py, 1.1.2.6, 1.1.2.7 Message-ID: <20070606033054.38AC91E4005@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv13991 Modified Files: Tag: CORESVR CoreUI.py Log Message: Avoid shadowing "header" loop vrbl. Correct capitalization. Index: CoreUI.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CoreUI.py,v retrieving revision 1.1.2.6 retrieving revision 1.1.2.7 diff -C2 -d -r1.1.2.6 -r1.1.2.7 *** CoreUI.py 2 Jun 2007 21:42:08 -0000 1.1.2.6 --- CoreUI.py 6 Jun 2007 03:30:45 -0000 1.1.2.7 *************** *** 606,613 **** sh.optionalHeaders = '' h = self.html.headerHeader.clone() ! for header in options["html_ui", "display_headers"]: h.headerLink.href = 'review?sort=%sHeader' % \ ! (header.lower(),) ! h.headerName = header.title() sh.optionalHeaders += h if not options["html_ui", "display_score"]: --- 606,613 ---- sh.optionalHeaders = '' h = self.html.headerHeader.clone() ! for disp_header in options["html_ui", "display_headers"]: h.headerLink.href = 'review?sort=%sHeader' % \ ! (disp_header.lower(),) ! h.headerName = disp_header.title() sh.optionalHeaders += h if not options["html_ui", "display_score"]: *************** *** 1029,1033 **** else: self.numUnsure += 1 ! self.stats.recordClassification(score) def buildStatusStrings(self): --- 1029,1033 ---- else: self.numUnsure += 1 ! 
self.stats.RecordClassification(score) def buildStatusStrings(self): From montanaro at users.sourceforge.net Wed Jun 6 05:31:27 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Tue, 05 Jun 2007 20:31:27 -0700 Subject: [Spambayes-checkins] spambayes/spambayes ProxyUI.py,1.64,1.64.2.1 Message-ID: <20070606033131.0FFCE1E4005@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv14398 Modified Files: Tag: CORESVR ProxyUI.py Log Message: Avoid shadowing "header" loop vrbl. Index: ProxyUI.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v retrieving revision 1.64 retrieving revision 1.64.2.1 diff -C2 -d -r1.64 -r1.64.2.1 *** ProxyUI.py 28 Nov 2005 10:54:18 -0000 1.64 --- ProxyUI.py 6 Jun 2007 03:31:25 -0000 1.64.2.1 *************** *** 619,626 **** sh.optionalHeaders = '' h = self.html.headerHeader.clone() ! for header in options["html_ui", "display_headers"]: h.headerLink.href = 'review?sort=%sHeader' % \ ! (header.lower(),) ! h.headerName = header.title() sh.optionalHeaders += h if not options["html_ui", "display_score"]: --- 619,626 ---- sh.optionalHeaders = '' h = self.html.headerHeader.clone() ! for disp_header in options["html_ui", "display_headers"]: h.headerLink.href = 'review?sort=%sHeader' % \ ! (disp_header.lower(),) ! 
h.headerName = disp_header.title() sh.optionalHeaders += h if not options["html_ui", "display_score"]: From montanaro at users.sourceforge.net Wed Jun 6 05:37:41 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Tue, 05 Jun 2007 20:37:41 -0700 Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.6, 1.1.2.7 Message-ID: <20070606033744.626CE1E4005@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv16690 Modified Files: Tag: CORESVR XMLRPCPlugin.py Log Message: cache messages make xmlrpc thread a daemon convert msg text to utf-8 string if it's unicode Index: XMLRPCPlugin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v retrieving revision 1.1.2.6 retrieving revision 1.1.2.7 diff -C2 -d -r1.1.2.6 -r1.1.2.7 *** XMLRPCPlugin.py 5 Jun 2007 02:43:41 -0000 1.1.2.6 --- XMLRPCPlugin.py 6 Jun 2007 03:37:37 -0000 1.1.2.7 *************** *** 46,49 **** --- 46,50 ---- from spambayes.Options import _, options from spambayes.tokenizer import tokenize + import spambayes.message class XMLRPCUI(PluginUI): *************** *** 66,69 **** --- 67,71 ---- self.server.register_instance(self) self.thread = threading.Thread(target=self.server.serve_forever) + self.thread.setDaemon(True) self.thread.start() *************** *** 79,97 **** return self.score_mime(mime_message) ! def score_mime(self, msg, encoding): """Score a message representing a MIME document. The msg argument will be a string in the given encoding. """ - tokens = self.tokenize(msg, encoding) - return self.state.bayes.spamprob(tokens, evidence=True) ! def tokenize(self, msg, encoding): ! """Tokenize the message. Make sure the bayes instance is available.""" if self.state.bayes is None: self.state.create_workers() ! msg = unicode(msg, encoding) ! msg = message_from_string(msg) ! 
return tokenize(msg) def form_to_mime(form, mime_type, extra_tokens): --- 81,114 ---- return self.score_mime(mime_message) ! def score_mime(self, msg_text, encoding): """Score a message representing a MIME document. The msg argument will be a string in the given encoding. """ ! # XXX Much of this probably belongs in the core server... ! if self.state.bayes is None: self.state.create_workers() ! if isinstance(msg_text, unicode): ! msg_text = msg_text.encode("utf-8") ! msg = message_from_string(msg_text, ! _class=spambayes.message.SBHeaderMessage) ! ! tokens = tokenize(msg) ! ! # XXX Maybe from here on down... ! ! prob = self.state.bayes.spamprob(tokens, evidence=False) ! self.state.record_classification(msg.GetClassification(), prob) ! ! # Cache the message. ! if not self.state.is_test and options["Storage", "cache_messages"]: ! msg.setId(self.state.getNewMessageName()) ! # Write the message into the Unknown cache. ! makeMessage = self.state.unknownCorpus.makeMessage ! message = makeMessage(msg.getId(), msg.as_string()) ! self.state.unknownCorpus.addMessage(message) ! return prob def form_to_mime(form, mime_type, extra_tokens): From montanaro at users.sourceforge.net Thu Jun 7 00:24:19 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Wed, 06 Jun 2007 15:24:19 -0700 Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.7, 1.1.2.8 Message-ID: <20070606222426.897431E4006@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv10998 Modified Files: Tag: CORESVR XMLRPCPlugin.py Log Message: We scored our first (fake) form submission today. Yay! 
Index: XMLRPCPlugin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v retrieving revision 1.1.2.7 retrieving revision 1.1.2.8 diff -C2 -d -r1.1.2.7 -r1.1.2.8 *** XMLRPCPlugin.py 6 Jun 2007 03:37:37 -0000 1.1.2.7 --- XMLRPCPlugin.py 6 Jun 2007 22:24:17 -0000 1.1.2.8 *************** *** 76,83 **** raise xmlrpclib.Fault(404, '"%s" is not supported' % method) ! def score(self, form_dict, extra_tokens): """Score a dictionary + extra tokens.""" ! mime_message = form_to_mime(form_dict, extra_tokens) ! return self.score_mime(mime_message) def score_mime(self, msg_text, encoding): --- 76,84 ---- raise xmlrpclib.Fault(404, '"%s" is not supported' % method) ! def score(self, form_dict, extra_tokens, attachments): """Score a dictionary + extra tokens.""" ! mime_message = form_to_mime(form_dict, extra_tokens, attachments) ! mime_message = unicode(mime_message).encode("utf-8") ! return self.score_mime(mime_message, "utf-8") def score_mime(self, msg_text, encoding): *************** *** 100,104 **** # XXX Maybe from here on down... ! prob = self.state.bayes.spamprob(tokens, evidence=False) self.state.record_classification(msg.GetClassification(), prob) --- 101,107 ---- # XXX Maybe from here on down... ! prob, clues = self.state.bayes.spamprob(tokens, evidence=True) ! msg.addSBHeaders(prob, clues) ! self.state.record_classification(msg.GetClassification(), prob) *************** *** 112,140 **** return prob ! def form_to_mime(form, mime_type, extra_tokens): """Encode submission form bits as a MIME message. form - a dictionary of key/value pairs representing the form's contents extra_tokens - a sequence of synthetic tokens generated by the caller. ! For example, if you include a honeypot hidden field in your form, you ! might generate a synthetic token which tells if it was filled in or not. ! You might also generate tokens which indicate how long a submitting ! 
username has existed or how many successful posts that username has ! submitted. """ msg = Message.Message() msg.set_type("multipart/digest") main = Message.Message() main.set_payload(" ".join(["%s:%s" % (k, v) for (k, v) in form.items()])) msg.attach(main) ! for msg_type, content in attachments: attachment = Message.Message() ! attachment.set_type(msg_type) ! attachment.set_payload(content) msg.attach(attachment) ! if extra_tokens: ! extra = Message.Message() ! extra.set_payload(" ".join(extra_tokens)) ! msg.attach(extra) return msg --- 115,160 ---- return prob ! def form_to_mime(form, extra_tokens, attachments): """Encode submission form bits as a MIME message. form - a dictionary of key/value pairs representing the form's contents extra_tokens - a sequence of synthetic tokens generated by the caller. ! For example, if you include a honeypot hidden field in your form, you ! might generate a synthetic token which tells if it was filled in or not. ! You might also generate tokens which indicate how long a submitting ! username has existed or how many successful posts that username has ! submitted. ! attachments - list of dictionaries describing an attachment. ! The 'payload' key is required. If there is no 'content-type' key ! 'application/octet-stream' is assumed. If 'content-transfer-encoding' ! is given it will be added to the headers of the attachment. Note that ! the keys are case-sensitive and must be lower case. """ msg = Message.Message() msg.set_type("multipart/digest") + msg.add_header("Subject", "Form submission") + main = Message.Message() main.set_payload(" ".join(["%s:%s" % (k, v) for (k, v) in form.items()])) msg.attach(main) ! ! # Always add the extra tokens payload so we can reliably reverse the ! # conversion. ! extra = Message.Message() ! extra.set_type("text/plain") ! extra.set_payload("\n".join(extra_tokens)) ! msg.attach(extra) ! ! # Any further payloads are for the attachments. ! for content in attachments: ! 
mime_type = content.get("content-type") or "application/octet-stream" attachment = Message.Message() ! if "content-transfer-encoding" in content: ! attachment.add_header("Content-Transfer-Encoding", ! content["content-transfer-encoding"]) ! attachment.set_type(mime_type) ! attachment.set_payload(content["payload"]) msg.attach(attachment) ! return msg From montanaro at users.sourceforge.net Thu Jun 7 01:29:16 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Wed, 06 Jun 2007 16:29:16 -0700 Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.8, 1.1.2.9 Message-ID: <20070606232921.5A00A1E4007@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv4360 Modified Files: Tag: CORESVR XMLRPCPlugin.py Log Message: Add a From: header and content type for the main form items. Index: XMLRPCPlugin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v retrieving revision 1.1.2.8 retrieving revision 1.1.2.9 diff -C2 -d -r1.1.2.8 -r1.1.2.9 *** XMLRPCPlugin.py 6 Jun 2007 22:24:17 -0000 1.1.2.8 --- XMLRPCPlugin.py 6 Jun 2007 23:29:13 -0000 1.1.2.9 *************** *** 134,140 **** msg.set_type("multipart/digest") msg.add_header("Subject", "Form submission") main = Message.Message() ! main.set_payload(" ".join(["%s:%s" % (k, v) for (k, v) in form.items()])) msg.attach(main) --- 134,142 ---- msg.set_type("multipart/digest") msg.add_header("Subject", "Form submission") + msg.add_header("From", "SpamBayes XMLRPC Plugin ") main = Message.Message() ! main.set_type("text/plain") ! 
main.set_payload("\n".join(["%s:%s" % (k, v) for (k, v) in form.items()])) msg.attach(main) From montanaro at users.sourceforge.net Thu Jun 7 03:23:43 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Wed, 06 Jun 2007 18:23:43 -0700 Subject: [Spambayes-checkins] spambayes/spambayes WebAppPlugin.py, 1.1.2.2, NONE Message-ID: <20070607012347.7D9C41E4005@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv15442 Removed Files: Tag: CORESVR WebAppPlugin.py Log Message: thought I removed this already --- WebAppPlugin.py DELETED --- From montanaro at users.sourceforge.net Thu Jun 7 04:50:17 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Wed, 06 Jun 2007 19:50:17 -0700 Subject: [Spambayes-checkins] spambayes/spambayes CorePlugin.py, 1.1.2.3, 1.1.2.4 XMLRPCPlugin.py, 1.1.2.9, 1.1.2.10 Message-ID: <20070607025022.901B91E400B@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv17014 Modified Files: Tag: CORESVR CorePlugin.py XMLRPCPlugin.py Log Message: a couple small cleanups suggested by pylint Index: CorePlugin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CorePlugin.py,v retrieving revision 1.1.2.3 retrieving revision 1.1.2.4 diff -C2 -d -r1.1.2.3 -r1.1.2.4 *** CorePlugin.py 2 Jun 2007 21:42:08 -0000 1.1.2.3 --- CorePlugin.py 7 Jun 2007 02:50:13 -0000 1.1.2.4 *************** *** 3,8 **** """ - import sys - __author__ = "Skip Montanaro Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv16141/spambayes Modified Files: Tag: CORESVR CoreUI.py ProxyUI.py UserInterface.py Log Message: trivial refactoring (these should probably both be functions, not methods) Index: CoreUI.py =================================================================== RCS file: 
/cvsroot/spambayes/spambayes/spambayes/Attic/CoreUI.py,v retrieving revision 1.1.2.7 retrieving revision 1.1.2.8 diff -C2 -d -r1.1.2.7 -r1.1.2.8 *** CoreUI.py 6 Jun 2007 03:30:45 -0000 1.1.2.7 --- CoreUI.py 8 Jun 2007 02:09:51 -0000 1.1.2.8 *************** *** 206,226 **** self.write(_("

OK. Return Home.

")) - def _keyToTimestamp(self, key): - """Given a message key (as seen in a Corpus), returns the timestamp - for that message. This is the time that the message was received, - not the Date header.""" - return long(key[:10]) - - def _getTimeRange(self, timestamp): - """Given a unix timestamp, returns a 3-tuple: the start timestamp - of the given day, the end timestamp of the given day, and the - formatted date of the given day.""" - this = time.localtime(timestamp) - start = (this[0], this[1], this[2], 0, 0, 0, this[6], this[7], this[8]) - end = time.localtime(time.mktime(start) + 36*60*60) - end = (end[0], end[1], end[2], 0, 0, 0, end[6], end[7], end[8]) - date = time.strftime("%A, %B %d, %Y", start) - return time.mktime(start), time.mktime(end), date - def _buildReviewKeys(self, timestamp): """Builds an ordered list of untrained message keys, ready for output --- 206,209 ---- Index: ProxyUI.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v retrieving revision 1.64.2.1 retrieving revision 1.64.2.2 diff -C2 -d -r1.64.2.1 -r1.64.2.2 *** ProxyUI.py 6 Jun 2007 03:31:25 -0000 1.64.2.1 --- ProxyUI.py 8 Jun 2007 02:09:51 -0000 1.64.2.2 *************** *** 111,115 **** ) ! # Like the above, but hese are the options that will be offered on the # advanced configuration page. adv_map = ( --- 111,115 ---- ) ! # Like the above, but these are the options that will be offered on the # advanced configuration page. adv_map = ( *************** *** 221,241 **** self.write(_("

OK. Return Home.

")) - def _keyToTimestamp(self, key): - """Given a message key (as seen in a Corpus), returns the timestamp - for that message. This is the time that the message was received, - not the Date header.""" - return long(key[:10]) - - def _getTimeRange(self, timestamp): - """Given a unix timestamp, returns a 3-tuple: the start timestamp - of the given day, the end timestamp of the given day, and the - formatted date of the given day.""" - this = time.localtime(timestamp) - start = (this[0], this[1], this[2], 0, 0, 0, this[6], this[7], this[8]) - end = time.localtime(time.mktime(start) + 36*60*60) - end = (end[0], end[1], end[2], 0, 0, 0, end[6], end[7], end[8]) - date = time.strftime("%A, %B %d, %Y", start) - return time.mktime(start), time.mktime(end), date - def _buildReviewKeys(self, timestamp): """Builds an ordered list of untrained message keys, ready for output --- 221,224 ---- Index: UserInterface.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/UserInterface.py,v retrieving revision 1.61.2.1 retrieving revision 1.61.2.2 diff -C2 -d -r1.61.2.1 -r1.61.2.2 *** UserInterface.py 24 May 2007 03:19:34 -0000 1.61.2.1 --- UserInterface.py 8 Jun 2007 02:09:51 -0000 1.61.2.2 *************** *** 1253,1254 **** --- 1253,1273 ---- lines.append(''.join(cur_line)) return lines + + def _keyToTimestamp(self, key): + """Given a message key (as seen in a Corpus), returns the timestamp + for that message. 
This is the time that the message was received, + not the Date header.""" + return long(key[:10]) + + def _getTimeRange(self, timestamp): + """Given a unix timestamp, returns a 3-tuple: the start timestamp + of the given day, the end timestamp of the given day, and the + formatted date of the given day.""" + this = time.localtime(timestamp) + start = (this[0], this[1], this[2], 0, 0, 0, this[6], this[7], this[8]) + end = time.localtime(time.mktime(start) + 36*60*60) + end = (end[0], end[1], end[2], 0, 0, 0, end[6], end[7], end[8]) + date = time.strftime("%A, %B %d, %Y", start) + return time.mktime(start), time.mktime(end), date + + From montanaro at users.sourceforge.net Fri Jun 8 14:08:49 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Fri, 08 Jun 2007 05:08:49 -0700 Subject: [Spambayes-checkins] spambayes/spambayes CoreUI.py, 1.1.2.8, 1.1.2.9 ProxyUI.py, 1.64.2.2, 1.64.2.3 UserInterface.py, 1.61.2.2, 1.61.2.3 Message-ID: <20070608120856.EA2761E4006@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv20242/spambayes Modified Files: Tag: CORESVR CoreUI.py ProxyUI.py UserInterface.py Log Message: A couple more refactorings. More will be possible once I get rid of holding state as a module-level global variable in the pop3 proxy. 
Index: CoreUI.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CoreUI.py,v retrieving revision 1.1.2.8 retrieving revision 1.1.2.9 diff -C2 -d -r1.1.2.8 -r1.1.2.9 *** CoreUI.py 8 Jun 2007 02:09:51 -0000 1.1.2.8 --- CoreUI.py 8 Jun 2007 12:08:46 -0000 1.1.2.9 *************** *** 54,58 **** import sys - import re import cgi import time --- 54,57 ---- *************** *** 72,81 **** from spambayes.compatsets import Set - from email.Iterators import typed_subpart_iterator - import UserInterface - from spambayes import tokenizer from spambayes.Options import options, load_options, get_pathname_option, _ ! from spambayes import i18n from spambayes import storage from spambayes import Stats --- 71,78 ---- from spambayes.compatsets import Set import UserInterface from spambayes.Options import options, load_options, get_pathname_option, _ ! ## no i18n yet... ! ##from spambayes import i18n from spambayes import storage from spambayes import Stats *************** *** 161,165 **** self.state = state self.app_for_version = "SpamBayes Proxy" - self.previous_sort = None if not state.can_stop: self.html._readonly = False --- 158,161 ---- *************** *** 246,351 **** return keys, date, prior, start, end - def _sortMessages(self, messages, sort_order, reverse=False): - """Sorts the message by the appropriate attribute. If this was the - previous sort order, then reverse it.""" - if sort_order is None or sort_order == "received": - # Default sorting, which is in reverse order of appearance. - # This is complicated because the 'received' info is the key. 
- messages.sort() - if self.previous_sort == sort_order: - messages.reverse() - self.previous_sort = None - else: - self.previous_sort = 'received' - return messages - tmplist = [(getattr(x[1], sort_order), x) for x in messages] - tmplist.sort() - if reverse: - tmplist.reverse() - return [x for (key, x) in tmplist] - - def _appendMessages(self, table, keyedMessageInfo, label, sort_order, - reverse=False): - """Appends the rows of a table of messages to 'table'.""" - stripe = 0 - - keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order, - reverse) - nrows = options["html_ui", "rows_per_section"] - for key, messageInfo in keyedMessageInfo[:nrows]: - unused, unused, messageInfo.received = \ - self._getTimeRange(self._keyToTimestamp(key)) - row = self.html.reviewRow.clone() - try: - score = messageInfo.score - except ValueError: - score = None - if label == _('Spam'): - if score is not None \ - and score > options["html_ui", "spam_discard_level"]: - r_att = getattr(row, 'discard') - else: - r_att = getattr(row, options["html_ui", - "default_spam_action"]) - elif label == _('Ham'): - if score is not None \ - and score < options["html_ui", "ham_discard_level"]: - r_att = getattr(row, 'discard') - else: - r_att = getattr(row, options["html_ui", - "default_ham_action"]) - else: - r_att = getattr(row, options["html_ui", - "default_unsure_action"]) - setattr(r_att, "checked", 1) - - row.optionalHeadersValues = '' # make way for real list - for header in options["html_ui", "display_headers"]: - header = header.lower() - text = getattr(messageInfo, "%sHeader" % (header,)) - if header == "subject": - # Subject is special, because it links to the body. - # If the user doesn't display the subject, then there - # is no link to the body. 
- h = self.html.reviewRow.linkedHeaderValue.clone() - h.text.title = messageInfo.bodySummary - h.text.href = "view?key=%s&corpus=%s" % (key, label) - else: - h = self.html.reviewRow.headerValue.clone() - h.text = text - row.optionalHeadersValues += h - - # Apart from any message headers, we may also wish to display - # the message score, and the time the message was received. - if options["html_ui", "display_score"]: - if isinstance(messageInfo.score, types.StringTypes): - # Presumably either "?" or "Err". - row.score_ = messageInfo.score - else: - row.score_ = "%.2f%%" % (messageInfo.score,) - else: - del row.score_ - if options["html_ui", "display_received_time"]: - row.received_ = messageInfo.received - else: - del row.received_ - - # Many characters can't go in the URL or they cause problems - # (&, ;, ?, etc). So we use the hex values for them all. - subj_list = [] - for c in messageInfo.subjectHeader: - subj_list.append("%%%s" % (hex(ord(c))[2:],)) - subj = "".join(subj_list) - row.classify.href = "showclues?key=%s&subject=%s" % (key, subj) - row.tokens.href = ("showclues?key=%s&subject=%s&tokens=1" % - (key, subj)) - setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr! 
- setattr(row, 'onMouseOut', - ["this.className='stripe_on';", - "this.className='stripe_off';"][stripe]) - row = str(row).replace('TYPE', label).replace('KEY', key) - table += row - stripe = stripe ^ 1 - def onReview(self, **params): """Present a list of message for (re)training.""" --- 242,245 ---- *************** *** 619,631 **** self._writePostamble(help_topic="review") - def _contains(self, a, b, ignore_case=False): - """Return true if substring b is part of string a.""" - assert isinstance(a, types.StringTypes) - assert isinstance(b, types.StringTypes) - if ignore_case: - a = a.lower() - b = b.lower() - return a.find(b) >= 0 - def onView(self, key, corpus): """View a message - linked from the Review page.""" --- 513,516 ---- *************** *** 688,755 **** self.write(html) - def _makeMessageInfo(self, message): - """Given an email.Message, return an object with subjectHeader, - bodySummary and other header (as needed) attributes. These objects - are passed into appendMessages by onReview - passing email.Message - objects directly uses too much memory. - """ - # Remove notations before displaying - see: - # [ 848365 ] Remove subject annotations from message review page - message.delNotations() - subjectHeader = message["Subject"] or "(none)" - headers = {"subject" : subjectHeader} - for header in options["html_ui", "display_headers"]: - headers[header.lower()] = (message[header] or "(none)") - score = message[options["Headers", "score_header_name"]] - if score: - # the score might have the log info at the end - op = score.find('(') - if op >= 0: - score = score[:op] - try: - score = float(score) * 100 - except ValueError: - # Hmm. The score header should only contain a floating - # point number. What's going on here, then? - score = "Err" # Let the user know something is wrong. - else: - # If the lookup fails, this means that the "include_score" - # option isn't activated. 
We have the choice here to either - # calculate it now, which is pretty inefficient, since we have - # already done so, or to admit that we don't know what it is. - # We'll go with the latter. - score = "?" - try: - part = typed_subpart_iterator(message, 'text', 'plain').next() - text = part.get_payload() - except StopIteration: - try: - part = typed_subpart_iterator(message, 'text', 'html').next() - text = part.get_payload() - text, unused = tokenizer.crack_html_style(text) - text, unused = tokenizer.crack_html_comment(text) - text = tokenizer.html_re.sub(' ', text) - text = _('(this message only has an HTML body)\n') + text - except StopIteration: - text = _('(this message has no text body)') - if type(text) == type([]): # gotta be a 'right' way to do this - text = _("(this message is a digest of %s messages)") % (len(text)) - elif text is None: - text = _("(this message has no body)") - else: - text = text.replace(' ', ' ') # Else they'll be quoted - text = re.sub(r'(\s)\s+', r'\1', text) # Eg. 
multiple blank lines - text = text.strip() - - class _MessageInfo: - pass - messageInfo = _MessageInfo() - for headerName, headerValue in headers.items(): - headerValue = self._trimHeader(headerValue, 45, True) - setattr(messageInfo, "%sHeader" % (headerName,), headerValue) - messageInfo.score = score - messageInfo.bodySummary = self._trimHeader(text, 200) - return messageInfo - def close_database(self): self.state.close() --- 573,576 ---- *************** *** 824,827 **** --- 645,651 ---- self.is_test = False + self.spamCorpus = self.hamCorpus = self.unknownCorpus = None + self.spam_trainer = self.ham_trainer = None + self.init() Index: ProxyUI.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v retrieving revision 1.64.2.2 retrieving revision 1.64.2.3 diff -C2 -d -r1.64.2.2 -r1.64.2.3 *** ProxyUI.py 8 Jun 2007 02:09:51 -0000 1.64.2.2 --- ProxyUI.py 8 Jun 2007 12:08:46 -0000 1.64.2.3 *************** *** 54,58 **** True, False = 1, 0 - import re import cgi import time --- 54,57 ---- *************** *** 72,81 **** from spambayes.compatsets import Set - import tokenizer import UserInterface from spambayes.Options import options, _ - from email.Iterators import typed_subpart_iterator ! global state # These are the options that will be offered on the configuration page. --- 71,78 ---- from spambayes.compatsets import Set import UserInterface from spambayes.Options import options, _ ! state = None # These are the options that will be offered on the configuration page. *************** *** 174,178 **** self.state_recreator = state_recreator # ugly self.app_for_version = "SpamBayes Proxy" - self.previous_sort = None if not proxy_state.can_stop: self.html._readonly = False --- 171,174 ---- *************** *** 261,365 **** return keys, date, prior, start, end - def _sortMessages(self, messages, sort_order, reverse=False): - """Sorts the message by the appropriate attribute. 
If this was the - previous sort order, then reverse it.""" - if sort_order is None or sort_order == "received": - # Default sorting, which is in reverse order of appearance. - # This is complicated because the 'received' info is the key. - messages.sort() - if self.previous_sort == sort_order: - messages.reverse() - self.previous_sort = None - else: - self.previous_sort = 'received' - return messages - tmplist = [(getattr(x[1], sort_order), x) for x in messages] - tmplist.sort() - if reverse: - tmplist.reverse() - return [x for (key, x) in tmplist] - - def _appendMessages(self, table, keyedMessageInfo, label, sort_order, - reverse=False): - """Appends the rows of a table of messages to 'table'.""" - stripe = 0 - - keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order, - reverse) - nrows = options["html_ui", "rows_per_section"] - for key, messageInfo in keyedMessageInfo[:nrows]: - unused, unused, messageInfo.received = \ - self._getTimeRange(self._keyToTimestamp(key)) - row = self.html.reviewRow.clone() - try: - score = messageInfo.score - except ValueError: - score = None - if label == _('Spam'): - if score is not None \ - and score > options["html_ui", "spam_discard_level"]: - r_att = getattr(row, 'discard') - else: - r_att = getattr(row, options["html_ui", - "default_spam_action"]) - elif label == _('Ham'): - if score is not None \ - and score < options["html_ui", "ham_discard_level"]: - r_att = getattr(row, 'discard') - else: - r_att = getattr(row, options["html_ui", - "default_ham_action"]) - else: - r_att = getattr(row, options["html_ui", - "default_unsure_action"]) - setattr(r_att, "checked", 1) - - row.optionalHeadersValues = '' # make way for real list - for header in options["html_ui", "display_headers"]: - header = header.lower() - text = getattr(messageInfo, "%sHeader" % (header,)) - if header == "subject": - # Subject is special, because it links to the body. 
- # If the user doesn't display the subject, then there - # is no link to the body. - h = self.html.reviewRow.linkedHeaderValue.clone() - h.text.title = messageInfo.bodySummary - h.text.href = "view?key=%s&corpus=%s" % (key, label) - else: - h = self.html.reviewRow.headerValue.clone() - h.text = text - row.optionalHeadersValues += h - - # Apart from any message headers, we may also wish to display - # the message score, and the time the message was received. - if options["html_ui", "display_score"]: - if isinstance(messageInfo.score, types.StringTypes): - # Presumably either "?" or "Err". - row.score_ = messageInfo.score - else: - row.score_ = "%.2f%%" % (messageInfo.score,) - else: - del row.score_ - if options["html_ui", "display_received_time"]: - row.received_ = messageInfo.received - else: - del row.received_ - - # Many characters can't go in the URL or they cause problems - # (&, ;, ?, etc). So we use the hex values for them all. - subj_list = [] - for c in messageInfo.subjectHeader: - subj_list.append("%%%s" % (hex(ord(c))[2:],)) - subj = "".join(subj_list) - row.classify.href="showclues?key=%s&subject=%s" % (key, subj) - row.tokens.href="showclues?key=%s&subject=%s&tokens=1" % (key, subj) - setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr! 
- setattr(row, 'onMouseOut', - ["this.className='stripe_on';", - "this.className='stripe_off';"][stripe]) - row = str(row).replace('TYPE', label).replace('KEY', key) - table += row - stripe = stripe ^ 1 - def onReview(self, **params): """Present a list of message for (re)training.""" --- 257,260 ---- *************** *** 632,644 **** self._writePostamble(help_topic="review") - def _contains(self, a, b, ignore_case=False): - """Return true if substring b is part of string a.""" - assert isinstance(a, types.StringTypes) - assert isinstance(b, types.StringTypes) - if ignore_case: - a = a.lower() - b = b.lower() - return a.find(b) >= 0 - def onView(self, key, corpus): """View a message - linked from the Review page.""" --- 527,530 ---- *************** *** 685,752 **** self._writePostamble() - def _makeMessageInfo(self, message): - """Given an email.Message, return an object with subjectHeader, - bodySummary and other header (as needed) attributes. These objects - are passed into appendMessages by onReview - passing email.Message - objects directly uses too much memory. - """ - # Remove notations before displaying - see: - # [ 848365 ] Remove subject annotations from message review page - message.delNotations() - subjectHeader = message["Subject"] or "(none)" - headers = {"subject" : subjectHeader} - for header in options["html_ui", "display_headers"]: - headers[header.lower()] = (message[header] or "(none)") - score = message[options["Headers", "score_header_name"]] - if score: - # the score might have the log info at the end - op = score.find('(') - if op >= 0: - score = score[:op] - try: - score = float(score) * 100 - except ValueError: - # Hmm. The score header should only contain a floating - # point number. What's going on here, then? - score = "Err" # Let the user know something is wrong. - else: - # If the lookup fails, this means that the "include_score" - # option isn't activated. 
We have the choice here to either - # calculate it now, which is pretty inefficient, since we have - # already done so, or to admit that we don't know what it is. - # We'll go with the latter. - score = "?" - try: - part = typed_subpart_iterator(message, 'text', 'plain').next() - text = part.get_payload() - except StopIteration: - try: - part = typed_subpart_iterator(message, 'text', 'html').next() - text = part.get_payload() - text, unused = tokenizer.crack_html_style(text) - text, unused = tokenizer.crack_html_comment(text) - text = tokenizer.html_re.sub(' ', text) - text = _('(this message only has an HTML body)\n') + text - except StopIteration: - text = _('(this message has no text body)') - if type(text) == type([]): # gotta be a 'right' way to do this - text = _("(this message is a digest of %s messages)") % (len(text)) - elif text is None: - text = _("(this message has no body)") - else: - text = text.replace(' ', ' ') # Else they'll be quoted - text = re.sub(r'(\s)\s+', r'\1', text) # Eg. 
multiple blank lines - text = text.strip() - - class _MessageInfo: - pass - messageInfo = _MessageInfo() - for headerName, headerValue in headers.items(): - headerValue = self._trimHeader(headerValue, 45, True) - setattr(messageInfo, "%sHeader" % (headerName,), headerValue) - messageInfo.score = score - messageInfo.bodySummary = self._trimHeader(text, 200) - return messageInfo - def close_database(self): state.close() --- 571,574 ---- Index: UserInterface.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/UserInterface.py,v retrieving revision 1.61.2.2 retrieving revision 1.61.2.3 diff -C2 -d -r1.61.2.2 -r1.61.2.3 *** UserInterface.py 8 Jun 2007 02:09:51 -0000 1.61.2.2 --- UserInterface.py 8 Jun 2007 12:08:46 -0000 1.61.2.3 *************** *** 80,83 **** --- 80,84 ---- import types import StringIO + from email.Iterators import typed_subpart_iterator import oe_mailbox *************** *** 277,280 **** --- 278,282 ---- self.stats = stats self.app_for_version = None # subclasses must fill this in + self.previous_sort = None def onClassify(self, file, text, which): *************** *** 1271,1273 **** --- 1273,1448 ---- return time.mktime(start), time.mktime(end), date + def _sortMessages(self, messages, sort_order, reverse=False): + """Sorts the message by the appropriate attribute. If this was the + previous sort order, then reverse it.""" + if sort_order is None or sort_order == "received": + # Default sorting, which is in reverse order of appearance. + # This is complicated because the 'received' info is the key. 
+ messages.sort() + if self.previous_sort == sort_order: + messages.reverse() + self.previous_sort = None + else: + self.previous_sort = 'received' + return messages + tmplist = [(getattr(x[1], sort_order), x) for x in messages] + tmplist.sort() + if reverse: + tmplist.reverse() + return [x for (key, x) in tmplist] + + def _appendMessages(self, table, keyedMessageInfo, label, sort_order, + reverse=False): + """Appends the rows of a table of messages to 'table'.""" + stripe = 0 + + keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order, + reverse) + nrows = options["html_ui", "rows_per_section"] + for key, messageInfo in keyedMessageInfo[:nrows]: + unused, unused, messageInfo.received = \ + self._getTimeRange(self._keyToTimestamp(key)) + row = self.html.reviewRow.clone() + try: + score = messageInfo.score + except ValueError: + score = None + if label == _('Spam'): + if score is not None \ + and score > options["html_ui", "spam_discard_level"]: + r_att = getattr(row, 'discard') + else: + r_att = getattr(row, options["html_ui", + "default_spam_action"]) + elif label == _('Ham'): + if score is not None \ + and score < options["html_ui", "ham_discard_level"]: + r_att = getattr(row, 'discard') + else: + r_att = getattr(row, options["html_ui", + "default_ham_action"]) + else: + r_att = getattr(row, options["html_ui", + "default_unsure_action"]) + setattr(r_att, "checked", 1) + + row.optionalHeadersValues = '' # make way for real list + for header in options["html_ui", "display_headers"]: + header = header.lower() + text = getattr(messageInfo, "%sHeader" % (header,)) + if header == "subject": + # Subject is special, because it links to the body. + # If the user doesn't display the subject, then there + # is no link to the body. 
+ h = self.html.reviewRow.linkedHeaderValue.clone() + h.text.title = messageInfo.bodySummary + h.text.href = "view?key=%s&corpus=%s" % (key, label) + else: + h = self.html.reviewRow.headerValue.clone() + h.text = text + row.optionalHeadersValues += h + + # Apart from any message headers, we may also wish to display + # the message score, and the time the message was received. + if options["html_ui", "display_score"]: + if isinstance(messageInfo.score, types.StringTypes): + # Presumably either "?" or "Err". + row.score_ = messageInfo.score + else: + row.score_ = "%.2f%%" % (messageInfo.score,) + else: + del row.score_ + if options["html_ui", "display_received_time"]: + row.received_ = messageInfo.received + else: + del row.received_ + # Many characters can't go in the URL or they cause problems + # (&, ;, ?, etc). So we use the hex values for them all. + subj_list = [] + for c in messageInfo.subjectHeader: + subj_list.append("%%%s" % (hex(ord(c))[2:],)) + subj = "".join(subj_list) + row.classify.href = "showclues?key=%s&subject=%s" % (key, subj) + row.tokens.href = ("showclues?key=%s&subject=%s&tokens=1" % + (key, subj)) + setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr! + setattr(row, 'onMouseOut', + ["this.className='stripe_on';", + "this.className='stripe_off';"][stripe]) + row = str(row).replace('TYPE', label).replace('KEY', key) + table += row + stripe = stripe ^ 1 + + def _contains(self, a, b, ignore_case=False): + """Return true if substring b is part of string a.""" + assert isinstance(a, types.StringTypes) + assert isinstance(b, types.StringTypes) + if ignore_case: + a = a.lower() + b = b.lower() + return a.find(b) >= 0 + + def _makeMessageInfo(self, message): + """Given an email.Message, return an object with subjectHeader, + bodySummary and other header (as needed) attributes. These objects + are passed into appendMessages by onReview - passing email.Message + objects directly uses too much memory. 
+ """ + # Remove notations before displaying - see: + # [ 848365 ] Remove subject annotations from message review page + message.delNotations() + subjectHeader = message["Subject"] or "(none)" + headers = {"subject" : subjectHeader} + for header in options["html_ui", "display_headers"]: + headers[header.lower()] = (message[header] or "(none)") + score = message[options["Headers", "score_header_name"]] + if score: + # the score might have the log info at the end + op = score.find('(') + if op >= 0: + score = score[:op] + try: + score = float(score) * 100 + except ValueError: + # Hmm. The score header should only contain a floating + # point number. What's going on here, then? + score = "Err" # Let the user know something is wrong. + else: + # If the lookup fails, this means that the "include_score" + # option isn't activated. We have the choice here to either + # calculate it now, which is pretty inefficient, since we have + # already done so, or to admit that we don't know what it is. + # We'll go with the latter. + score = "?" + try: + part = typed_subpart_iterator(message, 'text', 'plain').next() + text = part.get_payload() + except StopIteration: + try: + part = typed_subpart_iterator(message, 'text', 'html').next() + text = part.get_payload() + text, unused = tokenizer.crack_html_style(text) + text, unused = tokenizer.crack_html_comment(text) + text = tokenizer.html_re.sub(' ', text) + text = _('(this message only has an HTML body)\n') + text + except StopIteration: + text = _('(this message has no text body)') + if type(text) == type([]): # gotta be a 'right' way to do this + text = _("(this message is a digest of %s messages)") % (len(text)) + elif text is None: + text = _("(this message has no body)") + else: + text = text.replace(' ', ' ') # Else they'll be quoted + text = re.sub(r'(\s)\s+', r'\1', text) # Eg. 
multiple blank lines + text = text.strip() + + class _MessageInfo: + pass + messageInfo = _MessageInfo() + for headerName, headerValue in headers.items(): + headerValue = self._trimHeader(headerValue, 45, True) + setattr(messageInfo, "%sHeader" % (headerName,), headerValue) + messageInfo.score = score + messageInfo.bodySummary = self._trimHeader(text, 200) + return messageInfo From montanaro at users.sourceforge.net Sun Jun 10 17:22:44 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sun, 10 Jun 2007 08:22:44 -0700 Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.10, 1.1.2.11 Message-ID: <20070610152250.54D661E4009@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv5944/spambayes Modified Files: Tag: CORESVR XMLRPCPlugin.py Log Message: typo in comment Index: XMLRPCPlugin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v retrieving revision 1.1.2.10 retrieving revision 1.1.2.11 diff -C2 -d -r1.1.2.10 -r1.1.2.11 *** XMLRPCPlugin.py 7 Jun 2007 02:50:13 -0000 1.1.2.10 --- XMLRPCPlugin.py 10 Jun 2007 15:22:41 -0000 1.1.2.11 *************** *** 32,36 **** xmlrpc_host - host to listen to (default: localhost) ! xmlrpc_port - port to listen to (default: 5001) xmlrpc_path - path to support (default: /sbrpc) --- 32,36 ---- xmlrpc_host - host to listen to (default: localhost) ! 
xmlrpc_port - port to listen to (default: 8001) xmlrpc_path - path to support (default: /sbrpc) From montanaro at users.sourceforge.net Sun Jun 10 17:27:40 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sun, 10 Jun 2007 08:27:40 -0700 Subject: [Spambayes-checkins] website faq.txt,1.93,1.94 Message-ID: <20070610152742.50D981E4008@bag.python.org> Update of /cvsroot/spambayes/website In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7512/website Modified Files: faq.txt Log Message: Merge CORESVR branch to HEAD. This adds a new app, scripts/core_server.py and attendant bits, such as an XML-RPC plugin. The web interface is straight from the POP3 proxy server. Index: faq.txt =================================================================== RCS file: /cvsroot/spambayes/website/faq.txt,v retrieving revision 1.93 retrieving revision 1.94 diff -C2 -d -r1.93 -r1.94 *** faq.txt 13 May 2007 13:44:51 -0000 1.93 --- faq.txt 10 Jun 2007 15:27:37 -0000 1.94 *************** *** 294,297 **** --- 294,303 ---- http://mail.python.org/pipermail/spambayes-bugs/2007-January/004119.html + Note that you will probably have to execute the installer with elevated + privileges. Right-clicking on the EXE and selecting "Run as Administrator" + should work (and will be necessary even if you are logged in as an admin + user). + + Does SpamBayes work with Outlook Express? ----------------------------------------- From montanaro at users.sourceforge.net Sun Jun 10 17:27:38 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sun, 10 Jun 2007 08:27:38 -0700 Subject: [Spambayes-checkins] spambayes setup.py,1.32,1.33 Message-ID: <20070610152742.5E3A51E400D@bag.python.org> Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7512 Modified Files: setup.py Log Message: Merge CORESVR branch to HEAD. This adds a new app, scripts/core_server.py and attendant bits, such as an XML-RPC plugin. 
The web interface is straight from the POP3 proxy server. Index: setup.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/setup.py,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** setup.py 6 Dec 2004 03:04:17 -0000 1.32 --- setup.py 10 Jun 2007 15:27:36 -0000 1.33 *************** *** 100,103 **** --- 100,104 ---- 'scripts/sb_pop3dnd.py', 'scripts/sb_server.py', + 'scripts/core_server.py', 'scripts/sb_unheader.py', 'scripts/sb_upload.py', *************** *** 128,131 **** --- 129,133 ---- 'spambayes', 'spambayes.resources', + 'spambayes.core_resources', ], classifiers = [ From montanaro at users.sourceforge.net Sun Jun 10 17:27:39 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sun, 10 Jun 2007 08:27:39 -0700 Subject: [Spambayes-checkins] spambayes/scripts core_server.py,1.1,1.2 Message-ID: <20070610152742.7B4571E400E@bag.python.org> Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7512/scripts Added Files: core_server.py Log Message: Merge CORESVR branch to HEAD. This adds a new app, scripts/core_server.py and attendant bits, such as an XML-RPC plugin. The web interface is straight from the POP3 proxy server. 
From montanaro at users.sourceforge.net Sun Jun 10 17:27:39 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sun, 10 Jun 2007 08:27:39 -0700 Subject: [Spambayes-checkins] spambayes/spambayes CorePlugin.py, 1.1, 1.2 CoreUI.py, 1.1, 1.2 XMLRPCPlugin.py, 1.1, 1.2 Options.py, 1.141, 1.142 ProxyUI.py, 1.64, 1.65 UserInterface.py, 1.61, 1.62 dnscache.py, 1.3, 1.4 storage.py, 1.63, 1.64 Message-ID: <20070610152744.B48811E400B@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7512/spambayes Modified Files: Options.py ProxyUI.py UserInterface.py dnscache.py storage.py Added Files: CorePlugin.py CoreUI.py XMLRPCPlugin.py Log Message: Merge CORESVR branch to HEAD. This adds a new app, scripts/core_server.py and attendant bits, such as an XML-RPC plugin. The web interface is straight from the POP3 proxy server. Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.141 retrieving revision 1.142 diff -C2 -d -r1.141 -r1.142 *** Options.py 26 Mar 2007 07:57:13 -0000 1.141 --- Options.py 10 Jun 2007 15:27:36 -0000 1.142 *************** *** 653,656 **** --- 653,671 ---- PATH, DO_NOT_RESTORE), + ("core_spam_cache", _("Spam cache directory"), "core-spam-cache", + _("""Directory that SpamBayes should cache spam in. If this does + not exist, it will be created."""), + PATH, DO_NOT_RESTORE), + + ("core_ham_cache", _("Ham cache directory"), "core-ham-cache", + _("""Directory that SpamBayes should cache ham in. If this does + not exist, it will be created."""), + PATH, DO_NOT_RESTORE), + + ("core_unknown_cache", _("Unknown cache directory"), "core-unknown-cache", + _("""Directory that SpamBayes should cache unclassified messages in. 
+ If this does not exist, it will be created."""), + PATH, DO_NOT_RESTORE), + ("cache_messages", _("Cache messages"), True, _("""You can disable the pop3proxy caching of messages. This *************** *** 1280,1283 **** --- 1295,1309 ---- r"\w\w(?:_\w\w)?", RESTORE), ), + "Plugin": ( + ("xmlrpc_path", _("XML-RPC path"), "/sbrpc", + _("""The path to respond to."""), + r"[\w]+", RESTORE), + ("xmlrpc_host", _("XML-RPC host"), "localhost", + _("""The host to listen on."""), + SERVER, RESTORE), + ("xmlrpc_port", _("XML-RPC port"), 8001, + _("""The port to listen on."""), + r"[\d]+", RESTORE), + ), } Index: ProxyUI.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v retrieving revision 1.64 retrieving revision 1.65 diff -C2 -d -r1.64 -r1.65 *** ProxyUI.py 28 Nov 2005 10:54:18 -0000 1.64 --- ProxyUI.py 10 Jun 2007 15:27:36 -0000 1.65 *************** *** 54,58 **** True, False = 1, 0 - import re import cgi import time --- 54,57 ---- *************** *** 72,81 **** from spambayes.compatsets import Set - import tokenizer import UserInterface from spambayes.Options import options, _ - from email.Iterators import typed_subpart_iterator ! global state # These are the options that will be offered on the configuration page. --- 71,78 ---- from spambayes.compatsets import Set import UserInterface from spambayes.Options import options, _ ! state = None # These are the options that will be offered on the configuration page. *************** *** 111,115 **** ) ! # Like the above, but hese are the options that will be offered on the # advanced configuration page. adv_map = ( --- 108,112 ---- ) ! # Like the above, but these are the options that will be offered on the # advanced configuration page. 
adv_map = ( *************** *** 174,178 **** self.state_recreator = state_recreator # ugly self.app_for_version = "SpamBayes Proxy" - self.previous_sort = None if not proxy_state.can_stop: self.html._readonly = False --- 171,174 ---- *************** *** 221,241 **** self.write(_("

OK. Return Home.

")) - def _keyToTimestamp(self, key): - """Given a message key (as seen in a Corpus), returns the timestamp - for that message. This is the time that the message was received, - not the Date header.""" - return long(key[:10]) - - def _getTimeRange(self, timestamp): - """Given a unix timestamp, returns a 3-tuple: the start timestamp - of the given day, the end timestamp of the given day, and the - formatted date of the given day.""" - this = time.localtime(timestamp) - start = (this[0], this[1], this[2], 0, 0, 0, this[6], this[7], this[8]) - end = time.localtime(time.mktime(start) + 36*60*60) - end = (end[0], end[1], end[2], 0, 0, 0, end[6], end[7], end[8]) - date = time.strftime("%A, %B %d, %Y", start) - return time.mktime(start), time.mktime(end), date - def _buildReviewKeys(self, timestamp): """Builds an ordered list of untrained message keys, ready for output --- 217,220 ---- *************** *** 278,382 **** return keys, date, prior, start, end - def _sortMessages(self, messages, sort_order, reverse=False): - """Sorts the message by the appropriate attribute. If this was the - previous sort order, then reverse it.""" - if sort_order is None or sort_order == "received": - # Default sorting, which is in reverse order of appearance. - # This is complicated because the 'received' info is the key. 
- messages.sort() - if self.previous_sort == sort_order: - messages.reverse() - self.previous_sort = None - else: - self.previous_sort = 'received' - return messages - tmplist = [(getattr(x[1], sort_order), x) for x in messages] - tmplist.sort() - if reverse: - tmplist.reverse() - return [x for (key, x) in tmplist] - - def _appendMessages(self, table, keyedMessageInfo, label, sort_order, - reverse=False): - """Appends the rows of a table of messages to 'table'.""" - stripe = 0 - - keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order, - reverse) - nrows = options["html_ui", "rows_per_section"] - for key, messageInfo in keyedMessageInfo[:nrows]: - unused, unused, messageInfo.received = \ - self._getTimeRange(self._keyToTimestamp(key)) - row = self.html.reviewRow.clone() - try: - score = messageInfo.score - except ValueError: - score = None - if label == _('Spam'): - if score is not None \ - and score > options["html_ui", "spam_discard_level"]: - r_att = getattr(row, 'discard') - else: - r_att = getattr(row, options["html_ui", - "default_spam_action"]) - elif label == _('Ham'): - if score is not None \ - and score < options["html_ui", "ham_discard_level"]: - r_att = getattr(row, 'discard') - else: - r_att = getattr(row, options["html_ui", - "default_ham_action"]) - else: - r_att = getattr(row, options["html_ui", - "default_unsure_action"]) - setattr(r_att, "checked", 1) - - row.optionalHeadersValues = '' # make way for real list - for header in options["html_ui", "display_headers"]: - header = header.lower() - text = getattr(messageInfo, "%sHeader" % (header,)) - if header == "subject": - # Subject is special, because it links to the body. - # If the user doesn't display the subject, then there - # is no link to the body. 
- h = self.html.reviewRow.linkedHeaderValue.clone() - h.text.title = messageInfo.bodySummary - h.text.href = "view?key=%s&corpus=%s" % (key, label) - else: - h = self.html.reviewRow.headerValue.clone() - h.text = text - row.optionalHeadersValues += h - - # Apart from any message headers, we may also wish to display - # the message score, and the time the message was received. - if options["html_ui", "display_score"]: - if isinstance(messageInfo.score, types.StringTypes): - # Presumably either "?" or "Err". - row.score_ = messageInfo.score - else: - row.score_ = "%.2f%%" % (messageInfo.score,) - else: - del row.score_ - if options["html_ui", "display_received_time"]: - row.received_ = messageInfo.received - else: - del row.received_ - - # Many characters can't go in the URL or they cause problems - # (&, ;, ?, etc). So we use the hex values for them all. - subj_list = [] - for c in messageInfo.subjectHeader: - subj_list.append("%%%s" % (hex(ord(c))[2:],)) - subj = "".join(subj_list) - row.classify.href="showclues?key=%s&subject=%s" % (key, subj) - row.tokens.href="showclues?key=%s&subject=%s&tokens=1" % (key, subj) - setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr! - setattr(row, 'onMouseOut', - ["this.className='stripe_on';", - "this.className='stripe_off';"][stripe]) - row = str(row).replace('TYPE', label).replace('KEY', key) - table += row - stripe = stripe ^ 1 - def onReview(self, **params): """Present a list of message for (re)training.""" --- 257,260 ---- *************** *** 619,626 **** sh.optionalHeaders = '' h = self.html.headerHeader.clone() ! for header in options["html_ui", "display_headers"]: h.headerLink.href = 'review?sort=%sHeader' % \ ! (header.lower(),) ! h.headerName = header.title() sh.optionalHeaders += h if not options["html_ui", "display_score"]: --- 497,504 ---- sh.optionalHeaders = '' h = self.html.headerHeader.clone() ! 
for disp_header in options["html_ui", "display_headers"]: h.headerLink.href = 'review?sort=%sHeader' % \ ! (disp_header.lower(),) ! h.headerName = disp_header.title() sh.optionalHeaders += h if not options["html_ui", "display_score"]: *************** *** 649,661 **** self._writePostamble(help_topic="review") - def _contains(self, a, b, ignore_case=False): - """Return true if substring b is part of string a.""" - assert isinstance(a, types.StringTypes) - assert isinstance(b, types.StringTypes) - if ignore_case: - a = a.lower() - b = b.lower() - return a.find(b) >= 0 - def onView(self, key, corpus): """View a message - linked from the Review page.""" --- 527,530 ---- *************** *** 702,769 **** self._writePostamble() - def _makeMessageInfo(self, message): - """Given an email.Message, return an object with subjectHeader, - bodySummary and other header (as needed) attributes. These objects - are passed into appendMessages by onReview - passing email.Message - objects directly uses too much memory. - """ - # Remove notations before displaying - see: - # [ 848365 ] Remove subject annotations from message review page - message.delNotations() - subjectHeader = message["Subject"] or "(none)" - headers = {"subject" : subjectHeader} - for header in options["html_ui", "display_headers"]: - headers[header.lower()] = (message[header] or "(none)") - score = message[options["Headers", "score_header_name"]] - if score: - # the score might have the log info at the end - op = score.find('(') - if op >= 0: - score = score[:op] - try: - score = float(score) * 100 - except ValueError: - # Hmm. The score header should only contain a floating - # point number. What's going on here, then? - score = "Err" # Let the user know something is wrong. - else: - # If the lookup fails, this means that the "include_score" - # option isn't activated. 
We have the choice here to either - # calculate it now, which is pretty inefficient, since we have - # already done so, or to admit that we don't know what it is. - # We'll go with the latter. - score = "?" - try: - part = typed_subpart_iterator(message, 'text', 'plain').next() - text = part.get_payload() - except StopIteration: - try: - part = typed_subpart_iterator(message, 'text', 'html').next() - text = part.get_payload() - text, unused = tokenizer.crack_html_style(text) - text, unused = tokenizer.crack_html_comment(text) - text = tokenizer.html_re.sub(' ', text) - text = _('(this message only has an HTML body)\n') + text - except StopIteration: - text = _('(this message has no text body)') - if type(text) == type([]): # gotta be a 'right' way to do this - text = _("(this message is a digest of %s messages)") % (len(text)) - elif text is None: - text = _("(this message has no body)") - else: - text = text.replace('&nbsp;', ' ') # Else they'll be quoted - text = re.sub(r'(\s)\s+', r'\1', text) # Eg.
multiple blank lines - text = text.strip() - - class _MessageInfo: - pass - messageInfo = _MessageInfo() - for headerName, headerValue in headers.items(): - headerValue = self._trimHeader(headerValue, 45, True) - setattr(messageInfo, "%sHeader" % (headerName,), headerValue) - messageInfo.score = score - messageInfo.bodySummary = self._trimHeader(text, 200) - return messageInfo - def close_database(self): state.close() --- 571,574 ---- Index: UserInterface.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/UserInterface.py,v retrieving revision 1.61 retrieving revision 1.62 diff -C2 -d -r1.61 -r1.62 *** UserInterface.py 28 Nov 2005 10:53:12 -0000 1.61 --- UserInterface.py 10 Jun 2007 15:27:36 -0000 1.62 *************** *** 80,83 **** --- 80,84 ---- import types import StringIO + from email.Iterators import typed_subpart_iterator import oe_mailbox *************** *** 277,280 **** --- 278,282 ---- self.stats = stats self.app_for_version = None # subclasses must fill this in + self.previous_sort = None def onClassify(self, file, text, which): *************** *** 820,823 **** --- 822,827 ---- elif parms["how"] == _("Save experimental options"): pmap = experimental_ini_map + elif parms["how"] == _("Save plugin options"): + pmap = self.plugin_ini_map del parms["how"] html = self._getHTMLClone() *************** *** 1251,1252 **** --- 1255,1448 ---- lines.append(''.join(cur_line)) return lines + + def _keyToTimestamp(self, key): + """Given a message key (as seen in a Corpus), returns the timestamp + for that message. 
This is the time that the message was received, + not the Date header.""" + return long(key[:10]) + + def _getTimeRange(self, timestamp): + """Given a unix timestamp, returns a 3-tuple: the start timestamp + of the given day, the end timestamp of the given day, and the + formatted date of the given day.""" + this = time.localtime(timestamp) + start = (this[0], this[1], this[2], 0, 0, 0, this[6], this[7], this[8]) + end = time.localtime(time.mktime(start) + 36*60*60) + end = (end[0], end[1], end[2], 0, 0, 0, end[6], end[7], end[8]) + date = time.strftime("%A, %B %d, %Y", start) + return time.mktime(start), time.mktime(end), date + + def _sortMessages(self, messages, sort_order, reverse=False): + """Sorts the message by the appropriate attribute. If this was the + previous sort order, then reverse it.""" + if sort_order is None or sort_order == "received": + # Default sorting, which is in reverse order of appearance. + # This is complicated because the 'received' info is the key. + messages.sort() + if self.previous_sort == sort_order: + messages.reverse() + self.previous_sort = None + else: + self.previous_sort = 'received' + return messages + tmplist = [(getattr(x[1], sort_order), x) for x in messages] + tmplist.sort() + if reverse: + tmplist.reverse() + return [x for (key, x) in tmplist] + + def _appendMessages(self, table, keyedMessageInfo, label, sort_order, + reverse=False): + """Appends the rows of a table of messages to 'table'.""" + stripe = 0 + + keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order, + reverse) + nrows = options["html_ui", "rows_per_section"] + for key, messageInfo in keyedMessageInfo[:nrows]: + unused, unused, messageInfo.received = \ + self._getTimeRange(self._keyToTimestamp(key)) + row = self.html.reviewRow.clone() + try: + score = messageInfo.score + except ValueError: + score = None + if label == _('Spam'): + if score is not None \ + and score > options["html_ui", "spam_discard_level"]: + r_att = getattr(row, 'discard') + 
else: + r_att = getattr(row, options["html_ui", + "default_spam_action"]) + elif label == _('Ham'): + if score is not None \ + and score < options["html_ui", "ham_discard_level"]: + r_att = getattr(row, 'discard') + else: + r_att = getattr(row, options["html_ui", + "default_ham_action"]) + else: + r_att = getattr(row, options["html_ui", + "default_unsure_action"]) + setattr(r_att, "checked", 1) + + row.optionalHeadersValues = '' # make way for real list + for header in options["html_ui", "display_headers"]: + header = header.lower() + text = getattr(messageInfo, "%sHeader" % (header,)) + if header == "subject": + # Subject is special, because it links to the body. + # If the user doesn't display the subject, then there + # is no link to the body. + h = self.html.reviewRow.linkedHeaderValue.clone() + h.text.title = messageInfo.bodySummary + h.text.href = "view?key=%s&corpus=%s" % (key, label) + else: + h = self.html.reviewRow.headerValue.clone() + h.text = text + row.optionalHeadersValues += h + + # Apart from any message headers, we may also wish to display + # the message score, and the time the message was received. + if options["html_ui", "display_score"]: + if isinstance(messageInfo.score, types.StringTypes): + # Presumably either "?" or "Err". + row.score_ = messageInfo.score + else: + row.score_ = "%.2f%%" % (messageInfo.score,) + else: + del row.score_ + if options["html_ui", "display_received_time"]: + row.received_ = messageInfo.received + else: + del row.received_ + + # Many characters can't go in the URL or they cause problems + # (&, ;, ?, etc). So we use the hex values for them all. + subj_list = [] + for c in messageInfo.subjectHeader: + subj_list.append("%%%s" % (hex(ord(c))[2:],)) + subj = "".join(subj_list) + row.classify.href = "showclues?key=%s&subject=%s" % (key, subj) + row.tokens.href = ("showclues?key=%s&subject=%s&tokens=1" % + (key, subj)) + setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr! 
+ setattr(row, 'onMouseOut', + ["this.className='stripe_on';", + "this.className='stripe_off';"][stripe]) + row = str(row).replace('TYPE', label).replace('KEY', key) + table += row + stripe = stripe ^ 1 + + def _contains(self, a, b, ignore_case=False): + """Return true if substring b is part of string a.""" + assert isinstance(a, types.StringTypes) + assert isinstance(b, types.StringTypes) + if ignore_case: + a = a.lower() + b = b.lower() + return a.find(b) >= 0 + + def _makeMessageInfo(self, message): + """Given an email.Message, return an object with subjectHeader, + bodySummary and other header (as needed) attributes. These objects + are passed into appendMessages by onReview - passing email.Message + objects directly uses too much memory. + """ + # Remove notations before displaying - see: + # [ 848365 ] Remove subject annotations from message review page + message.delNotations() + subjectHeader = message["Subject"] or "(none)" + headers = {"subject" : subjectHeader} + for header in options["html_ui", "display_headers"]: + headers[header.lower()] = (message[header] or "(none)") + score = message[options["Headers", "score_header_name"]] + if score: + # the score might have the log info at the end + op = score.find('(') + if op >= 0: + score = score[:op] + try: + score = float(score) * 100 + except ValueError: + # Hmm. The score header should only contain a floating + # point number. What's going on here, then? + score = "Err" # Let the user know something is wrong. + else: + # If the lookup fails, this means that the "include_score" + # option isn't activated. We have the choice here to either + # calculate it now, which is pretty inefficient, since we have + # already done so, or to admit that we don't know what it is. + # We'll go with the latter. + score = "?" 
+ try: + part = typed_subpart_iterator(message, 'text', 'plain').next() + text = part.get_payload() + except StopIteration: + try: + part = typed_subpart_iterator(message, 'text', 'html').next() + text = part.get_payload() + text, unused = tokenizer.crack_html_style(text) + text, unused = tokenizer.crack_html_comment(text) + text = tokenizer.html_re.sub(' ', text) + text = _('(this message only has an HTML body)\n') + text + except StopIteration: + text = _('(this message has no text body)') + if type(text) == type([]): # gotta be a 'right' way to do this + text = _("(this message is a digest of %s messages)") % (len(text)) + elif text is None: + text = _("(this message has no body)") + else: + text = text.replace('&nbsp;', ' ') # Else they'll be quoted + text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines + text = text.strip() + + class _MessageInfo: + pass + messageInfo = _MessageInfo() + for headerName, headerValue in headers.items(): + headerValue = self._trimHeader(headerValue, 45, True) + setattr(messageInfo, "%sHeader" % (headerName,), headerValue) + messageInfo.score = score + messageInfo.bodySummary = self._trimHeader(text, 200) + return messageInfo Index: dnscache.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/dnscache.py,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** dnscache.py 13 Aug 2006 02:05:43 -0000 1.3 --- dnscache.py 10 Jun 2007 15:27:36 -0000 1.4 *************** *** 23,29 **** kCheckForPruneEvery=20 ! kMaxTTL=60 * 60 * 24 * 7 # One week ! kPruneThreshold=1500 # May go over slightly; numbers chosen at random ! kPruneDownTo=1000 --- 23,32 ---- kCheckForPruneEvery=20 ! kMaxTTL=60 * 60 * 24 * 7 # One week ! # Some servers always return a TTL of zero. We'll hold onto data a bit ! # longer. ! kMinTTL=24 * 60 * 60 * 1 # one day ! kPruneThreshold=5000 # May go over slightly; numbers chosen at random !
kPruneDownTo=2500 *************** *** 89,97 **** self.dnsTimeout=10 - # Some servers always return a TTL of zero. - # In those cases, turning this up a bit is - # probably reasonable. - self.minTTL=0 - # end of user-settable attributes --- 92,95 ---- *************** *** 160,164 **** c=self.caches[answer.qType] c[answer.question].remove(answer) ! if len(c[answer.question])==0: del c[answer.question] --- 158,162 ---- c=self.caches[answer.qType] c[answer.question].remove(answer) ! if not c[answer.question]: del c[answer.question] *************** *** 180,184 **** c=self.caches[answer.qType] c[answer.question].remove(answer) ! if len(c[answer.question])==0: del c[answer.question] --- 178,182 ---- c=self.caches[answer.qType] c[answer.question].remove(answer) ! if not c[answer.question]: del c[answer.question] *************** *** 218,233 **** pass else: ! assert len(answers)>0 ! ind=0 ! # No guarantee that expire has already been done ! while ind> sys.stderr, "lookup failure:", question ! if not answers: del cacheToLookIn[question] else: *************** *** 250,275 **** except DNS.Base.DNSError,detail: if detail.args[0]<>"Timeout": ! print "Error, fixme",detail ! print "Question was",queryQuestion ! print "Origianal question was",question ! print "Type was",qType objs=[ lookupResult(qType,None,question,self.cacheErrorSecs+now,now) ] cacheToLookIn[question]=objs # Add to format for return? return self.formatForReturn(objs) except socket.gaierror,detail: ! print "DNS connection failure:", self.queryObj.ns, detail ! print "Defaults:", DNS.defaults objs=[] for answer in reply.answers: if answer["typename"]==qType: ! # PyDNS returns TTLs as longs but RFC 1035 says that the ! # TTL value is a signed 32-bit value and must be positive, ! # so it should be safe to coerce it to a Python integer. ! # And anyone who sets a time to live of more than 2^31-1 ! # seconds (68 years and change) is drunk. ! # Arguably, I ought to impose a maximum rather than continuing ! 
# with longs (int(long) returns long in recent versions of Python). ! ttl=max(min(int(answer["ttl"]),kMaxTTL),self.minTTL) # RFC 2308 says that you should cache an NXDOMAIN for the # minimum of the minimum field of the SOA record and the TTL --- 250,275 ---- except DNS.Base.DNSError,detail: if detail.args[0]<>"Timeout": ! print >> sys.stderr, "Error, fixme", detail ! print >> sys.stderr, "Question was", queryQuestion ! print >> sys.stderr, "Original question was", question ! print >> sys.stderr, "Type was", qType objs=[ lookupResult(qType,None,question,self.cacheErrorSecs+now,now) ] cacheToLookIn[question]=objs # Add to format for return? return self.formatForReturn(objs) except socket.gaierror,detail: ! print >> sys.stderr, "DNS connection failure:", self.queryObj.ns, detail ! print >> sys.stderr, "Defaults:", DNS.defaults objs=[] for answer in reply.answers: if answer["typename"]==qType: ! # PyDNS returns TTLs as longs but RFC 1035 says that the TTL ! # value is a signed 32-bit value and must be positive, so it ! # should be safe to coerce it to a Python integer. And ! # anyone who sets a time to live of more than 2^31-1 seconds ! # (68 years and change) is drunk. Arguably, I ought to ! # impose a maximum rather than continuing with longs ! # (int(long) returns long in recent versions of Python). ! ttl=max(min(int(answer["ttl"]),kMaxTTL),kMinTTL) # RFC 2308 says that you should cache an NXDOMAIN for the # minimum of the minimum field of the SOA record and the TTL *************** *** 279,288 **** objs.append(item) ! if len(objs)>0: cacheToLookIn[question]=objs return self.formatForReturn(objs) # Probably SERVFAIL or the like ! if len(reply.authority)==0: objs=[ lookupResult(qType,None,question,self.cacheErrorSecs+now,now) ] cacheToLookIn[question]=objs --- 279,288 ---- objs.append(item) ! if objs: cacheToLookIn[question]=objs return self.formatForReturn(objs) # Probably SERVFAIL or the like ! 
if not reply.authority: objs=[ lookupResult(qType,None,question,self.cacheErrorSecs+now,now) ] cacheToLookIn[question]=objs *************** *** 319,329 **** "www.seeputofor.com", "www.completegarbage.tv", "www.tradelinkllc.com"]: ! print "checking", host now=time.time() ips=c.lookup(host) ! print ips,time.time()-now now=time.time() ips=c.lookup(host) ! print ips,time.time()-now if ips: --- 319,329 ---- "www.seeputofor.com", "www.completegarbage.tv", "www.tradelinkllc.com"]: ! print >> sys.stderr, "checking", host now=time.time() ips=c.lookup(host) ! print >> sys.stderr, ips,time.time()-now now=time.time() ips=c.lookup(host) ! print >> sys.stderr, ips,time.time()-now if ips: *************** *** 331,340 **** now=time.time() name=c.lookup(ip,qType="PTR") ! print name,time.time()-now now=time.time() name=c.lookup(ip,qType="PTR") ! print name,time.time()-now else: ! print "unknown" c.close() --- 331,340 ---- now=time.time() name=c.lookup(ip,qType="PTR") ! print >> sys.stderr, name,time.time()-now now=time.time() name=c.lookup(ip,qType="PTR") ! print >> sys.stderr, name,time.time()-now else: ! print >> sys.stderr, "unknown" c.close() Index: storage.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/storage.py,v retrieving revision 1.63 retrieving revision 1.64 diff -C2 -d -r1.63 -r1.64 *** storage.py 11 May 2007 00:23:08 -0000 1.63 --- storage.py 10 Jun 2007 15:27:36 -0000 1.64 *************** *** 721,726 **** import ZODB from ZODB.FileStorage import FileStorage ! self.storage = FileStorage(self.db_filename, ! read_only=self.mode=='r') def load(self): --- 721,731 ---- import ZODB from ZODB.FileStorage import FileStorage ! try: ! self.storage = FileStorage(self.db_filename, ! read_only=self.mode=='r') ! except IOError, msg: ! print >> sys.stderr, ("Could not create FileStorage from", ! self.db_filename) ! raise def load(self): *************** *** 774,778 **** from ZODB.POSException import ReadOnlyError ! 
assert self.closed == False, "Can't store a closed database" if options["globals", "verbose"]: --- 779,783 ---- from ZODB.POSException import ReadOnlyError ! assert not self.closed, "Can't store a closed database" if options["globals", "verbose"]: From montanaro at users.sourceforge.net Sun Jun 10 17:27:40 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sun, 10 Jun 2007 08:27:40 -0700 Subject: [Spambayes-checkins] spambayes/spambayes/core_resources README.txt, 1.1, 1.2 README_txt.py, 1.1, 1.2 __init__.py, 1.1, 1.2 classify.gif, 1.1, 1.2 classify_gif.py, 1.1, 1.2 config.gif, 1.1, 1.2 config_gif.py, 1.1, 1.2 helmet.gif, 1.1, 1.2 helmet_gif.py, 1.1, 1.2 help.gif, 1.1, 1.2 help_gif.py, 1.1, 1.2 message.gif, 1.1, 1.2 message_gif.py, 1.1, 1.2 query.gif, 1.1, 1.2 query_gif.py, 1.1, 1.2 scanning__init__.py, 1.1, 1.2 status.gif, 1.1, 1.2 status_gif.py, 1.1, 1.2 train.gif, 1.1, 1.2 train_gif.py, 1.1, 1.2 ui.html, 1.1, 1.2 ui.psp, 1.1, 1.2 ui_html.py, 1.1, 1.2 ui_psp.py, 1.1, 1.2 Message-ID: <20070610152745.2DBF71E4008@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes/core_resources In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7512/spambayes/core_resources Added Files: README.txt README_txt.py __init__.py classify.gif classify_gif.py config.gif config_gif.py helmet.gif helmet_gif.py help.gif help_gif.py message.gif message_gif.py query.gif query_gif.py scanning__init__.py status.gif status_gif.py train.gif train_gif.py ui.html ui.psp ui_html.py ui_psp.py Log Message: Merge CORESVR branch to HEAD. This adds a new app, scripts/core_server.py and attendant bits, such as an XML-RPC plugin. The web interface is straight from the POP3 proxy server. 
From montanaro at users.sourceforge.net Mon Jun 25 14:10:16 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Mon, 25 Jun 2007 05:10:16 -0700 Subject: [Spambayes-checkins] spambayes CHANGELOG.txt,1.58,1.59 Message-ID: <20070625121021.45FBD1E4012@bag.python.org> Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv19255 Modified Files: CHANGELOG.txt Log Message: . Index: CHANGELOG.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/CHANGELOG.txt,v retrieving revision 1.58 retrieving revision 1.59 diff -C2 -d -r1.58 -r1.59 *** CHANGELOG.txt 9 Sep 2006 23:02:06 -0000 1.58 --- CHANGELOG.txt 25 Jun 2007 12:10:10 -0000 1.59 *************** *** 3,8 **** Release 1.1a4 - Skip Montanaro 2006-09-09 First crack at handling image sequences Skip Montanaro 2006-09-09 Dump NetPBM decode support in favor of PIL Release 1.1a3 --- 3,14 ---- Release 1.1a4 Skip Montanaro 2006-09-09 Dump NetPBM decode support in favor of PIL + Skip Montanaro 2006-09-09 First crack at handling image sequences + Skip Montanaro 2007-05-11 IMAP patch for contrib/tte.py (Dave Abrahams) + Skip Montanaro 2007-05-11 Remove duplicate use of --cullext flag to contrib/tte.py + Skip Montanaro 2007-05-22 Note missing file name in error message - FileStorage.py + Skip Montanaro 2007-05-24 Set MinTTL to one day in dnscache.py + Skip Montanaro 2007-05-25 Catch broader exception in ImageStripper.py when image load fails (Sjoerd Mullender) + Skip Montanaro 2007-06-10 Add core_server.py & friends - plugin-based server Release 1.1a3 From montanaro at users.sourceforge.net Tue Jun 26 00:51:11 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Mon, 25 Jun 2007 15:51:11 -0700 Subject: [Spambayes-checkins] website developer.ht, 1.13, 1.14 download.ht, 1.37, 1.38 index.ht, 1.41, 1.42 Message-ID: <20070625225117.DDC181E4018@bag.python.org> Update of /cvsroot/spambayes/website In directory 
sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv26214 Modified Files: developer.ht download.ht index.ht Log Message: 1.1a4 bits Index: developer.ht =================================================================== RCS file: /cvsroot/spambayes/website/developer.ht,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** developer.ht 16 Jan 2005 22:23:31 -0000 1.13 --- developer.ht 25 Jun 2007 22:51:01 -0000 1.14 *************** *** 35,56 ****

So what needs to be done

!

1.0 was released in July 2004, and was followed up by a bugfix 1.0.1 ! release in November 2004. We intend to fix as many remaining bugs with the ! 1.0.x branch as is practical and hope to release 1.0.2 towards the end of ! January 2005. This is likely to be the final release in the 1.0.x line, ! unless there are unforeseen problems with the 1.0.2 or 1.1 releases.

Since May 2004, work has been carried out on a 1.1 release, which includes many improvements, as well as bug fixes, compared to the 1.0.x ! branch. We hope to release 1.1a1 for public testing at the end of January ! 2005, to be followed by at least one more alpha, at least one beta, and ! at least one release candidate. We hope that a stable 1.1 release will ! be made in April 2005, although this date is certainly not fixed.

!

The 1.1 line will be frozen for non-bugfix changes from the first ! beta release (probably early March 2005). Many of the changes desired ! by the developers have been implemented, or partly so, but there is ! still time for further improvement. There is no time limit on ! implementing bug fixes.

Some key work that is in progress for 1.1, which you could assist --- 35,54 ----

So what needs to be done

! !

1.0 was released in July 2004, and was followed up by three bugfix ! releases starting in November 2004. The current stable release is 1.0.4. ! This is likely to be the final release in the 1.0.x line.

Since May 2004, work has been carried out on a 1.1 release, which includes many improvements, as well as bug fixes, compared to the 1.0.x ! branch. The latest alpha release is 1.1a4 (June 2007). If we could find ! more time or more help we could get to beta, release candidate and final ! releases of 1.1. We hope that a stable 1.1 release will be made during ! 2007, although this date is certainly not fixed.

!

The 1.1 line will be frozen for non-bugfix changes from the first beta ! release. Many of the changes desired by the developers have been ! implemented, or partly so, but there is still time for further improvement. ! There is no time limit on implementing bug fixes.

Some key work that is in progress for 1.1, which you could assist *************** *** 67,70 **** --- 65,78 ---- ZODB/ZEO, have been added, and the SQL backends improved.

  • Improvement in the unit testing suite.
  • +
  • Testing and/or improving the image handling capabilities. 1.1a3 + introduced OCR capability using the open source gocr program and + PIL.
  • +
  • Testing the new core_server.py application which implements a + plugin architecture for external protocol adapters. The first adapter + provides an XML-RPC interface, making it possible to extend SpamBayes to + websites and other non-mail applications. You could interface this server + to web applications such as Trac, MoinMoin or your favorite blog software. + You could also implement a POP3 protocol adapter so we can merge + core_server.py and sb-server.py. Index: download.ht =================================================================== RCS file: /cvsroot/spambayes/website/download.ht,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** download.ht 7 Aug 2006 22:23:26 -0000 1.37 --- download.ht 25 Jun 2007 22:51:01 -0000 1.38 *************** *** 11,16 **** spambayes at python.org. !

    The second alpha release of 1.1 is also now available. It is highly likely ! that there are new bugs in this release (especially with the IMAP filter), but if you are willing and able to give it a spin for us, that would be greatly appreciated. You might like to look at this --- 11,17 ---- spambayes at python.org. !

    The latest alpha release of 1.1 is 1.1a4. At the moment it is only ! available as a source release. There is no Windows installer. It is highly likely ! that there are new bugs in this release but if you are willing and able to give it a spin for us, that would be greatly appreciated. You might like to look at this *************** *** 72,88 ****


  • ! spambayes-1.1a2.exe ! (3,025,816 bytes, ! sig) !
  • !
  • 6c94cb14008580c309dd176af73f2132 ! spambayes-1.1a2.tar.gz ! (830,084 bytes, ! sig)
  • ! spambayes-1.1a2.zip ! (971,031 bytes, ! sig)
  • --- 73,85 ----
  • !
  • 35494ade1bf380651bcc3077bf108310 ! spambayes-1.1a4.tar.gz ! (992,533 bytes, ! sig)
  • ! sb-1.1a4.zip ! (1,128,015 bytes, ! sig)
  • Index: index.ht =================================================================== RCS file: /cvsroot/spambayes/website/index.ht,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** index.ht 7 Aug 2006 22:23:26 -0000 1.41 --- index.ht 25 Jun 2007 22:51:01 -0000 1.42 *************** *** 8,13 **** archives and a Windows binary installer).

    See the download page for more.

    !

    SpamBayes 1.1a2 is also now available! (This includes both the source ! archives and a Windows binary installers). This is an alpha release, so you should only try it if you are willing to try out experimental releases - otherwise stick with 1.0.4.

    --- 8,13 ---- archives and a Windows binary installer).

    See the download page for more.

    !

    SpamBayes 1.1a4 is also now available! (This currently includes only the ! source archives). This is an alpha release, so you should only try it if you are willing to try out experimental releases - otherwise stick with 1.0.4.

    From montanaro at users.sourceforge.net Tue Jun 26 00:52:23 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Mon, 25 Jun 2007 15:52:23 -0700 Subject: [Spambayes-checkins] website/sigs sb11a4.zip.asc, NONE, 1.1 spambayes-1.1a4.tar.gz.asc, NONE, 1.1 Message-ID: <20070625225226.406371E4005@bag.python.org> Update of /cvsroot/spambayes/website/sigs In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv27037 Added Files: sb11a4.zip.asc spambayes-1.1a4.tar.gz.asc Log Message: 1.1a4 sigs --- NEW FILE: sb11a4.zip.asc --- -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.1 (Darwin) iD8DBQBGgCiM3WE5XNCqNv4RAlRfAJsFSLY/3Nra2346bSWD/G28CSY/CQCgmHOx auQYWiLiTcuBSqdktMjq+sg= =xbyw -----END PGP SIGNATURE----- --- NEW FILE: spambayes-1.1a4.tar.gz.asc --- -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.1 (Darwin) iD8DBQBGgCh+3WE5XNCqNv4RAmIsAJwKuMFBC4R4GGPu/H7sM4ziXNe2bQCdGQJf TBpkq9CD66rFBdE7tDg8OmA= =cYIy -----END PGP SIGNATURE----- From montanaro at users.sourceforge.net Wed Jun 27 03:59:52 2007 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Tue, 26 Jun 2007 18:59:52 -0700 Subject: [Spambayes-checkins] spambayes README.txt,1.66,1.67 Message-ID: <20070627015956.904EE1E400A@bag.python.org> Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7047 Modified Files: README.txt Log Message: Add some directions about starting the XML-RPC (core) server. Index: README.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/README.txt,v retrieving revision 1.66 retrieving revision 1.67 diff -C2 -d -r1.66 -r1.67 *** README.txt 12 Apr 2004 01:59:26 -0000 1.66 --- README.txt 27 Jun 2007 01:59:49 -0000 1.67 *************** *** 179,182 **** --- 179,253 ---- + XML-RPC Server + -------------- + + The XML-RPC server (new in 1.1a4) web interface is almost identical to the + POP3 proxy user interface. 
Instead of proxying POP3 communications though + it provides an XML-RPC server your (typically non-mail) applications can use + to score content submissions. + + To install and configure it: + + 1. Unpack and install the distribution: + + tar xvfz spambayes-1.1a4.tar.gz + cd spambayes-1.1a4 + python setup.py install + + 2. Devote a runtime directory to it: + + SBDIR=/usr/local/spambayes/core_server # or whatever... + mkdir -p $SBDIR + + 3. Create an INI file: + + cd $SBDIR + cat > bayescustomize.ini < Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30139/scripts Modified Files: sb_server.py Log Message: Assign None to a Message instance's message_info_db Index: sb_server.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_server.py,v retrieving revision 1.51 retrieving revision 1.52 diff -C2 -d -r1.51 -r1.52 *** sb_server.py 7 Apr 2006 02:35:34 -0000 1.51 --- sb_server.py 27 Jun 2007 10:33:23 -0000 1.52 *************** *** 799,803 **** self.mdb.close() self.mdb = None ! spambayes.message.Message.message_info_db = None self.spamCorpus = self.hamCorpus = self.unknownCorpus = None --- 799,803 ---- self.mdb.close() self.mdb = None ! spambayes.message.Message().message_info_db = None self.spamCorpus = self.hamCorpus = self.unknownCorpus = None From mhammond at users.sourceforge.net Fri Jun 29 03:06:01 2007 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Thu, 28 Jun 2007 18:06:01 -0700 Subject: [Spambayes-checkins] spambayes/Outlook2000 config.py,1.38,1.39 Message-ID: <20070629010606.7506F1E400E@bag.python.org> Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv20708/Outlook2000 Modified Files: config.py Log Message: Fix some string localization problems - strings such as "Untouched", "Moved" and "Copied" are used internally as flags, not as literals displayed to the user. 
Index: config.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/config.py,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** config.py 11 Feb 2005 21:05:49 -0000 1.38 --- config.py 29 Jun 2007 01:05:58 -0000 1.39 *************** *** 26,31 **** FOLDER_ID = r"\(\'[a-fA-F0-9]+\', \'[a-fA-F0-9]+\'\)" FIELD_NAME = r"[a-zA-Z0-9 ]+" ! FILTER_ACTION = _("Untouched"), _("Moved"), _("Copied") ! MSG_READ_STATE = _("None"), _("Read"), _("Unread") from spambayes.OptionsClass import OptionsClass, Option --- 26,34 ---- FOLDER_ID = r"\(\'[a-fA-F0-9]+\', \'[a-fA-F0-9]+\'\)" FIELD_NAME = r"[a-zA-Z0-9 ]+" ! # These are stored in the INI file. They must not be localized - we can't ! # have all option settings being unrecognized just because a new localization ! # becomes available for users. The dialogs manage this. ! FILTER_ACTION = "Untouched", "Moved", "Copied" ! MSG_READ_STATE = "None", "Read", "Unread" from spambayes.OptionsClass import OptionsClass, Option From mhammond at users.sourceforge.net Fri Jun 29 03:06:55 2007 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Thu, 28 Jun 2007 18:06:55 -0700 Subject: [Spambayes-checkins] spambayes/Outlook2000/dialogs dialog_map.py, 1.50, 1.51 Message-ID: <20070629010657.89BF01E4009@bag.python.org> Update of /cvsroot/spambayes/spambayes/Outlook2000/dialogs In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv21117/Outlook2000/dialogs Modified Files: dialog_map.py Log Message: Fix localization of some strings in the dialogs. 
Index: dialog_map.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/dialogs/dialog_map.py,v retrieving revision 1.50 retrieving revision 1.51 diff -C2 -d -r1.50 -r1.51 *** dialog_map.py 11 Feb 2005 21:05:51 -0000 1.50 --- dialog_map.py 29 Jun 2007 01:06:53 -0000 1.51 *************** *** 477,481 **** "Filter.watch_folder_ids", "Filter.watch_include_sub"), ! (ComboProcessor, "IDC_ACTION_CERTAIN", "Filter.spam_action"), (FolderIDProcessor, "IDC_FOLDER_CERTAIN IDC_BROWSE_CERTAIN", "Filter.spam_folder_id"), --- 477,482 ---- "Filter.watch_folder_ids", "Filter.watch_include_sub"), ! (ComboProcessor, "IDC_ACTION_CERTAIN", "Filter.spam_action", ! _("Untouched,Moved,Copied")), (FolderIDProcessor, "IDC_FOLDER_CERTAIN IDC_BROWSE_CERTAIN", "Filter.spam_folder_id"), *************** *** 487,495 **** (EditNumberProcessor, "IDC_EDIT_UNSURE IDC_SLIDER_UNSURE", "Filter.unsure_threshold"), ! (ComboProcessor, "IDC_ACTION_UNSURE", "Filter.unsure_action"), (BoolButtonProcessor, "IDC_MARK_UNSURE_AS_READ", "Filter.unsure_mark_as_read"), (FolderIDProcessor, "IDC_FOLDER_HAM IDC_BROWSE_HAM", "Filter.ham_folder_id"), ! (ComboProcessor, "IDC_ACTION_HAM", "Filter.ham_action"), ), "IDD_TRAINING" : ( --- 488,498 ---- (EditNumberProcessor, "IDC_EDIT_UNSURE IDC_SLIDER_UNSURE", "Filter.unsure_threshold"), ! (ComboProcessor, "IDC_ACTION_UNSURE", "Filter.unsure_action", ! _("Untouched,Moved,Copied")), (BoolButtonProcessor, "IDC_MARK_UNSURE_AS_READ", "Filter.unsure_mark_as_read"), (FolderIDProcessor, "IDC_FOLDER_HAM IDC_BROWSE_HAM", "Filter.ham_folder_id"), ! (ComboProcessor, "IDC_ACTION_HAM", "Filter.ham_action", ! 
_("Untouched,Moved,Copied")), ), "IDD_TRAINING" : ( From mhammond at users.sourceforge.net Fri Jun 29 03:08:45 2007 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Thu, 28 Jun 2007 18:08:45 -0700 Subject: [Spambayes-checkins] spambayes/spambayes i18n.py,1.7,1.8 Message-ID: <20070629010849.692B21E4009@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv21706/spambayes Modified Files: i18n.py Log Message: To help in testing localizations, let SPAMBAYES_LANG in the environment override getdefaultlocale()[0] Index: i18n.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/i18n.py,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** i18n.py 8 Mar 2007 23:21:30 -0000 1.7 --- i18n.py 29 Jun 2007 01:08:43 -0000 1.8 *************** *** 115,120 **** """Get the default language for the locale.""" # Note that this may return None. ! return getdefaultlocale()[0] ! def add_language(self, lang_code=None): """Add a language to the current languages list. --- 115,123 ---- """Get the default language for the locale.""" # Note that this may return None. ! try: ! return os.environ["SPAMBAYES_LANG"] ! except KeyError: ! return getdefaultlocale()[0] ! def add_language(self, lang_code=None): """Add a language to the current languages list. From mhammond at users.sourceforge.net Fri Jun 29 04:03:19 2007 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Thu, 28 Jun 2007 19:03:19 -0700 Subject: [Spambayes-checkins] spambayes/windows/py2exe setup_all.py, 1.28, 1.29 Message-ID: <20070629020323.401B61E4009@bag.python.org> Update of /cvsroot/spambayes/spambayes/windows/py2exe In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv11161 Modified Files: setup_all.py Log Message: include gocr.exe and gocr.txt in the binary distribution. 
Index: setup_all.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/windows/py2exe/setup_all.py,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** setup_all.py 26 Mar 2007 08:03:14 -0000 1.28 --- setup_all.py 29 Jun 2007 02:03:16 -0000 1.29 *************** *** 178,181 **** --- 178,186 ---- ["", [os.path.join(sb_top_dir, r"windows\resources\sbicon.ico")]], ["", [os.path.join(sb_top_dir, r"LICENSE.txt")]], + # We insist gocr.exe is in the 'spambayes' package dir (we can make + # this smarter as necessary) + ["bin", [os.path.join(sb_top_dir, "spambayes", "gocr.exe")]], + # Our .txt file with info on gocr itself. + ["bin", [os.path.join(sb_top_dir, "windows", "py2exe", "gocr.txt")]], ]