From montanaro at users.sourceforge.net Sat Jun 2 23:42:10 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Sat, 02 Jun 2007 14:42:10 -0700
Subject: [Spambayes-checkins] spambayes/scripts core_server.py, 1.1.2.3,
1.1.2.4
Message-ID: <20070602214215.6AC421E4004@bag.python.org>
Update of /cvsroot/spambayes/spambayes/scripts
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv8063/scripts
Modified Files:
Tag: CORESVR
core_server.py
Log Message:
a step closer - actually scored a message!
Index: core_server.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/scripts/Attic/core_server.py,v
retrieving revision 1.1.2.3
retrieving revision 1.1.2.4
diff -C2 -d -r1.1.2.3 -r1.1.2.4
*** core_server.py 24 May 2007 03:19:34 -0000 1.1.2.3
--- core_server.py 2 Jun 2007 21:42:08 -0000 1.1.2.4
***************
*** 19,23 ****
options:
-h : Displays this help message.
! -m module :
Identify plugin module to use (required)
-d FILE : use the named DBM database file
--- 19,23 ----
options:
-h : Displays this help message.
! -P module :
Identify plugin module to use (required)
-d FILE : use the named DBM database file
***************
*** 135,145 ****
return '%s:%d' % (addr, port)
! def load_plugin(name):
try:
! plugin = __import__(name)
except ImportError:
! plugin = __import__("spambayes.%s" % name)
! plugin = getattr(plugin, name)
! return plugin.register()
def main(state):
--- 135,147 ----
return '%s:%d' % (addr, port)
! def load_plugin(name, state):
try:
! plugin_module = __import__(name)
except ImportError:
! plugin_module = __import__("spambayes.%s" % name)
! plugin_module = getattr(plugin_module, name)
! plugin = plugin_module.register()
! plugin.state = state
! return plugin
def main(state):
***************
*** 157,161 ****
# Read the arguments.
try:
! opts, args = getopt.getopt(sys.argv[1:], 'hbd:p:l:u:o:m:')
except getopt.error, msg:
print >> sys.stderr, str(msg) + '\n\n' + __doc__
--- 159,163 ----
# Read the arguments.
try:
! opts, args = getopt.getopt(sys.argv[1:], 'hbd:p:l:u:o:P:')
except getopt.error, msg:
print >> sys.stderr, str(msg) + '\n\n' + __doc__
***************
*** 179,191 ****
elif opt == '-o':
options.set_from_cmdline(arg, sys.stderr)
! elif opt == '-m':
! state.plugin = load_plugin(arg)
if state.plugin is None:
print >> sys.stderr, __doc__
sys.exit()
- state.db_name, state.use_db = storage.database_type(opts)
-
# Let the user know what they are using...
v = get_current_version()
--- 181,192 ----
elif opt == '-o':
options.set_from_cmdline(arg, sys.stderr)
! elif opt == '-P':
! state.plugin = load_plugin(arg, state)
if state.plugin is None:
+ print >> sys.stderr, "No plugin argument (-P) was given."
print >> sys.stderr, __doc__
sys.exit()
# Let the user know what they are using...
v = get_current_version()
From montanaro at users.sourceforge.net Sat Jun 2 23:42:10 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Sat, 02 Jun 2007 14:42:10 -0700
Subject: [Spambayes-checkins] spambayes/spambayes CorePlugin.py, 1.1.2.2,
1.1.2.3 CoreUI.py, 1.1.2.5, 1.1.2.6 XMLRPCPlugin.py, 1.1.2.2,
1.1.2.3
Message-ID: <20070602214216.685791E4004@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv8063/spambayes
Modified Files:
Tag: CORESVR
CorePlugin.py CoreUI.py XMLRPCPlugin.py
Log Message:
a step closer - actually scored a message!
Index: CorePlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CorePlugin.py,v
retrieving revision 1.1.2.2
retrieving revision 1.1.2.3
diff -C2 -d -r1.1.2.2 -r1.1.2.3
*** CorePlugin.py 29 May 2007 01:27:17 -0000 1.1.2.2
--- CorePlugin.py 2 Jun 2007 21:42:08 -0000 1.1.2.3
***************
*** 12,19 ****
self.name = name
self.ui = ui
- self.hammie = None
-
- def set_hammie(self, hammie):
- self.hammie = hammie
class PluginUI:
--- 12,15 ----
Index: CoreUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CoreUI.py,v
retrieving revision 1.1.2.5
retrieving revision 1.1.2.6
diff -C2 -d -r1.1.2.5 -r1.1.2.6
*** CoreUI.py 24 May 2007 03:19:34 -0000 1.1.2.5
--- CoreUI.py 2 Jun 2007 21:42:08 -0000 1.1.2.6
***************
*** 949,953 ****
possibly overridden by the driver code, create the Bayes object,
the Corpuses, the Trainers and so on."""
- print "Loading database...",
if self.is_test:
self.use_db = "pickle"
--- 949,952 ----
Index: XMLRPCPlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v
retrieving revision 1.1.2.2
retrieving revision 1.1.2.3
diff -C2 -d -r1.1.2.2 -r1.1.2.3
*** XMLRPCPlugin.py 29 May 2007 01:27:17 -0000 1.1.2.2
--- XMLRPCPlugin.py 2 Jun 2007 21:42:08 -0000 1.1.2.3
***************
*** 8,11 ****
--- 8,12 ----
from spambayes.OptionsClass import *
from spambayes.Options import _, options
+ from spambayes.tokenizer import tokenize
class XMLRPCUI(PluginUI):
***************
*** 36,48 ****
return self.score_mime(mime_message)
! def score_mime(self, msg):
! try:
! if isinstance(msg, (str, unicode)):
! msg = message_from_string(msg)
! tokens = tokenizer.tokenize(msg)
! return self.state.bayes.spamprob(tokens, evidence=True)
! except:
! import traceback
! traceback.print_exc()
def form_to_mime(form, attachments, extra_tokens):
--- 37,47 ----
return self.score_mime(mime_message)
! def score_mime(self, msg, mime_type):
! if self.state.bayes is None:
! self.state.create_workers()
! msg = unicode(msg, mime_type)
! msg = message_from_string(msg)
! tokens = tokenize(msg)
! return self.state.bayes.spamprob(tokens, evidence=True)
def form_to_mime(form, attachments, extra_tokens):
From montanaro at users.sourceforge.net Mon Jun 4 14:28:35 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Mon, 04 Jun 2007 05:28:35 -0700
Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.3,
1.1.2.4
Message-ID: <20070604122840.94D081E4005@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv23471/spambayes
Modified Files:
Tag: CORESVR
XMLRPCPlugin.py
Log Message:
+ docstring, refine API a bit
Index: XMLRPCPlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v
retrieving revision 1.1.2.3
retrieving revision 1.1.2.4
diff -C2 -d -r1.1.2.3 -r1.1.2.4
*** XMLRPCPlugin.py 2 Jun 2007 21:42:08 -0000 1.1.2.3
--- XMLRPCPlugin.py 4 Jun 2007 12:28:33 -0000 1.1.2.4
***************
*** 1,6 ****
import threading
from email import Message, message_from_string
-
from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
--- 1,61 ----
+ """
+ XML-RPC plugin for SpamBayes core server.
+
+ This plugin opens an XML-RPC server in a separate thread listening to the
+ given host and port (default localhost:5001). In Python 2.5 and later it
+ also enforces a path (default /sbrpc).
+
+ SECURITY NOTE: The XML-RPC plugin provides *NO SECURITY*. It would be
+ unwise to listen to anything besides 'localhost'. Similarly, when
+ running the core_server configured with the XML-RPC plugin it's quite
+ likely that the main core_server interface will have to listen to
+ something other than localhost to allow administrators to administer
+ it remotely. Access to that URL should only be available to a set of
+ trusted administrators, probably by proxy through some other webserver
+ which provides the necessary authentication support.
+
+ The XML-RPC server exposes the following two methods:
+
+ score(form_dict, extra_tokens) -> (score, evidence)
+ Scores a dictionary representing the contents of a web
+ submission form and a list of any extra tokens provided
+ by the caller. The return value is a list containing
+ the spam probability of the input and a set of (token,
+ probability) pairs for the most significant tokens.
+
+ score_mime(msg, encoding) -> (score, evidence)
+ Scores a MIME message (a string encoded using encoding).
+ The return value is as for the score method.
+
+ train(form_dict, extra_tokens, is_spam) -> ''
+ Trains the given form and tokens as ham or spam.
+
+ train_mime(msg, encoding, is_spam) -> ''
+ Trains the given MIME message as ham or spam.
+
+ retrain() -> (nham, nspam)
+ Retrain from scratch on all saved MIME messages.
+
+ get_corpus(is_spam) -> string
+ Retrieve the current ham or spam corpus (in Unix mbox format).
+
+ set_corpus(string, is_spam) -> ''
+ Set the current ham or spam corpus (string in Unix mbox format).
+ Should normally be followed by a call to retrain().
+
+ The following options are available in the Plugin section of the options.
+
+ xmlrpc_host - host to listen to (default: localhost)
+ xmlrpc_port - port to listen to (default: 5001)
+ xmlrpc_path - path to support (default: /sbrpc)
+ hambox - path on server to ham corpus (default: TBD...)
+ spambox - path on server to spam corpus (default: TBD...)
+
+ """
+
import threading
+ import xmlrpclib
from email import Message, message_from_string
from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
***************
*** 16,19 ****
--- 71,76 ----
('Plugin', 'xmlrpc_host'),
('Plugin', 'xmlrpc_port'),
+ ('Plugin', 'hambox'),
+ ('Plugin', 'spambox'),
)
***************
*** 27,53 ****
# Path is only enforced in Python 2.5 and later but we set it anyway.
self.server.RequestHandlerClass.rpc_paths = (path,)
! self.server.register_function(self.score)
! self.server.register_function(self.score_mime)
self.thread = threading.Thread(target=self.server.serve_forever)
self.thread.start()
! # placeholders
! def score(self, form, attachments, extra_tokens):
! mime_message = form_to_mime(form, attachments, tokens)
return self.score_mime(mime_message)
! def score_mime(self, msg, mime_type):
if self.state.bayes is None:
self.state.create_workers()
! msg = unicode(msg, mime_type)
msg = message_from_string(msg)
! tokens = tokenize(msg)
! return self.state.bayes.spamprob(tokens, evidence=True)
! def form_to_mime(form, attachments, extra_tokens):
msg = Message.Message()
msg.set_type("multipart/digest")
main = Message.Message()
! main.set_payload(" ".join([str(v) for v in form.values()]))
msg.attach(main)
for msg_type, content in attachments:
--- 84,145 ----
# Path is only enforced in Python 2.5 and later but we set it anyway.
self.server.RequestHandlerClass.rpc_paths = (path,)
! self.server.register_instance(self)
self.thread = threading.Thread(target=self.server.serve_forever)
self.thread.start()
! def _dispatch(self, method, params):
! if method in ("score", "score_mime", "train", "train_mime"):
! return getattr(self, method)(*params)
! elif method in ("retrain", "get_corpus", "set_corpus"):
! return "%s not yet implemented" % method
! else:
! raise xmlrpclib.Fault(404, '"%s" is not supported' % method)
!
! def score(self, form_dict, extra_tokens):
! """Score a dictionary + extra tokens."""
! mime_message = form_to_mime(form_dict, extra_tokens)
return self.score_mime(mime_message)
! def score_mime(self, msg, encoding):
! """Score a message representing a MIME document.
!
! The msg argument will be a string in the given encoding.
! """
! tokens = self.tokenize(msg, encoding)
! return self.state.bayes.spamprob(tokens, evidence=True)
!
! def train(self, form_dict, extra_tokens, is_spam):
! """Train the form and extra tokens."""
! mime_message = form_to_mime(form_dict, extra_tokens)
! return self.train_mime(mime_message, is_spam)
!
! def train_mime(self, msg, is_spam):
! """Train the message."""
! tokens = self.tokenize(msg, encoding)
! return self.state.bayes.learn(tokens, is_spam)
!
! def tokenize(self, msg, encoding):
! """Tokenize the message. Make sure the bayes instance is available."""
if self.state.bayes is None:
self.state.create_workers()
! msg = unicode(msg, encoding)
msg = message_from_string(msg)
! return tokenize(msg)
! def form_to_mime(form, mime_type, extra_tokens):
! """Encode submission form bits as a MIME message.
!
! form - a dictionary of key/value pairs representing the form's contents
! extra_tokens - a sequence of synthetic tokens generated by the caller.
! For example, if you include a honeypot hidden field in your form, you
! might generate a synthetic token which tells if it was filled in or not.
! You might also generate tokens which indicate how long a submitting
! username has existed or how many successful posts that username has
! submitted.
! """
msg = Message.Message()
msg.set_type("multipart/digest")
main = Message.Message()
! main.set_payload(" ".join(["%s:%s" % (k, v) for (k, v) in form.items()]))
msg.attach(main)
for msg_type, content in attachments:
From montanaro at users.sourceforge.net Tue Jun 5 04:18:15 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Mon, 04 Jun 2007 19:18:15 -0700
Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.4,
1.1.2.5
Message-ID: <20070605021821.B13B01E4002@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30513
Modified Files:
Tag: CORESVR
XMLRPCPlugin.py
Log Message:
Couple minor tweaks
Index: XMLRPCPlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v
retrieving revision 1.1.2.4
retrieving revision 1.1.2.5
diff -C2 -d -r1.1.2.4 -r1.1.2.5
*** XMLRPCPlugin.py 4 Jun 2007 12:28:33 -0000 1.1.2.4
--- XMLRPCPlugin.py 5 Jun 2007 02:18:13 -0000 1.1.2.5
***************
*** 16,20 ****
which provides the necessary authentication support.
! The XML-RPC server exposes the following two methods:
score(form_dict, extra_tokens) -> (score, evidence)
--- 16,20 ----
which provides the necessary authentication support.
! The XML-RPC server exposes the following methods:
score(form_dict, extra_tokens) -> (score, evidence)
***************
*** 42,46 ****
set_corpus(string, is_spam) -> ''
! Set the current ham or spam corpus (string in Unix mbox format).
Should normally be followed by a call to retrain().
--- 42,46 ----
set_corpus(string, is_spam) -> ''
! Set the current ham or spam corpus (a string in Unix mbox format).
Should normally be followed by a call to retrain().
***************
*** 112,121 ****
"""Train the form and extra tokens."""
mime_message = form_to_mime(form_dict, extra_tokens)
! return self.train_mime(mime_message, is_spam)
! def train_mime(self, msg, is_spam):
"""Train the message."""
tokens = self.tokenize(msg, encoding)
! return self.state.bayes.learn(tokens, is_spam)
def tokenize(self, msg, encoding):
--- 112,122 ----
"""Train the form and extra tokens."""
mime_message = form_to_mime(form_dict, extra_tokens)
! return self.train_mime(mime_message, "ascii", is_spam)
! def train_mime(self, msg, encoding, is_spam):
"""Train the message."""
tokens = self.tokenize(msg, encoding)
! self.state.bayes.learn(tokens, is_spam)
! return ""
def tokenize(self, msg, encoding):
From montanaro at users.sourceforge.net Tue Jun 5 04:43:44 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Mon, 04 Jun 2007 19:43:44 -0700
Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.5,
1.1.2.6
Message-ID: <20070605024348.487331E4002@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7990
Modified Files:
Tag: CORESVR
XMLRPCPlugin.py
Log Message:
Dump the training-related methods. I think the core server will manage
training.
Index: XMLRPCPlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v
retrieving revision 1.1.2.5
retrieving revision 1.1.2.6
diff -C2 -d -r1.1.2.5 -r1.1.2.6
*** XMLRPCPlugin.py 5 Jun 2007 02:18:13 -0000 1.1.2.5
--- XMLRPCPlugin.py 5 Jun 2007 02:43:41 -0000 1.1.2.6
***************
*** 29,48 ****
The return value is as for the score method.
- train(form_dict, extra_tokens, is_spam) -> ''
- Trains the given form and tokens as ham or spam.
-
- train_mime(msg, encoding, is_spam) -> ''
- Trains the given MIME message as ham or spam.
-
- retrain() -> (nham, nspam)
- Retrain from scratch on all saved MIME messages.
-
- get_corpus(is_spam) -> string
- Retrieve the current ham or spam corpus (in Unix mbox format).
-
- set_corpus(string, is_spam) -> ''
- Set the current ham or spam corpus (a string in Unix mbox format).
- Should normally be followed by a call to retrain().
-
The following options are available in the Plugin section of the options.
--- 29,32 ----
***************
*** 50,55 ****
xmlrpc_port - port to listen to (default: 5001)
xmlrpc_path - path to support (default: /sbrpc)
- hambox - path on server to ham corpus (default: TBD...)
- spambox - path on server to spam corpus (default: TBD...)
"""
--- 34,37 ----
***************
*** 71,76 ****
('Plugin', 'xmlrpc_host'),
('Plugin', 'xmlrpc_port'),
- ('Plugin', 'hambox'),
- ('Plugin', 'spambox'),
)
--- 53,56 ----
***************
*** 89,96 ****
def _dispatch(self, method, params):
! if method in ("score", "score_mime", "train", "train_mime"):
return getattr(self, method)(*params)
- elif method in ("retrain", "get_corpus", "set_corpus"):
- return "%s not yet implemented" % method
else:
raise xmlrpclib.Fault(404, '"%s" is not supported' % method)
--- 69,74 ----
def _dispatch(self, method, params):
! if method in ("score", "score_mime"):
return getattr(self, method)(*params)
else:
raise xmlrpclib.Fault(404, '"%s" is not supported' % method)
***************
*** 109,123 ****
return self.state.bayes.spamprob(tokens, evidence=True)
- def train(self, form_dict, extra_tokens, is_spam):
- """Train the form and extra tokens."""
- mime_message = form_to_mime(form_dict, extra_tokens)
- return self.train_mime(mime_message, "ascii", is_spam)
-
- def train_mime(self, msg, encoding, is_spam):
- """Train the message."""
- tokens = self.tokenize(msg, encoding)
- self.state.bayes.learn(tokens, is_spam)
- return ""
-
def tokenize(self, msg, encoding):
"""Tokenize the message. Make sure the bayes instance is available."""
--- 87,90 ----
From montanaro at users.sourceforge.net Wed Jun 6 05:30:49 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Tue, 05 Jun 2007 20:30:49 -0700
Subject: [Spambayes-checkins] spambayes/spambayes CoreUI.py, 1.1.2.6, 1.1.2.7
Message-ID: <20070606033054.38AC91E4005@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv13991
Modified Files:
Tag: CORESVR
CoreUI.py
Log Message:
Avoid shadowing "header" loop variable. Correct capitalization.
Index: CoreUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CoreUI.py,v
retrieving revision 1.1.2.6
retrieving revision 1.1.2.7
diff -C2 -d -r1.1.2.6 -r1.1.2.7
*** CoreUI.py 2 Jun 2007 21:42:08 -0000 1.1.2.6
--- CoreUI.py 6 Jun 2007 03:30:45 -0000 1.1.2.7
***************
*** 606,613 ****
sh.optionalHeaders = ''
h = self.html.headerHeader.clone()
! for header in options["html_ui", "display_headers"]:
h.headerLink.href = 'review?sort=%sHeader' % \
! (header.lower(),)
! h.headerName = header.title()
sh.optionalHeaders += h
if not options["html_ui", "display_score"]:
--- 606,613 ----
sh.optionalHeaders = ''
h = self.html.headerHeader.clone()
! for disp_header in options["html_ui", "display_headers"]:
h.headerLink.href = 'review?sort=%sHeader' % \
! (disp_header.lower(),)
! h.headerName = disp_header.title()
sh.optionalHeaders += h
if not options["html_ui", "display_score"]:
***************
*** 1029,1033 ****
else:
self.numUnsure += 1
! self.stats.recordClassification(score)
def buildStatusStrings(self):
--- 1029,1033 ----
else:
self.numUnsure += 1
! self.stats.RecordClassification(score)
def buildStatusStrings(self):
From montanaro at users.sourceforge.net Wed Jun 6 05:31:27 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Tue, 05 Jun 2007 20:31:27 -0700
Subject: [Spambayes-checkins] spambayes/spambayes ProxyUI.py,1.64,1.64.2.1
Message-ID: <20070606033131.0FFCE1E4005@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv14398
Modified Files:
Tag: CORESVR
ProxyUI.py
Log Message:
Avoid shadowing "header" loop variable.
Index: ProxyUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v
retrieving revision 1.64
retrieving revision 1.64.2.1
diff -C2 -d -r1.64 -r1.64.2.1
*** ProxyUI.py 28 Nov 2005 10:54:18 -0000 1.64
--- ProxyUI.py 6 Jun 2007 03:31:25 -0000 1.64.2.1
***************
*** 619,626 ****
sh.optionalHeaders = ''
h = self.html.headerHeader.clone()
! for header in options["html_ui", "display_headers"]:
h.headerLink.href = 'review?sort=%sHeader' % \
! (header.lower(),)
! h.headerName = header.title()
sh.optionalHeaders += h
if not options["html_ui", "display_score"]:
--- 619,626 ----
sh.optionalHeaders = ''
h = self.html.headerHeader.clone()
! for disp_header in options["html_ui", "display_headers"]:
h.headerLink.href = 'review?sort=%sHeader' % \
! (disp_header.lower(),)
! h.headerName = disp_header.title()
sh.optionalHeaders += h
if not options["html_ui", "display_score"]:
From montanaro at users.sourceforge.net Wed Jun 6 05:37:41 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Tue, 05 Jun 2007 20:37:41 -0700
Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.6,
1.1.2.7
Message-ID: <20070606033744.626CE1E4005@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv16690
Modified Files:
Tag: CORESVR
XMLRPCPlugin.py
Log Message:
cache messages
make xmlrpc thread a daemon
convert msg text to utf-8 string if it's unicode
Index: XMLRPCPlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v
retrieving revision 1.1.2.6
retrieving revision 1.1.2.7
diff -C2 -d -r1.1.2.6 -r1.1.2.7
*** XMLRPCPlugin.py 5 Jun 2007 02:43:41 -0000 1.1.2.6
--- XMLRPCPlugin.py 6 Jun 2007 03:37:37 -0000 1.1.2.7
***************
*** 46,49 ****
--- 46,50 ----
from spambayes.Options import _, options
from spambayes.tokenizer import tokenize
+ import spambayes.message
class XMLRPCUI(PluginUI):
***************
*** 66,69 ****
--- 67,71 ----
self.server.register_instance(self)
self.thread = threading.Thread(target=self.server.serve_forever)
+ self.thread.setDaemon(True)
self.thread.start()
***************
*** 79,97 ****
return self.score_mime(mime_message)
! def score_mime(self, msg, encoding):
"""Score a message representing a MIME document.
The msg argument will be a string in the given encoding.
"""
- tokens = self.tokenize(msg, encoding)
- return self.state.bayes.spamprob(tokens, evidence=True)
! def tokenize(self, msg, encoding):
! """Tokenize the message. Make sure the bayes instance is available."""
if self.state.bayes is None:
self.state.create_workers()
! msg = unicode(msg, encoding)
! msg = message_from_string(msg)
! return tokenize(msg)
def form_to_mime(form, mime_type, extra_tokens):
--- 81,114 ----
return self.score_mime(mime_message)
! def score_mime(self, msg_text, encoding):
"""Score a message representing a MIME document.
The msg argument will be a string in the given encoding.
"""
! # XXX Much of this probably belongs in the core server...
!
if self.state.bayes is None:
self.state.create_workers()
! if isinstance(msg_text, unicode):
! msg_text = msg_text.encode("utf-8")
! msg = message_from_string(msg_text,
! _class=spambayes.message.SBHeaderMessage)
!
! tokens = tokenize(msg)
!
! # XXX Maybe from here on down...
!
! prob = self.state.bayes.spamprob(tokens, evidence=False)
! self.state.record_classification(msg.GetClassification(), prob)
!
! # Cache the message.
! if not self.state.is_test and options["Storage", "cache_messages"]:
! msg.setId(self.state.getNewMessageName())
! # Write the message into the Unknown cache.
! makeMessage = self.state.unknownCorpus.makeMessage
! message = makeMessage(msg.getId(), msg.as_string())
! self.state.unknownCorpus.addMessage(message)
! return prob
def form_to_mime(form, mime_type, extra_tokens):
From montanaro at users.sourceforge.net Thu Jun 7 00:24:19 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Wed, 06 Jun 2007 15:24:19 -0700
Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.7,
1.1.2.8
Message-ID: <20070606222426.897431E4006@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv10998
Modified Files:
Tag: CORESVR
XMLRPCPlugin.py
Log Message:
We scored our first (fake) form submission today. Yay!
Index: XMLRPCPlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -C2 -d -r1.1.2.7 -r1.1.2.8
*** XMLRPCPlugin.py 6 Jun 2007 03:37:37 -0000 1.1.2.7
--- XMLRPCPlugin.py 6 Jun 2007 22:24:17 -0000 1.1.2.8
***************
*** 76,83 ****
raise xmlrpclib.Fault(404, '"%s" is not supported' % method)
! def score(self, form_dict, extra_tokens):
"""Score a dictionary + extra tokens."""
! mime_message = form_to_mime(form_dict, extra_tokens)
! return self.score_mime(mime_message)
def score_mime(self, msg_text, encoding):
--- 76,84 ----
raise xmlrpclib.Fault(404, '"%s" is not supported' % method)
! def score(self, form_dict, extra_tokens, attachments):
"""Score a dictionary + extra tokens."""
! mime_message = form_to_mime(form_dict, extra_tokens, attachments)
! mime_message = unicode(mime_message).encode("utf-8")
! return self.score_mime(mime_message, "utf-8")
def score_mime(self, msg_text, encoding):
***************
*** 100,104 ****
# XXX Maybe from here on down...
! prob = self.state.bayes.spamprob(tokens, evidence=False)
self.state.record_classification(msg.GetClassification(), prob)
--- 101,107 ----
# XXX Maybe from here on down...
! prob, clues = self.state.bayes.spamprob(tokens, evidence=True)
! msg.addSBHeaders(prob, clues)
!
self.state.record_classification(msg.GetClassification(), prob)
***************
*** 112,140 ****
return prob
! def form_to_mime(form, mime_type, extra_tokens):
"""Encode submission form bits as a MIME message.
form - a dictionary of key/value pairs representing the form's contents
extra_tokens - a sequence of synthetic tokens generated by the caller.
! For example, if you include a honeypot hidden field in your form, you
! might generate a synthetic token which tells if it was filled in or not.
! You might also generate tokens which indicate how long a submitting
! username has existed or how many successful posts that username has
! submitted.
"""
msg = Message.Message()
msg.set_type("multipart/digest")
main = Message.Message()
main.set_payload(" ".join(["%s:%s" % (k, v) for (k, v) in form.items()]))
msg.attach(main)
! for msg_type, content in attachments:
attachment = Message.Message()
! attachment.set_type(msg_type)
! attachment.set_payload(content)
msg.attach(attachment)
! if extra_tokens:
! extra = Message.Message()
! extra.set_payload(" ".join(extra_tokens))
! msg.attach(extra)
return msg
--- 115,160 ----
return prob
! def form_to_mime(form, extra_tokens, attachments):
"""Encode submission form bits as a MIME message.
form - a dictionary of key/value pairs representing the form's contents
extra_tokens - a sequence of synthetic tokens generated by the caller.
! For example, if you include a honeypot hidden field in your form, you
! might generate a synthetic token which tells if it was filled in or not.
! You might also generate tokens which indicate how long a submitting
! username has existed or how many successful posts that username has
! submitted.
! attachments - list of dictionaries describing an attachment.
! The 'payload' key is required. If there is no 'content-type' key
! 'application/octet-stream' is assumed. If 'content-transfer-encoding'
! is given it will be added to the headers of the attachment. Note that
! the keys are case-sensitive and must be lower case.
"""
msg = Message.Message()
msg.set_type("multipart/digest")
+ msg.add_header("Subject", "Form submission")
+
main = Message.Message()
main.set_payload(" ".join(["%s:%s" % (k, v) for (k, v) in form.items()]))
msg.attach(main)
!
! # Always add the extra tokens payload so we can reliably reverse the
! # conversion.
! extra = Message.Message()
! extra.set_type("text/plain")
! extra.set_payload("\n".join(extra_tokens))
! msg.attach(extra)
!
! # Any further payloads are for the attachments.
! for content in attachments:
! mime_type = content.get("content-type") or "application/octet-stream"
attachment = Message.Message()
! if "content-transfer-encoding" in content:
! attachment.add_header("Content-Transfer-Encoding",
! content["content-transfer-encoding"])
! attachment.set_type(mime_type)
! attachment.set_payload(content["payload"])
msg.attach(attachment)
!
return msg
From montanaro at users.sourceforge.net Thu Jun 7 01:29:16 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Wed, 06 Jun 2007 16:29:16 -0700
Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.8,
1.1.2.9
Message-ID: <20070606232921.5A00A1E4007@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv4360
Modified Files:
Tag: CORESVR
XMLRPCPlugin.py
Log Message:
Add a From: header and content type for the main form items.
Index: XMLRPCPlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v
retrieving revision 1.1.2.8
retrieving revision 1.1.2.9
diff -C2 -d -r1.1.2.8 -r1.1.2.9
*** XMLRPCPlugin.py 6 Jun 2007 22:24:17 -0000 1.1.2.8
--- XMLRPCPlugin.py 6 Jun 2007 23:29:13 -0000 1.1.2.9
***************
*** 134,140 ****
msg.set_type("multipart/digest")
msg.add_header("Subject", "Form submission")
main = Message.Message()
! main.set_payload(" ".join(["%s:%s" % (k, v) for (k, v) in form.items()]))
msg.attach(main)
--- 134,142 ----
msg.set_type("multipart/digest")
msg.add_header("Subject", "Form submission")
+ msg.add_header("From", "SpamBayes XMLRPC Plugin ")
main = Message.Message()
! main.set_type("text/plain")
! main.set_payload("\n".join(["%s:%s" % (k, v) for (k, v) in form.items()]))
msg.attach(main)
From montanaro at users.sourceforge.net Thu Jun 7 03:23:43 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Wed, 06 Jun 2007 18:23:43 -0700
Subject: [Spambayes-checkins] spambayes/spambayes WebAppPlugin.py, 1.1.2.2,
NONE
Message-ID: <20070607012347.7D9C41E4005@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv15442
Removed Files:
Tag: CORESVR
WebAppPlugin.py
Log Message:
thought I removed this already
--- WebAppPlugin.py DELETED ---
From montanaro at users.sourceforge.net Thu Jun 7 04:50:17 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Wed, 06 Jun 2007 19:50:17 -0700
Subject: [Spambayes-checkins] spambayes/spambayes CorePlugin.py, 1.1.2.3,
1.1.2.4 XMLRPCPlugin.py, 1.1.2.9, 1.1.2.10
Message-ID: <20070607025022.901B91E400B@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv17014
Modified Files:
Tag: CORESVR
CorePlugin.py XMLRPCPlugin.py
Log Message:
a couple small cleanups suggested by pylint
Index: CorePlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CorePlugin.py,v
retrieving revision 1.1.2.3
retrieving revision 1.1.2.4
diff -C2 -d -r1.1.2.3 -r1.1.2.4
*** CorePlugin.py 2 Jun 2007 21:42:08 -0000 1.1.2.3
--- CorePlugin.py 7 Jun 2007 02:50:13 -0000 1.1.2.4
***************
*** 3,8 ****
"""
- import sys
-
__author__ = "Skip Montanaro
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv16141/spambayes
Modified Files:
Tag: CORESVR
CoreUI.py ProxyUI.py UserInterface.py
Log Message:
trivial refactoring (these should probably both be functions, not methods)
Index: CoreUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CoreUI.py,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -C2 -d -r1.1.2.7 -r1.1.2.8
*** CoreUI.py 6 Jun 2007 03:30:45 -0000 1.1.2.7
--- CoreUI.py 8 Jun 2007 02:09:51 -0000 1.1.2.8
***************
*** 206,226 ****
self.write(_("OK. Return Home.
"))
- def _keyToTimestamp(self, key):
- """Given a message key (as seen in a Corpus), returns the timestamp
- for that message. This is the time that the message was received,
- not the Date header."""
- return long(key[:10])
-
- def _getTimeRange(self, timestamp):
- """Given a unix timestamp, returns a 3-tuple: the start timestamp
- of the given day, the end timestamp of the given day, and the
- formatted date of the given day."""
- this = time.localtime(timestamp)
- start = (this[0], this[1], this[2], 0, 0, 0, this[6], this[7], this[8])
- end = time.localtime(time.mktime(start) + 36*60*60)
- end = (end[0], end[1], end[2], 0, 0, 0, end[6], end[7], end[8])
- date = time.strftime("%A, %B %d, %Y", start)
- return time.mktime(start), time.mktime(end), date
-
def _buildReviewKeys(self, timestamp):
"""Builds an ordered list of untrained message keys, ready for output
--- 206,209 ----
Index: ProxyUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v
retrieving revision 1.64.2.1
retrieving revision 1.64.2.2
diff -C2 -d -r1.64.2.1 -r1.64.2.2
*** ProxyUI.py 6 Jun 2007 03:31:25 -0000 1.64.2.1
--- ProxyUI.py 8 Jun 2007 02:09:51 -0000 1.64.2.2
***************
*** 111,115 ****
)
! # Like the above, but hese are the options that will be offered on the
# advanced configuration page.
adv_map = (
--- 111,115 ----
)
! # Like the above, but these are the options that will be offered on the
# advanced configuration page.
adv_map = (
***************
*** 221,241 ****
self.write(_("OK. Return Home.
"))
- def _keyToTimestamp(self, key):
- """Given a message key (as seen in a Corpus), returns the timestamp
- for that message. This is the time that the message was received,
- not the Date header."""
- return long(key[:10])
-
- def _getTimeRange(self, timestamp):
- """Given a unix timestamp, returns a 3-tuple: the start timestamp
- of the given day, the end timestamp of the given day, and the
- formatted date of the given day."""
- this = time.localtime(timestamp)
- start = (this[0], this[1], this[2], 0, 0, 0, this[6], this[7], this[8])
- end = time.localtime(time.mktime(start) + 36*60*60)
- end = (end[0], end[1], end[2], 0, 0, 0, end[6], end[7], end[8])
- date = time.strftime("%A, %B %d, %Y", start)
- return time.mktime(start), time.mktime(end), date
-
def _buildReviewKeys(self, timestamp):
"""Builds an ordered list of untrained message keys, ready for output
--- 221,224 ----
Index: UserInterface.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/UserInterface.py,v
retrieving revision 1.61.2.1
retrieving revision 1.61.2.2
diff -C2 -d -r1.61.2.1 -r1.61.2.2
*** UserInterface.py 24 May 2007 03:19:34 -0000 1.61.2.1
--- UserInterface.py 8 Jun 2007 02:09:51 -0000 1.61.2.2
***************
*** 1253,1254 ****
--- 1253,1273 ----
lines.append(''.join(cur_line))
return lines
+
+ def _keyToTimestamp(self, key):
+ """Given a message key (as seen in a Corpus), returns the timestamp
+ for that message. This is the time that the message was received,
+ not the Date header."""
+ return long(key[:10])
+
+ def _getTimeRange(self, timestamp):
+ """Given a unix timestamp, returns a 3-tuple: the start timestamp
+ of the given day, the end timestamp of the given day, and the
+ formatted date of the given day."""
+ this = time.localtime(timestamp)
+ start = (this[0], this[1], this[2], 0, 0, 0, this[6], this[7], this[8])
+ end = time.localtime(time.mktime(start) + 36*60*60)
+ end = (end[0], end[1], end[2], 0, 0, 0, end[6], end[7], end[8])
+ date = time.strftime("%A, %B %d, %Y", start)
+ return time.mktime(start), time.mktime(end), date
+
+
From montanaro at users.sourceforge.net Fri Jun 8 14:08:49 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Fri, 08 Jun 2007 05:08:49 -0700
Subject: [Spambayes-checkins] spambayes/spambayes CoreUI.py, 1.1.2.8,
1.1.2.9 ProxyUI.py, 1.64.2.2, 1.64.2.3 UserInterface.py,
1.61.2.2, 1.61.2.3
Message-ID: <20070608120856.EA2761E4006@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv20242/spambayes
Modified Files:
Tag: CORESVR
CoreUI.py ProxyUI.py UserInterface.py
Log Message:
A couple more refactorings. More will be possible once I get rid of holding
state as a module-level global variable in the pop3 proxy.
Index: CoreUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/CoreUI.py,v
retrieving revision 1.1.2.8
retrieving revision 1.1.2.9
diff -C2 -d -r1.1.2.8 -r1.1.2.9
*** CoreUI.py 8 Jun 2007 02:09:51 -0000 1.1.2.8
--- CoreUI.py 8 Jun 2007 12:08:46 -0000 1.1.2.9
***************
*** 54,58 ****
import sys
- import re
import cgi
import time
--- 54,57 ----
***************
*** 72,81 ****
from spambayes.compatsets import Set
- from email.Iterators import typed_subpart_iterator
-
import UserInterface
- from spambayes import tokenizer
from spambayes.Options import options, load_options, get_pathname_option, _
! from spambayes import i18n
from spambayes import storage
from spambayes import Stats
--- 71,78 ----
from spambayes.compatsets import Set
import UserInterface
from spambayes.Options import options, load_options, get_pathname_option, _
! ## no i18n yet...
! ##from spambayes import i18n
from spambayes import storage
from spambayes import Stats
***************
*** 161,165 ****
self.state = state
self.app_for_version = "SpamBayes Proxy"
- self.previous_sort = None
if not state.can_stop:
self.html._readonly = False
--- 158,161 ----
***************
*** 246,351 ****
return keys, date, prior, start, end
- def _sortMessages(self, messages, sort_order, reverse=False):
- """Sorts the message by the appropriate attribute. If this was the
- previous sort order, then reverse it."""
- if sort_order is None or sort_order == "received":
- # Default sorting, which is in reverse order of appearance.
- # This is complicated because the 'received' info is the key.
- messages.sort()
- if self.previous_sort == sort_order:
- messages.reverse()
- self.previous_sort = None
- else:
- self.previous_sort = 'received'
- return messages
- tmplist = [(getattr(x[1], sort_order), x) for x in messages]
- tmplist.sort()
- if reverse:
- tmplist.reverse()
- return [x for (key, x) in tmplist]
-
- def _appendMessages(self, table, keyedMessageInfo, label, sort_order,
- reverse=False):
- """Appends the rows of a table of messages to 'table'."""
- stripe = 0
-
- keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order,
- reverse)
- nrows = options["html_ui", "rows_per_section"]
- for key, messageInfo in keyedMessageInfo[:nrows]:
- unused, unused, messageInfo.received = \
- self._getTimeRange(self._keyToTimestamp(key))
- row = self.html.reviewRow.clone()
- try:
- score = messageInfo.score
- except ValueError:
- score = None
- if label == _('Spam'):
- if score is not None \
- and score > options["html_ui", "spam_discard_level"]:
- r_att = getattr(row, 'discard')
- else:
- r_att = getattr(row, options["html_ui",
- "default_spam_action"])
- elif label == _('Ham'):
- if score is not None \
- and score < options["html_ui", "ham_discard_level"]:
- r_att = getattr(row, 'discard')
- else:
- r_att = getattr(row, options["html_ui",
- "default_ham_action"])
- else:
- r_att = getattr(row, options["html_ui",
- "default_unsure_action"])
- setattr(r_att, "checked", 1)
-
- row.optionalHeadersValues = '' # make way for real list
- for header in options["html_ui", "display_headers"]:
- header = header.lower()
- text = getattr(messageInfo, "%sHeader" % (header,))
- if header == "subject":
- # Subject is special, because it links to the body.
- # If the user doesn't display the subject, then there
- # is no link to the body.
- h = self.html.reviewRow.linkedHeaderValue.clone()
- h.text.title = messageInfo.bodySummary
- h.text.href = "view?key=%s&corpus=%s" % (key, label)
- else:
- h = self.html.reviewRow.headerValue.clone()
- h.text = text
- row.optionalHeadersValues += h
-
- # Apart from any message headers, we may also wish to display
- # the message score, and the time the message was received.
- if options["html_ui", "display_score"]:
- if isinstance(messageInfo.score, types.StringTypes):
- # Presumably either "?" or "Err".
- row.score_ = messageInfo.score
- else:
- row.score_ = "%.2f%%" % (messageInfo.score,)
- else:
- del row.score_
- if options["html_ui", "display_received_time"]:
- row.received_ = messageInfo.received
- else:
- del row.received_
-
- # Many characters can't go in the URL or they cause problems
- # (&, ;, ?, etc). So we use the hex values for them all.
- subj_list = []
- for c in messageInfo.subjectHeader:
- subj_list.append("%%%s" % (hex(ord(c))[2:],))
- subj = "".join(subj_list)
- row.classify.href = "showclues?key=%s&subject=%s" % (key, subj)
- row.tokens.href = ("showclues?key=%s&subject=%s&tokens=1" %
- (key, subj))
- setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr!
- setattr(row, 'onMouseOut',
- ["this.className='stripe_on';",
- "this.className='stripe_off';"][stripe])
- row = str(row).replace('TYPE', label).replace('KEY', key)
- table += row
- stripe = stripe ^ 1
-
def onReview(self, **params):
"""Present a list of message for (re)training."""
--- 242,245 ----
***************
*** 619,631 ****
self._writePostamble(help_topic="review")
- def _contains(self, a, b, ignore_case=False):
- """Return true if substring b is part of string a."""
- assert isinstance(a, types.StringTypes)
- assert isinstance(b, types.StringTypes)
- if ignore_case:
- a = a.lower()
- b = b.lower()
- return a.find(b) >= 0
-
def onView(self, key, corpus):
"""View a message - linked from the Review page."""
--- 513,516 ----
***************
*** 688,755 ****
self.write(html)
- def _makeMessageInfo(self, message):
- """Given an email.Message, return an object with subjectHeader,
- bodySummary and other header (as needed) attributes. These objects
- are passed into appendMessages by onReview - passing email.Message
- objects directly uses too much memory.
- """
- # Remove notations before displaying - see:
- # [ 848365 ] Remove subject annotations from message review page
- message.delNotations()
- subjectHeader = message["Subject"] or "(none)"
- headers = {"subject" : subjectHeader}
- for header in options["html_ui", "display_headers"]:
- headers[header.lower()] = (message[header] or "(none)")
- score = message[options["Headers", "score_header_name"]]
- if score:
- # the score might have the log info at the end
- op = score.find('(')
- if op >= 0:
- score = score[:op]
- try:
- score = float(score) * 100
- except ValueError:
- # Hmm. The score header should only contain a floating
- # point number. What's going on here, then?
- score = "Err" # Let the user know something is wrong.
- else:
- # If the lookup fails, this means that the "include_score"
- # option isn't activated. We have the choice here to either
- # calculate it now, which is pretty inefficient, since we have
- # already done so, or to admit that we don't know what it is.
- # We'll go with the latter.
- score = "?"
- try:
- part = typed_subpart_iterator(message, 'text', 'plain').next()
- text = part.get_payload()
- except StopIteration:
- try:
- part = typed_subpart_iterator(message, 'text', 'html').next()
- text = part.get_payload()
- text, unused = tokenizer.crack_html_style(text)
- text, unused = tokenizer.crack_html_comment(text)
- text = tokenizer.html_re.sub(' ', text)
- text = _('(this message only has an HTML body)\n') + text
- except StopIteration:
- text = _('(this message has no text body)')
- if type(text) == type([]): # gotta be a 'right' way to do this
- text = _("(this message is a digest of %s messages)") % (len(text))
- elif text is None:
- text = _("(this message has no body)")
- else:
- text = text.replace('&nbsp;', ' ') # Else they'll be quoted
- text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines
- text = text.strip()
-
- class _MessageInfo:
- pass
- messageInfo = _MessageInfo()
- for headerName, headerValue in headers.items():
- headerValue = self._trimHeader(headerValue, 45, True)
- setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
- messageInfo.score = score
- messageInfo.bodySummary = self._trimHeader(text, 200)
- return messageInfo
-
def close_database(self):
self.state.close()
--- 573,576 ----
***************
*** 824,827 ****
--- 645,651 ----
self.is_test = False
+ self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
+ self.spam_trainer = self.ham_trainer = None
+
self.init()
Index: ProxyUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v
retrieving revision 1.64.2.2
retrieving revision 1.64.2.3
diff -C2 -d -r1.64.2.2 -r1.64.2.3
*** ProxyUI.py 8 Jun 2007 02:09:51 -0000 1.64.2.2
--- ProxyUI.py 8 Jun 2007 12:08:46 -0000 1.64.2.3
***************
*** 54,58 ****
True, False = 1, 0
- import re
import cgi
import time
--- 54,57 ----
***************
*** 72,81 ****
from spambayes.compatsets import Set
- import tokenizer
import UserInterface
from spambayes.Options import options, _
- from email.Iterators import typed_subpart_iterator
! global state
# These are the options that will be offered on the configuration page.
--- 71,78 ----
from spambayes.compatsets import Set
import UserInterface
from spambayes.Options import options, _
! state = None
# These are the options that will be offered on the configuration page.
***************
*** 174,178 ****
self.state_recreator = state_recreator # ugly
self.app_for_version = "SpamBayes Proxy"
- self.previous_sort = None
if not proxy_state.can_stop:
self.html._readonly = False
--- 171,174 ----
***************
*** 261,365 ****
return keys, date, prior, start, end
- def _sortMessages(self, messages, sort_order, reverse=False):
- """Sorts the message by the appropriate attribute. If this was the
- previous sort order, then reverse it."""
- if sort_order is None or sort_order == "received":
- # Default sorting, which is in reverse order of appearance.
- # This is complicated because the 'received' info is the key.
- messages.sort()
- if self.previous_sort == sort_order:
- messages.reverse()
- self.previous_sort = None
- else:
- self.previous_sort = 'received'
- return messages
- tmplist = [(getattr(x[1], sort_order), x) for x in messages]
- tmplist.sort()
- if reverse:
- tmplist.reverse()
- return [x for (key, x) in tmplist]
-
- def _appendMessages(self, table, keyedMessageInfo, label, sort_order,
- reverse=False):
- """Appends the rows of a table of messages to 'table'."""
- stripe = 0
-
- keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order,
- reverse)
- nrows = options["html_ui", "rows_per_section"]
- for key, messageInfo in keyedMessageInfo[:nrows]:
- unused, unused, messageInfo.received = \
- self._getTimeRange(self._keyToTimestamp(key))
- row = self.html.reviewRow.clone()
- try:
- score = messageInfo.score
- except ValueError:
- score = None
- if label == _('Spam'):
- if score is not None \
- and score > options["html_ui", "spam_discard_level"]:
- r_att = getattr(row, 'discard')
- else:
- r_att = getattr(row, options["html_ui",
- "default_spam_action"])
- elif label == _('Ham'):
- if score is not None \
- and score < options["html_ui", "ham_discard_level"]:
- r_att = getattr(row, 'discard')
- else:
- r_att = getattr(row, options["html_ui",
- "default_ham_action"])
- else:
- r_att = getattr(row, options["html_ui",
- "default_unsure_action"])
- setattr(r_att, "checked", 1)
-
- row.optionalHeadersValues = '' # make way for real list
- for header in options["html_ui", "display_headers"]:
- header = header.lower()
- text = getattr(messageInfo, "%sHeader" % (header,))
- if header == "subject":
- # Subject is special, because it links to the body.
- # If the user doesn't display the subject, then there
- # is no link to the body.
- h = self.html.reviewRow.linkedHeaderValue.clone()
- h.text.title = messageInfo.bodySummary
- h.text.href = "view?key=%s&corpus=%s" % (key, label)
- else:
- h = self.html.reviewRow.headerValue.clone()
- h.text = text
- row.optionalHeadersValues += h
-
- # Apart from any message headers, we may also wish to display
- # the message score, and the time the message was received.
- if options["html_ui", "display_score"]:
- if isinstance(messageInfo.score, types.StringTypes):
- # Presumably either "?" or "Err".
- row.score_ = messageInfo.score
- else:
- row.score_ = "%.2f%%" % (messageInfo.score,)
- else:
- del row.score_
- if options["html_ui", "display_received_time"]:
- row.received_ = messageInfo.received
- else:
- del row.received_
-
- # Many characters can't go in the URL or they cause problems
- # (&, ;, ?, etc). So we use the hex values for them all.
- subj_list = []
- for c in messageInfo.subjectHeader:
- subj_list.append("%%%s" % (hex(ord(c))[2:],))
- subj = "".join(subj_list)
- row.classify.href="showclues?key=%s&subject=%s" % (key, subj)
- row.tokens.href="showclues?key=%s&subject=%s&tokens=1" % (key, subj)
- setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr!
- setattr(row, 'onMouseOut',
- ["this.className='stripe_on';",
- "this.className='stripe_off';"][stripe])
- row = str(row).replace('TYPE', label).replace('KEY', key)
- table += row
- stripe = stripe ^ 1
-
def onReview(self, **params):
"""Present a list of message for (re)training."""
--- 257,260 ----
***************
*** 632,644 ****
self._writePostamble(help_topic="review")
- def _contains(self, a, b, ignore_case=False):
- """Return true if substring b is part of string a."""
- assert isinstance(a, types.StringTypes)
- assert isinstance(b, types.StringTypes)
- if ignore_case:
- a = a.lower()
- b = b.lower()
- return a.find(b) >= 0
-
def onView(self, key, corpus):
"""View a message - linked from the Review page."""
--- 527,530 ----
***************
*** 685,752 ****
self._writePostamble()
- def _makeMessageInfo(self, message):
- """Given an email.Message, return an object with subjectHeader,
- bodySummary and other header (as needed) attributes. These objects
- are passed into appendMessages by onReview - passing email.Message
- objects directly uses too much memory.
- """
- # Remove notations before displaying - see:
- # [ 848365 ] Remove subject annotations from message review page
- message.delNotations()
- subjectHeader = message["Subject"] or "(none)"
- headers = {"subject" : subjectHeader}
- for header in options["html_ui", "display_headers"]:
- headers[header.lower()] = (message[header] or "(none)")
- score = message[options["Headers", "score_header_name"]]
- if score:
- # the score might have the log info at the end
- op = score.find('(')
- if op >= 0:
- score = score[:op]
- try:
- score = float(score) * 100
- except ValueError:
- # Hmm. The score header should only contain a floating
- # point number. What's going on here, then?
- score = "Err" # Let the user know something is wrong.
- else:
- # If the lookup fails, this means that the "include_score"
- # option isn't activated. We have the choice here to either
- # calculate it now, which is pretty inefficient, since we have
- # already done so, or to admit that we don't know what it is.
- # We'll go with the latter.
- score = "?"
- try:
- part = typed_subpart_iterator(message, 'text', 'plain').next()
- text = part.get_payload()
- except StopIteration:
- try:
- part = typed_subpart_iterator(message, 'text', 'html').next()
- text = part.get_payload()
- text, unused = tokenizer.crack_html_style(text)
- text, unused = tokenizer.crack_html_comment(text)
- text = tokenizer.html_re.sub(' ', text)
- text = _('(this message only has an HTML body)\n') + text
- except StopIteration:
- text = _('(this message has no text body)')
- if type(text) == type([]): # gotta be a 'right' way to do this
- text = _("(this message is a digest of %s messages)") % (len(text))
- elif text is None:
- text = _("(this message has no body)")
- else:
- text = text.replace('&nbsp;', ' ') # Else they'll be quoted
- text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines
- text = text.strip()
-
- class _MessageInfo:
- pass
- messageInfo = _MessageInfo()
- for headerName, headerValue in headers.items():
- headerValue = self._trimHeader(headerValue, 45, True)
- setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
- messageInfo.score = score
- messageInfo.bodySummary = self._trimHeader(text, 200)
- return messageInfo
-
def close_database(self):
state.close()
--- 571,574 ----
Index: UserInterface.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/UserInterface.py,v
retrieving revision 1.61.2.2
retrieving revision 1.61.2.3
diff -C2 -d -r1.61.2.2 -r1.61.2.3
*** UserInterface.py 8 Jun 2007 02:09:51 -0000 1.61.2.2
--- UserInterface.py 8 Jun 2007 12:08:46 -0000 1.61.2.3
***************
*** 80,83 ****
--- 80,84 ----
import types
import StringIO
+ from email.Iterators import typed_subpart_iterator
import oe_mailbox
***************
*** 277,280 ****
--- 278,282 ----
self.stats = stats
self.app_for_version = None # subclasses must fill this in
+ self.previous_sort = None
def onClassify(self, file, text, which):
***************
*** 1271,1273 ****
--- 1273,1448 ----
return time.mktime(start), time.mktime(end), date
+ def _sortMessages(self, messages, sort_order, reverse=False):
+ """Sorts the message by the appropriate attribute. If this was the
+ previous sort order, then reverse it."""
+ if sort_order is None or sort_order == "received":
+ # Default sorting, which is in reverse order of appearance.
+ # This is complicated because the 'received' info is the key.
+ messages.sort()
+ if self.previous_sort == sort_order:
+ messages.reverse()
+ self.previous_sort = None
+ else:
+ self.previous_sort = 'received'
+ return messages
+ tmplist = [(getattr(x[1], sort_order), x) for x in messages]
+ tmplist.sort()
+ if reverse:
+ tmplist.reverse()
+ return [x for (key, x) in tmplist]
+
+ def _appendMessages(self, table, keyedMessageInfo, label, sort_order,
+ reverse=False):
+ """Appends the rows of a table of messages to 'table'."""
+ stripe = 0
+
+ keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order,
+ reverse)
+ nrows = options["html_ui", "rows_per_section"]
+ for key, messageInfo in keyedMessageInfo[:nrows]:
+ unused, unused, messageInfo.received = \
+ self._getTimeRange(self._keyToTimestamp(key))
+ row = self.html.reviewRow.clone()
+ try:
+ score = messageInfo.score
+ except ValueError:
+ score = None
+ if label == _('Spam'):
+ if score is not None \
+ and score > options["html_ui", "spam_discard_level"]:
+ r_att = getattr(row, 'discard')
+ else:
+ r_att = getattr(row, options["html_ui",
+ "default_spam_action"])
+ elif label == _('Ham'):
+ if score is not None \
+ and score < options["html_ui", "ham_discard_level"]:
+ r_att = getattr(row, 'discard')
+ else:
+ r_att = getattr(row, options["html_ui",
+ "default_ham_action"])
+ else:
+ r_att = getattr(row, options["html_ui",
+ "default_unsure_action"])
+ setattr(r_att, "checked", 1)
+
+ row.optionalHeadersValues = '' # make way for real list
+ for header in options["html_ui", "display_headers"]:
+ header = header.lower()
+ text = getattr(messageInfo, "%sHeader" % (header,))
+ if header == "subject":
+ # Subject is special, because it links to the body.
+ # If the user doesn't display the subject, then there
+ # is no link to the body.
+ h = self.html.reviewRow.linkedHeaderValue.clone()
+ h.text.title = messageInfo.bodySummary
+ h.text.href = "view?key=%s&corpus=%s" % (key, label)
+ else:
+ h = self.html.reviewRow.headerValue.clone()
+ h.text = text
+ row.optionalHeadersValues += h
+
+ # Apart from any message headers, we may also wish to display
+ # the message score, and the time the message was received.
+ if options["html_ui", "display_score"]:
+ if isinstance(messageInfo.score, types.StringTypes):
+ # Presumably either "?" or "Err".
+ row.score_ = messageInfo.score
+ else:
+ row.score_ = "%.2f%%" % (messageInfo.score,)
+ else:
+ del row.score_
+ if options["html_ui", "display_received_time"]:
+ row.received_ = messageInfo.received
+ else:
+ del row.received_
+ # Many characters can't go in the URL or they cause problems
+ # (&, ;, ?, etc). So we use the hex values for them all.
+ subj_list = []
+ for c in messageInfo.subjectHeader:
+ subj_list.append("%%%s" % (hex(ord(c))[2:],))
+ subj = "".join(subj_list)
+ row.classify.href = "showclues?key=%s&subject=%s" % (key, subj)
+ row.tokens.href = ("showclues?key=%s&subject=%s&tokens=1" %
+ (key, subj))
+ setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr!
+ setattr(row, 'onMouseOut',
+ ["this.className='stripe_on';",
+ "this.className='stripe_off';"][stripe])
+ row = str(row).replace('TYPE', label).replace('KEY', key)
+ table += row
+ stripe = stripe ^ 1
+
+ def _contains(self, a, b, ignore_case=False):
+ """Return true if substring b is part of string a."""
+ assert isinstance(a, types.StringTypes)
+ assert isinstance(b, types.StringTypes)
+ if ignore_case:
+ a = a.lower()
+ b = b.lower()
+ return a.find(b) >= 0
+
+ def _makeMessageInfo(self, message):
+ """Given an email.Message, return an object with subjectHeader,
+ bodySummary and other header (as needed) attributes. These objects
+ are passed into appendMessages by onReview - passing email.Message
+ objects directly uses too much memory.
+ """
+ # Remove notations before displaying - see:
+ # [ 848365 ] Remove subject annotations from message review page
+ message.delNotations()
+ subjectHeader = message["Subject"] or "(none)"
+ headers = {"subject" : subjectHeader}
+ for header in options["html_ui", "display_headers"]:
+ headers[header.lower()] = (message[header] or "(none)")
+ score = message[options["Headers", "score_header_name"]]
+ if score:
+ # the score might have the log info at the end
+ op = score.find('(')
+ if op >= 0:
+ score = score[:op]
+ try:
+ score = float(score) * 100
+ except ValueError:
+ # Hmm. The score header should only contain a floating
+ # point number. What's going on here, then?
+ score = "Err" # Let the user know something is wrong.
+ else:
+ # If the lookup fails, this means that the "include_score"
+ # option isn't activated. We have the choice here to either
+ # calculate it now, which is pretty inefficient, since we have
+ # already done so, or to admit that we don't know what it is.
+ # We'll go with the latter.
+ score = "?"
+ try:
+ part = typed_subpart_iterator(message, 'text', 'plain').next()
+ text = part.get_payload()
+ except StopIteration:
+ try:
+ part = typed_subpart_iterator(message, 'text', 'html').next()
+ text = part.get_payload()
+ text, unused = tokenizer.crack_html_style(text)
+ text, unused = tokenizer.crack_html_comment(text)
+ text = tokenizer.html_re.sub(' ', text)
+ text = _('(this message only has an HTML body)\n') + text
+ except StopIteration:
+ text = _('(this message has no text body)')
+ if type(text) == type([]): # gotta be a 'right' way to do this
+ text = _("(this message is a digest of %s messages)") % (len(text))
+ elif text is None:
+ text = _("(this message has no body)")
+ else:
+ text = text.replace('&nbsp;', ' ') # Else they'll be quoted
+ text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines
+ text = text.strip()
+
+ class _MessageInfo:
+ pass
+ messageInfo = _MessageInfo()
+ for headerName, headerValue in headers.items():
+ headerValue = self._trimHeader(headerValue, 45, True)
+ setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
+ messageInfo.score = score
+ messageInfo.bodySummary = self._trimHeader(text, 200)
+ return messageInfo
From montanaro at users.sourceforge.net Sun Jun 10 17:22:44 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Sun, 10 Jun 2007 08:22:44 -0700
Subject: [Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.10,
1.1.2.11
Message-ID: <20070610152250.54D661E4009@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv5944/spambayes
Modified Files:
Tag: CORESVR
XMLRPCPlugin.py
Log Message:
typo in comment
Index: XMLRPCPlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v
retrieving revision 1.1.2.10
retrieving revision 1.1.2.11
diff -C2 -d -r1.1.2.10 -r1.1.2.11
*** XMLRPCPlugin.py 7 Jun 2007 02:50:13 -0000 1.1.2.10
--- XMLRPCPlugin.py 10 Jun 2007 15:22:41 -0000 1.1.2.11
***************
*** 32,36 ****
xmlrpc_host - host to listen to (default: localhost)
! xmlrpc_port - port to listen to (default: 5001)
xmlrpc_path - path to support (default: /sbrpc)
--- 32,36 ----
xmlrpc_host - host to listen to (default: localhost)
! xmlrpc_port - port to listen to (default: 8001)
xmlrpc_path - path to support (default: /sbrpc)
From montanaro at users.sourceforge.net Sun Jun 10 17:27:40 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Sun, 10 Jun 2007 08:27:40 -0700
Subject: [Spambayes-checkins] website faq.txt,1.93,1.94
Message-ID: <20070610152742.50D981E4008@bag.python.org>
Update of /cvsroot/spambayes/website
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7512/website
Modified Files:
faq.txt
Log Message:
Merge CORESVR branch to HEAD. This adds a new app, scripts/core_server.py
and attendant bits, such as an XML-RPC plugin. The web interface is
straight from the POP3 proxy server.
Index: faq.txt
===================================================================
RCS file: /cvsroot/spambayes/website/faq.txt,v
retrieving revision 1.93
retrieving revision 1.94
diff -C2 -d -r1.93 -r1.94
*** faq.txt 13 May 2007 13:44:51 -0000 1.93
--- faq.txt 10 Jun 2007 15:27:37 -0000 1.94
***************
*** 294,297 ****
--- 294,303 ----
http://mail.python.org/pipermail/spambayes-bugs/2007-January/004119.html
+ Note that you will probably have to execute the installer with elevated
+ privileges. Right-clicking on the EXE and selecting "Run as Administrator"
+ should work (and will be necessary even if you are logged in as an admin
+ user).
+
+
Does SpamBayes work with Outlook Express?
-----------------------------------------
From montanaro at users.sourceforge.net Sun Jun 10 17:27:38 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Sun, 10 Jun 2007 08:27:38 -0700
Subject: [Spambayes-checkins] spambayes setup.py,1.32,1.33
Message-ID: <20070610152742.5E3A51E400D@bag.python.org>
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7512
Modified Files:
setup.py
Log Message:
Merge CORESVR branch to HEAD. This adds a new app, scripts/core_server.py
and attendant bits, such as an XML-RPC plugin. The web interface is
straight from the POP3 proxy server.
Index: setup.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/setup.py,v
retrieving revision 1.32
retrieving revision 1.33
diff -C2 -d -r1.32 -r1.33
*** setup.py 6 Dec 2004 03:04:17 -0000 1.32
--- setup.py 10 Jun 2007 15:27:36 -0000 1.33
***************
*** 100,103 ****
--- 100,104 ----
'scripts/sb_pop3dnd.py',
'scripts/sb_server.py',
+ 'scripts/core_server.py',
'scripts/sb_unheader.py',
'scripts/sb_upload.py',
***************
*** 128,131 ****
--- 129,133 ----
'spambayes',
'spambayes.resources',
+ 'spambayes.core_resources',
],
classifiers = [
From montanaro at users.sourceforge.net Sun Jun 10 17:27:39 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Sun, 10 Jun 2007 08:27:39 -0700
Subject: [Spambayes-checkins] spambayes/scripts core_server.py,1.1,1.2
Message-ID: <20070610152742.7B4571E400E@bag.python.org>
Update of /cvsroot/spambayes/spambayes/scripts
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7512/scripts
Added Files:
core_server.py
Log Message:
Merge CORESVR branch to HEAD. This adds a new app, scripts/core_server.py
and attendant bits, such as an XML-RPC plugin. The web interface is
straight from the POP3 proxy server.
From montanaro at users.sourceforge.net Sun Jun 10 17:27:39 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Sun, 10 Jun 2007 08:27:39 -0700
Subject: [Spambayes-checkins] spambayes/spambayes CorePlugin.py, 1.1,
1.2 CoreUI.py, 1.1, 1.2 XMLRPCPlugin.py, 1.1, 1.2 Options.py,
1.141, 1.142 ProxyUI.py, 1.64, 1.65 UserInterface.py, 1.61,
1.62 dnscache.py, 1.3, 1.4 storage.py, 1.63, 1.64
Message-ID: <20070610152744.B48811E400B@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7512/spambayes
Modified Files:
Options.py ProxyUI.py UserInterface.py dnscache.py storage.py
Added Files:
CorePlugin.py CoreUI.py XMLRPCPlugin.py
Log Message:
Merge CORESVR branch to HEAD. This adds a new app, scripts/core_server.py
and attendant bits, such as an XML-RPC plugin. The web interface is
straight from the POP3 proxy server.
Index: Options.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v
retrieving revision 1.141
retrieving revision 1.142
diff -C2 -d -r1.141 -r1.142
*** Options.py 26 Mar 2007 07:57:13 -0000 1.141
--- Options.py 10 Jun 2007 15:27:36 -0000 1.142
***************
*** 653,656 ****
--- 653,671 ----
PATH, DO_NOT_RESTORE),
+ ("core_spam_cache", _("Spam cache directory"), "core-spam-cache",
+ _("""Directory that SpamBayes should cache spam in. If this does
+ not exist, it will be created."""),
+ PATH, DO_NOT_RESTORE),
+
+ ("core_ham_cache", _("Ham cache directory"), "core-ham-cache",
+ _("""Directory that SpamBayes should cache ham in. If this does
+ not exist, it will be created."""),
+ PATH, DO_NOT_RESTORE),
+
+ ("core_unknown_cache", _("Unknown cache directory"), "core-unknown-cache",
+ _("""Directory that SpamBayes should cache unclassified messages in.
+ If this does not exist, it will be created."""),
+ PATH, DO_NOT_RESTORE),
+
("cache_messages", _("Cache messages"), True,
_("""You can disable the pop3proxy caching of messages. This
***************
*** 1280,1283 ****
--- 1295,1309 ----
r"\w\w(?:_\w\w)?", RESTORE),
),
+ "Plugin": (
+ ("xmlrpc_path", _("XML-RPC path"), "/sbrpc",
+ _("""The path to respond to."""),
+ r"[\w]+", RESTORE),
+ ("xmlrpc_host", _("XML-RPC host"), "localhost",
+ _("""The host to listen on."""),
+ SERVER, RESTORE),
+ ("xmlrpc_port", _("XML-RPC port"), 8001,
+ _("""The port to listen on."""),
+ r"[\d]+", RESTORE),
+ ),
}
Index: ProxyUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v
retrieving revision 1.64
retrieving revision 1.65
diff -C2 -d -r1.64 -r1.65
*** ProxyUI.py 28 Nov 2005 10:54:18 -0000 1.64
--- ProxyUI.py 10 Jun 2007 15:27:36 -0000 1.65
***************
*** 54,58 ****
True, False = 1, 0
- import re
import cgi
import time
--- 54,57 ----
***************
*** 72,81 ****
from spambayes.compatsets import Set
- import tokenizer
import UserInterface
from spambayes.Options import options, _
- from email.Iterators import typed_subpart_iterator
! global state
# These are the options that will be offered on the configuration page.
--- 71,78 ----
from spambayes.compatsets import Set
import UserInterface
from spambayes.Options import options, _
! state = None
# These are the options that will be offered on the configuration page.
***************
*** 111,115 ****
)
! # Like the above, but hese are the options that will be offered on the
# advanced configuration page.
adv_map = (
--- 108,112 ----
)
! # Like the above, but these are the options that will be offered on the
# advanced configuration page.
adv_map = (
***************
*** 174,178 ****
self.state_recreator = state_recreator # ugly
self.app_for_version = "SpamBayes Proxy"
- self.previous_sort = None
if not proxy_state.can_stop:
self.html._readonly = False
--- 171,174 ----
***************
*** 221,241 ****
self.write(_("OK. Return Home.
"))
- def _keyToTimestamp(self, key):
- """Given a message key (as seen in a Corpus), returns the timestamp
- for that message. This is the time that the message was received,
- not the Date header."""
- return long(key[:10])
-
- def _getTimeRange(self, timestamp):
- """Given a unix timestamp, returns a 3-tuple: the start timestamp
- of the given day, the end timestamp of the given day, and the
- formatted date of the given day."""
- this = time.localtime(timestamp)
- start = (this[0], this[1], this[2], 0, 0, 0, this[6], this[7], this[8])
- end = time.localtime(time.mktime(start) + 36*60*60)
- end = (end[0], end[1], end[2], 0, 0, 0, end[6], end[7], end[8])
- date = time.strftime("%A, %B %d, %Y", start)
- return time.mktime(start), time.mktime(end), date
-
def _buildReviewKeys(self, timestamp):
"""Builds an ordered list of untrained message keys, ready for output
--- 217,220 ----
***************
*** 278,382 ****
return keys, date, prior, start, end
- def _sortMessages(self, messages, sort_order, reverse=False):
- """Sorts the message by the appropriate attribute. If this was the
- previous sort order, then reverse it."""
- if sort_order is None or sort_order == "received":
- # Default sorting, which is in reverse order of appearance.
- # This is complicated because the 'received' info is the key.
- messages.sort()
- if self.previous_sort == sort_order:
- messages.reverse()
- self.previous_sort = None
- else:
- self.previous_sort = 'received'
- return messages
- tmplist = [(getattr(x[1], sort_order), x) for x in messages]
- tmplist.sort()
- if reverse:
- tmplist.reverse()
- return [x for (key, x) in tmplist]
-
- def _appendMessages(self, table, keyedMessageInfo, label, sort_order,
- reverse=False):
- """Appends the rows of a table of messages to 'table'."""
- stripe = 0
-
- keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order,
- reverse)
- nrows = options["html_ui", "rows_per_section"]
- for key, messageInfo in keyedMessageInfo[:nrows]:
- unused, unused, messageInfo.received = \
- self._getTimeRange(self._keyToTimestamp(key))
- row = self.html.reviewRow.clone()
- try:
- score = messageInfo.score
- except ValueError:
- score = None
- if label == _('Spam'):
- if score is not None \
- and score > options["html_ui", "spam_discard_level"]:
- r_att = getattr(row, 'discard')
- else:
- r_att = getattr(row, options["html_ui",
- "default_spam_action"])
- elif label == _('Ham'):
- if score is not None \
- and score < options["html_ui", "ham_discard_level"]:
- r_att = getattr(row, 'discard')
- else:
- r_att = getattr(row, options["html_ui",
- "default_ham_action"])
- else:
- r_att = getattr(row, options["html_ui",
- "default_unsure_action"])
- setattr(r_att, "checked", 1)
-
- row.optionalHeadersValues = '' # make way for real list
- for header in options["html_ui", "display_headers"]:
- header = header.lower()
- text = getattr(messageInfo, "%sHeader" % (header,))
- if header == "subject":
- # Subject is special, because it links to the body.
- # If the user doesn't display the subject, then there
- # is no link to the body.
- h = self.html.reviewRow.linkedHeaderValue.clone()
- h.text.title = messageInfo.bodySummary
- h.text.href = "view?key=%s&corpus=%s" % (key, label)
- else:
- h = self.html.reviewRow.headerValue.clone()
- h.text = text
- row.optionalHeadersValues += h
-
- # Apart from any message headers, we may also wish to display
- # the message score, and the time the message was received.
- if options["html_ui", "display_score"]:
- if isinstance(messageInfo.score, types.StringTypes):
- # Presumably either "?" or "Err".
- row.score_ = messageInfo.score
- else:
- row.score_ = "%.2f%%" % (messageInfo.score,)
- else:
- del row.score_
- if options["html_ui", "display_received_time"]:
- row.received_ = messageInfo.received
- else:
- del row.received_
-
- # Many characters can't go in the URL or they cause problems
- # (&, ;, ?, etc). So we use the hex values for them all.
- subj_list = []
- for c in messageInfo.subjectHeader:
- subj_list.append("%%%s" % (hex(ord(c))[2:],))
- subj = "".join(subj_list)
- row.classify.href="showclues?key=%s&subject=%s" % (key, subj)
- row.tokens.href="showclues?key=%s&subject=%s&tokens=1" % (key, subj)
- setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr!
- setattr(row, 'onMouseOut',
- ["this.className='stripe_on';",
- "this.className='stripe_off';"][stripe])
- row = str(row).replace('TYPE', label).replace('KEY', key)
- table += row
- stripe = stripe ^ 1
-
def onReview(self, **params):
"""Present a list of message for (re)training."""
--- 257,260 ----
***************
*** 619,626 ****
sh.optionalHeaders = ''
h = self.html.headerHeader.clone()
! for header in options["html_ui", "display_headers"]:
h.headerLink.href = 'review?sort=%sHeader' % \
! (header.lower(),)
! h.headerName = header.title()
sh.optionalHeaders += h
if not options["html_ui", "display_score"]:
--- 497,504 ----
sh.optionalHeaders = ''
h = self.html.headerHeader.clone()
! for disp_header in options["html_ui", "display_headers"]:
h.headerLink.href = 'review?sort=%sHeader' % \
! (disp_header.lower(),)
! h.headerName = disp_header.title()
sh.optionalHeaders += h
if not options["html_ui", "display_score"]:
***************
*** 649,661 ****
self._writePostamble(help_topic="review")
- def _contains(self, a, b, ignore_case=False):
- """Return true if substring b is part of string a."""
- assert isinstance(a, types.StringTypes)
- assert isinstance(b, types.StringTypes)
- if ignore_case:
- a = a.lower()
- b = b.lower()
- return a.find(b) >= 0
-
def onView(self, key, corpus):
"""View a message - linked from the Review page."""
--- 527,530 ----
***************
*** 702,769 ****
self._writePostamble()
- def _makeMessageInfo(self, message):
- """Given an email.Message, return an object with subjectHeader,
- bodySummary and other header (as needed) attributes. These objects
- are passed into appendMessages by onReview - passing email.Message
- objects directly uses too much memory.
- """
- # Remove notations before displaying - see:
- # [ 848365 ] Remove subject annotations from message review page
- message.delNotations()
- subjectHeader = message["Subject"] or "(none)"
- headers = {"subject" : subjectHeader}
- for header in options["html_ui", "display_headers"]:
- headers[header.lower()] = (message[header] or "(none)")
- score = message[options["Headers", "score_header_name"]]
- if score:
- # the score might have the log info at the end
- op = score.find('(')
- if op >= 0:
- score = score[:op]
- try:
- score = float(score) * 100
- except ValueError:
- # Hmm. The score header should only contain a floating
- # point number. What's going on here, then?
- score = "Err" # Let the user know something is wrong.
- else:
- # If the lookup fails, this means that the "include_score"
- # option isn't activated. We have the choice here to either
- # calculate it now, which is pretty inefficient, since we have
- # already done so, or to admit that we don't know what it is.
- # We'll go with the latter.
- score = "?"
- try:
- part = typed_subpart_iterator(message, 'text', 'plain').next()
- text = part.get_payload()
- except StopIteration:
- try:
- part = typed_subpart_iterator(message, 'text', 'html').next()
- text = part.get_payload()
- text, unused = tokenizer.crack_html_style(text)
- text, unused = tokenizer.crack_html_comment(text)
- text = tokenizer.html_re.sub(' ', text)
- text = _('(this message only has an HTML body)\n') + text
- except StopIteration:
- text = _('(this message has no text body)')
- if type(text) == type([]): # gotta be a 'right' way to do this
- text = _("(this message is a digest of %s messages)") % (len(text))
- elif text is None:
- text = _("(this message has no body)")
- else:
- text = text.replace(' ', ' ') # Else they'll be quoted
- text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines
- text = text.strip()
-
- class _MessageInfo:
- pass
- messageInfo = _MessageInfo()
- for headerName, headerValue in headers.items():
- headerValue = self._trimHeader(headerValue, 45, True)
- setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
- messageInfo.score = score
- messageInfo.bodySummary = self._trimHeader(text, 200)
- return messageInfo
-
def close_database(self):
state.close()
--- 571,574 ----
Index: UserInterface.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/UserInterface.py,v
retrieving revision 1.61
retrieving revision 1.62
diff -C2 -d -r1.61 -r1.62
*** UserInterface.py 28 Nov 2005 10:53:12 -0000 1.61
--- UserInterface.py 10 Jun 2007 15:27:36 -0000 1.62
***************
*** 80,83 ****
--- 80,84 ----
import types
import StringIO
+ from email.Iterators import typed_subpart_iterator
import oe_mailbox
***************
*** 277,280 ****
--- 278,282 ----
self.stats = stats
self.app_for_version = None # subclasses must fill this in
+ self.previous_sort = None
def onClassify(self, file, text, which):
***************
*** 820,823 ****
--- 822,827 ----
elif parms["how"] == _("Save experimental options"):
pmap = experimental_ini_map
+ elif parms["how"] == _("Save plugin options"):
+ pmap = self.plugin_ini_map
del parms["how"]
html = self._getHTMLClone()
***************
*** 1251,1252 ****
--- 1255,1448 ----
lines.append(''.join(cur_line))
return lines
+
+ def _keyToTimestamp(self, key):
+ """Given a message key (as seen in a Corpus), returns the timestamp
+ for that message. This is the time that the message was received,
+ not the Date header."""
+ return long(key[:10])
+
+ def _getTimeRange(self, timestamp):
+ """Given a unix timestamp, returns a 3-tuple: the start timestamp
+ of the given day, the end timestamp of the given day, and the
+ formatted date of the given day."""
+ this = time.localtime(timestamp)
+ start = (this[0], this[1], this[2], 0, 0, 0, this[6], this[7], this[8])
+ end = time.localtime(time.mktime(start) + 36*60*60)
+ end = (end[0], end[1], end[2], 0, 0, 0, end[6], end[7], end[8])
+ date = time.strftime("%A, %B %d, %Y", start)
+ return time.mktime(start), time.mktime(end), date
+
+ def _sortMessages(self, messages, sort_order, reverse=False):
+ """Sorts the message by the appropriate attribute. If this was the
+ previous sort order, then reverse it."""
+ if sort_order is None or sort_order == "received":
+ # Default sorting, which is in reverse order of appearance.
+ # This is complicated because the 'received' info is the key.
+ messages.sort()
+ if self.previous_sort == sort_order:
+ messages.reverse()
+ self.previous_sort = None
+ else:
+ self.previous_sort = 'received'
+ return messages
+ tmplist = [(getattr(x[1], sort_order), x) for x in messages]
+ tmplist.sort()
+ if reverse:
+ tmplist.reverse()
+ return [x for (key, x) in tmplist]
+
+ def _appendMessages(self, table, keyedMessageInfo, label, sort_order,
+ reverse=False):
+ """Appends the rows of a table of messages to 'table'."""
+ stripe = 0
+
+ keyedMessageInfo = self._sortMessages(keyedMessageInfo, sort_order,
+ reverse)
+ nrows = options["html_ui", "rows_per_section"]
+ for key, messageInfo in keyedMessageInfo[:nrows]:
+ unused, unused, messageInfo.received = \
+ self._getTimeRange(self._keyToTimestamp(key))
+ row = self.html.reviewRow.clone()
+ try:
+ score = messageInfo.score
+ except ValueError:
+ score = None
+ if label == _('Spam'):
+ if score is not None \
+ and score > options["html_ui", "spam_discard_level"]:
+ r_att = getattr(row, 'discard')
+ else:
+ r_att = getattr(row, options["html_ui",
+ "default_spam_action"])
+ elif label == _('Ham'):
+ if score is not None \
+ and score < options["html_ui", "ham_discard_level"]:
+ r_att = getattr(row, 'discard')
+ else:
+ r_att = getattr(row, options["html_ui",
+ "default_ham_action"])
+ else:
+ r_att = getattr(row, options["html_ui",
+ "default_unsure_action"])
+ setattr(r_att, "checked", 1)
+
+ row.optionalHeadersValues = '' # make way for real list
+ for header in options["html_ui", "display_headers"]:
+ header = header.lower()
+ text = getattr(messageInfo, "%sHeader" % (header,))
+ if header == "subject":
+ # Subject is special, because it links to the body.
+ # If the user doesn't display the subject, then there
+ # is no link to the body.
+ h = self.html.reviewRow.linkedHeaderValue.clone()
+ h.text.title = messageInfo.bodySummary
+ h.text.href = "view?key=%s&corpus=%s" % (key, label)
+ else:
+ h = self.html.reviewRow.headerValue.clone()
+ h.text = text
+ row.optionalHeadersValues += h
+
+ # Apart from any message headers, we may also wish to display
+ # the message score, and the time the message was received.
+ if options["html_ui", "display_score"]:
+ if isinstance(messageInfo.score, types.StringTypes):
+ # Presumably either "?" or "Err".
+ row.score_ = messageInfo.score
+ else:
+ row.score_ = "%.2f%%" % (messageInfo.score,)
+ else:
+ del row.score_
+ if options["html_ui", "display_received_time"]:
+ row.received_ = messageInfo.received
+ else:
+ del row.received_
+
+ # Many characters can't go in the URL or they cause problems
+ # (&, ;, ?, etc). So we use the hex values for them all.
+ subj_list = []
+ for c in messageInfo.subjectHeader:
+ subj_list.append("%%%s" % (hex(ord(c))[2:],))
+ subj = "".join(subj_list)
+ row.classify.href = "showclues?key=%s&subject=%s" % (key, subj)
+ row.tokens.href = ("showclues?key=%s&subject=%s&tokens=1" %
+ (key, subj))
+ setattr(row, 'class', ['stripe_on', 'stripe_off'][stripe]) # Grr!
+ setattr(row, 'onMouseOut',
+ ["this.className='stripe_on';",
+ "this.className='stripe_off';"][stripe])
+ row = str(row).replace('TYPE', label).replace('KEY', key)
+ table += row
+ stripe = stripe ^ 1
+
+ def _contains(self, a, b, ignore_case=False):
+ """Return true if substring b is part of string a."""
+ assert isinstance(a, types.StringTypes)
+ assert isinstance(b, types.StringTypes)
+ if ignore_case:
+ a = a.lower()
+ b = b.lower()
+ return a.find(b) >= 0
+
+ def _makeMessageInfo(self, message):
+ """Given an email.Message, return an object with subjectHeader,
+ bodySummary and other header (as needed) attributes. These objects
+ are passed into appendMessages by onReview - passing email.Message
+ objects directly uses too much memory.
+ """
+ # Remove notations before displaying - see:
+ # [ 848365 ] Remove subject annotations from message review page
+ message.delNotations()
+ subjectHeader = message["Subject"] or "(none)"
+ headers = {"subject" : subjectHeader}
+ for header in options["html_ui", "display_headers"]:
+ headers[header.lower()] = (message[header] or "(none)")
+ score = message[options["Headers", "score_header_name"]]
+ if score:
+ # the score might have the log info at the end
+ op = score.find('(')
+ if op >= 0:
+ score = score[:op]
+ try:
+ score = float(score) * 100
+ except ValueError:
+ # Hmm. The score header should only contain a floating
+ # point number. What's going on here, then?
+ score = "Err" # Let the user know something is wrong.
+ else:
+ # If the lookup fails, this means that the "include_score"
+ # option isn't activated. We have the choice here to either
+ # calculate it now, which is pretty inefficient, since we have
+ # already done so, or to admit that we don't know what it is.
+ # We'll go with the latter.
+ score = "?"
+ try:
+ part = typed_subpart_iterator(message, 'text', 'plain').next()
+ text = part.get_payload()
+ except StopIteration:
+ try:
+ part = typed_subpart_iterator(message, 'text', 'html').next()
+ text = part.get_payload()
+ text, unused = tokenizer.crack_html_style(text)
+ text, unused = tokenizer.crack_html_comment(text)
+ text = tokenizer.html_re.sub(' ', text)
+ text = _('(this message only has an HTML body)\n') + text
+ except StopIteration:
+ text = _('(this message has no text body)')
+ if type(text) == type([]): # gotta be a 'right' way to do this
+ text = _("(this message is a digest of %s messages)") % (len(text))
+ elif text is None:
+ text = _("(this message has no body)")
+ else:
+ text = text.replace(' ', ' ') # Else they'll be quoted
+ text = re.sub(r'(\s)\s+', r'\1', text) # Eg. multiple blank lines
+ text = text.strip()
+
+ class _MessageInfo:
+ pass
+ messageInfo = _MessageInfo()
+ for headerName, headerValue in headers.items():
+ headerValue = self._trimHeader(headerValue, 45, True)
+ setattr(messageInfo, "%sHeader" % (headerName,), headerValue)
+ messageInfo.score = score
+ messageInfo.bodySummary = self._trimHeader(text, 200)
+ return messageInfo
Index: dnscache.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/dnscache.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** dnscache.py 13 Aug 2006 02:05:43 -0000 1.3
--- dnscache.py 10 Jun 2007 15:27:36 -0000 1.4
***************
*** 23,29 ****
kCheckForPruneEvery=20
! kMaxTTL=60 * 60 * 24 * 7 # One week
! kPruneThreshold=1500 # May go over slightly; numbers chosen at random
! kPruneDownTo=1000
--- 23,32 ----
kCheckForPruneEvery=20
! kMaxTTL=60 * 60 * 24 * 7 # One week
! # Some servers always return a TTL of zero. We'll hold onto data a bit
! # longer.
! kMinTTL=24 * 60 * 60 * 1 # one day
! kPruneThreshold=5000 # May go over slightly; numbers chosen at random
! kPruneDownTo=2500
***************
*** 89,97 ****
self.dnsTimeout=10
- # Some servers always return a TTL of zero.
- # In those cases, turning this up a bit is
- # probably reasonable.
- self.minTTL=0
-
# end of user-settable attributes
--- 92,95 ----
***************
*** 160,164 ****
c=self.caches[answer.qType]
c[answer.question].remove(answer)
! if len(c[answer.question])==0:
del c[answer.question]
--- 158,162 ----
c=self.caches[answer.qType]
c[answer.question].remove(answer)
! if not c[answer.question]:
del c[answer.question]
***************
*** 180,184 ****
c=self.caches[answer.qType]
c[answer.question].remove(answer)
! if len(c[answer.question])==0:
del c[answer.question]
--- 178,182 ----
c=self.caches[answer.qType]
c[answer.question].remove(answer)
! if not c[answer.question]:
del c[answer.question]
***************
*** 218,233 ****
pass
else:
! assert len(answers)>0
! ind=0
! # No guarantee that expire has already been done
! while ind> sys.stderr, "lookup failure:", question
! if not answers:
del cacheToLookIn[question]
else:
***************
*** 250,275 ****
except DNS.Base.DNSError,detail:
if detail.args[0]<>"Timeout":
! print "Error, fixme",detail
! print "Question was",queryQuestion
! print "Origianal question was",question
! print "Type was",qType
objs=[ lookupResult(qType,None,question,self.cacheErrorSecs+now,now) ]
cacheToLookIn[question]=objs # Add to format for return?
return self.formatForReturn(objs)
except socket.gaierror,detail:
! print "DNS connection failure:", self.queryObj.ns, detail
! print "Defaults:", DNS.defaults
objs=[]
for answer in reply.answers:
if answer["typename"]==qType:
! # PyDNS returns TTLs as longs but RFC 1035 says that the
! # TTL value is a signed 32-bit value and must be positive,
! # so it should be safe to coerce it to a Python integer.
! # And anyone who sets a time to live of more than 2^31-1
! # seconds (68 years and change) is drunk.
! # Arguably, I ought to impose a maximum rather than continuing
! # with longs (int(long) returns long in recent versions of Python).
! ttl=max(min(int(answer["ttl"]),kMaxTTL),self.minTTL)
# RFC 2308 says that you should cache an NXDOMAIN for the
# minimum of the minimum field of the SOA record and the TTL
--- 250,275 ----
except DNS.Base.DNSError,detail:
if detail.args[0]<>"Timeout":
! print >> sys.stderr, "Error, fixme", detail
! print >> sys.stderr, "Question was", queryQuestion
! print >> sys.stderr, "Original question was", question
! print >> sys.stderr, "Type was", qType
objs=[ lookupResult(qType,None,question,self.cacheErrorSecs+now,now) ]
cacheToLookIn[question]=objs # Add to format for return?
return self.formatForReturn(objs)
except socket.gaierror,detail:
! print >> sys.stderr, "DNS connection failure:", self.queryObj.ns, detail
! print >> sys.stderr, "Defaults:", DNS.defaults
objs=[]
for answer in reply.answers:
if answer["typename"]==qType:
! # PyDNS returns TTLs as longs but RFC 1035 says that the TTL
! # value is a signed 32-bit value and must be positive, so it
! # should be safe to coerce it to a Python integer. And
! # anyone who sets a time to live of more than 2^31-1 seconds
! # (68 years and change) is drunk. Arguably, I ought to
! # impose a maximum rather than continuing with longs
! # (int(long) returns long in recent versions of Python).
! ttl=max(min(int(answer["ttl"]),kMaxTTL),kMinTTL)
# RFC 2308 says that you should cache an NXDOMAIN for the
# minimum of the minimum field of the SOA record and the TTL
***************
*** 279,288 ****
objs.append(item)
! if len(objs)>0:
cacheToLookIn[question]=objs
return self.formatForReturn(objs)
# Probably SERVFAIL or the like
! if len(reply.authority)==0:
objs=[ lookupResult(qType,None,question,self.cacheErrorSecs+now,now) ]
cacheToLookIn[question]=objs
--- 279,288 ----
objs.append(item)
! if objs:
cacheToLookIn[question]=objs
return self.formatForReturn(objs)
# Probably SERVFAIL or the like
! if not reply.authority:
objs=[ lookupResult(qType,None,question,self.cacheErrorSecs+now,now) ]
cacheToLookIn[question]=objs
***************
*** 319,329 ****
"www.seeputofor.com", "www.completegarbage.tv",
"www.tradelinkllc.com"]:
! print "checking", host
now=time.time()
ips=c.lookup(host)
! print ips,time.time()-now
now=time.time()
ips=c.lookup(host)
! print ips,time.time()-now
if ips:
--- 319,329 ----
"www.seeputofor.com", "www.completegarbage.tv",
"www.tradelinkllc.com"]:
! print >> sys.stderr, "checking", host
now=time.time()
ips=c.lookup(host)
! print >> sys.stderr, ips,time.time()-now
now=time.time()
ips=c.lookup(host)
! print >> sys.stderr, ips,time.time()-now
if ips:
***************
*** 331,340 ****
now=time.time()
name=c.lookup(ip,qType="PTR")
! print name,time.time()-now
now=time.time()
name=c.lookup(ip,qType="PTR")
! print name,time.time()-now
else:
! print "unknown"
c.close()
--- 331,340 ----
now=time.time()
name=c.lookup(ip,qType="PTR")
! print >> sys.stderr, name,time.time()-now
now=time.time()
name=c.lookup(ip,qType="PTR")
! print >> sys.stderr, name,time.time()-now
else:
! print >> sys.stderr, "unknown"
c.close()
Index: storage.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/storage.py,v
retrieving revision 1.63
retrieving revision 1.64
diff -C2 -d -r1.63 -r1.64
*** storage.py 11 May 2007 00:23:08 -0000 1.63
--- storage.py 10 Jun 2007 15:27:36 -0000 1.64
***************
*** 721,726 ****
import ZODB
from ZODB.FileStorage import FileStorage
! self.storage = FileStorage(self.db_filename,
! read_only=self.mode=='r')
def load(self):
--- 721,731 ----
import ZODB
from ZODB.FileStorage import FileStorage
! try:
! self.storage = FileStorage(self.db_filename,
! read_only=self.mode=='r')
! except IOError, msg:
! print >> sys.stderr, ("Could not create FileStorage from",
! self.db_filename)
! raise
def load(self):
***************
*** 774,778 ****
from ZODB.POSException import ReadOnlyError
! assert self.closed == False, "Can't store a closed database"
if options["globals", "verbose"]:
--- 779,783 ----
from ZODB.POSException import ReadOnlyError
! assert not self.closed, "Can't store a closed database"
if options["globals", "verbose"]:
From montanaro at users.sourceforge.net Sun Jun 10 17:27:40 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Sun, 10 Jun 2007 08:27:40 -0700
Subject: [Spambayes-checkins] spambayes/spambayes/core_resources README.txt,
1.1, 1.2 README_txt.py, 1.1, 1.2 __init__.py, 1.1,
1.2 classify.gif, 1.1, 1.2 classify_gif.py, 1.1,
1.2 config.gif, 1.1, 1.2 config_gif.py, 1.1, 1.2 helmet.gif,
1.1, 1.2 helmet_gif.py, 1.1, 1.2 help.gif, 1.1,
1.2 help_gif.py, 1.1, 1.2 message.gif, 1.1, 1.2 message_gif.py,
1.1, 1.2 query.gif, 1.1, 1.2 query_gif.py, 1.1,
1.2 scanning__init__.py, 1.1, 1.2 status.gif, 1.1,
1.2 status_gif.py, 1.1, 1.2 train.gif, 1.1, 1.2 train_gif.py,
1.1, 1.2 ui.html, 1.1, 1.2 ui.psp, 1.1, 1.2 ui_html.py, 1.1,
1.2 ui_psp.py, 1.1, 1.2
Message-ID: <20070610152745.2DBF71E4008@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes/core_resources
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7512/spambayes/core_resources
Added Files:
README.txt README_txt.py __init__.py classify.gif
classify_gif.py config.gif config_gif.py helmet.gif
helmet_gif.py help.gif help_gif.py message.gif message_gif.py
query.gif query_gif.py scanning__init__.py status.gif
status_gif.py train.gif train_gif.py ui.html ui.psp ui_html.py
ui_psp.py
Log Message:
Merge CORESVR branch to HEAD. This adds a new app, scripts/core_server.py
and attendant bits, such as an XML-RPC plugin. The web interface is
straight from the POP3 proxy server.
From montanaro at users.sourceforge.net Mon Jun 25 14:10:16 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Mon, 25 Jun 2007 05:10:16 -0700
Subject: [Spambayes-checkins] spambayes CHANGELOG.txt,1.58,1.59
Message-ID: <20070625121021.45FBD1E4012@bag.python.org>
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv19255
Modified Files:
CHANGELOG.txt
Log Message:
.
Index: CHANGELOG.txt
===================================================================
RCS file: /cvsroot/spambayes/spambayes/CHANGELOG.txt,v
retrieving revision 1.58
retrieving revision 1.59
diff -C2 -d -r1.58 -r1.59
*** CHANGELOG.txt 9 Sep 2006 23:02:06 -0000 1.58
--- CHANGELOG.txt 25 Jun 2007 12:10:10 -0000 1.59
***************
*** 3,8 ****
Release 1.1a4
- Skip Montanaro 2006-09-09 First crack at handling image sequences
Skip Montanaro 2006-09-09 Dump NetPBM decode support in favor of PIL
Release 1.1a3
--- 3,14 ----
Release 1.1a4
Skip Montanaro 2006-09-09 Dump NetPBM decode support in favor of PIL
+ Skip Montanaro 2006-09-09 First crack at handling image sequences
+ Skip Montanaro 2007-05-11 IMAP patch for contrib/tte.py (Dave Abrahams)
+ Skip Montanaro 2007-05-11 Remove duplicate use of --cullext flag to contrib/tte.py
+ Skip Montanaro 2007-05-22 Note missing file name in error message - FileStorage.py
+ Skip Montanaro 2007-05-24 Set MinTTL to one day in dnscache.py
+ Skip Montanaro 2007-05-25 Catch broader exception in ImageStripper.py when image load fails (Sjoerd Mullender)
+ Skip Montanaro 2007-06-10 Add core_server.py & friends - plugin-based server
Release 1.1a3
From montanaro at users.sourceforge.net Tue Jun 26 00:51:11 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Mon, 25 Jun 2007 15:51:11 -0700
Subject: [Spambayes-checkins] website developer.ht, 1.13, 1.14 download.ht,
1.37, 1.38 index.ht, 1.41, 1.42
Message-ID: <20070625225117.DDC181E4018@bag.python.org>
Update of /cvsroot/spambayes/website
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv26214
Modified Files:
developer.ht download.ht index.ht
Log Message:
1.1a4 bits
Index: developer.ht
===================================================================
RCS file: /cvsroot/spambayes/website/developer.ht,v
retrieving revision 1.13
retrieving revision 1.14
diff -C2 -d -r1.13 -r1.14
*** developer.ht 16 Jan 2005 22:23:31 -0000 1.13
--- developer.ht 25 Jun 2007 22:51:01 -0000 1.14
***************
*** 35,56 ****
So what needs to be done
! 1.0 was released in July 2004, and was followed up by a bugfix 1.0.1
! release in November 2004. We intend to fix as many remaining bugs with the
! 1.0.x branch as is practical and hope to release 1.0.2 towards the end of
! January 2005. This is likely to be the final release in the 1.0.x line,
! unless there are unforeseen problems with the 1.0.2 or 1.1 releases.
Since May 2004, work has been carried out on a 1.1 release, which
includes many improvements, as well as bug fixes, compared to the 1.0.x
! branch. We hope to release 1.1a1 for public testing at the end of January
! 2005, to be followed by at least one more alpha, at least one beta, and
! at least one release candidate. We hope that a stable 1.1 release will
! be made in April 2005, although this date is certainly not fixed.
! The 1.1 line will be frozen for non-bugfix changes from the first
! beta release (probably early March 2005). Many of the changes desired
! by the developers have been implemented, or partly so, but there is
! still time for further improvement. There is no time limit on
! implementing bug fixes.
Some key work that is in progress for 1.1, which you could assist
--- 35,54 ----
So what needs to be done
!
! 1.0 was released in July 2004, and was followed up by three bugfix
! releases starting in November 2004. The current stable release is 1.0.4.
! This is likely to be the final release in the 1.0.x line.
Since May 2004, work has been carried out on a 1.1 release, which
includes many improvements, as well as bug fixes, compared to the 1.0.x
! branch. The latest alpha release is 1.1a4 (June 2007). If we could find
! more time or more help we could get to beta, release candidate and final
! releases of 1.1. We hope that a stable 1.1 release will be made during
! 2007, although this date is certainly not fixed.
! The 1.1 line will be frozen for non-bugfix changes from the first beta
! release. Many of the changes desired by the developers have been
! implemented, or partly so, but there is still time for further improvement.
! There is no time limit on implementing bug fixes.
Some key work that is in progress for 1.1, which you could assist
***************
*** 67,70 ****
--- 65,78 ----
ZODB/ZEO, have been added, and the SQL backends improved.
Improvement in the unit testing suite.
+ Testing and/or improving the image handling capabilities. 1.1a3
+ introduced OCR capability using the open source gocr program and
+ PIL.
+ Testing the new core_server.py application which implements a
+ plugin architecture for external protocol adapters. The first adapter
+ provides an XML-RPC interface, making it possible to extend SpamBayes to
+ websites and other non-mail applications. You could interface this server
+ to web applications such as Trac, MoinMoin or your favorite blog software.
+ You could also implement a POP3 protocol adapter so we can merge
+ core_server.py and sb-server.py.
Index: download.ht
===================================================================
RCS file: /cvsroot/spambayes/website/download.ht,v
retrieving revision 1.37
retrieving revision 1.38
diff -C2 -d -r1.37 -r1.38
*** download.ht 7 Aug 2006 22:23:26 -0000 1.37
--- download.ht 25 Jun 2007 22:51:01 -0000 1.38
***************
*** 11,16 ****
spambayes at python.org.
! The second alpha release of 1.1 is also now available. It is highly likely
! that there are new bugs in this release (especially with the IMAP filter),
but if you are willing and able to give it a spin for us, that would be
greatly appreciated. You might like to look at this
--- 11,17 ----
spambayes at python.org.
!
The latest alpha release of 1.1 is 1.1a4. At the moment it is only
! available as a source release. There is no Windows installer. It is highly likely
that there are new bugs in this release,
but if you are willing and able to give it a spin for us, that would be
greatly appreciated. You might like to look at this
***************
*** 72,88 ****
! spambayes-1.1a2.exe
! (3,025,816 bytes,
! sig)
!
! 6c94cb14008580c309dd176af73f2132
! spambayes-1.1a2.tar.gz
! (830,084 bytes,
! sig)
! spambayes-1.1a2.zip
! (971,031 bytes,
! sig)
--- 73,85 ----
! 35494ade1bf380651bcc3077bf108310
! spambayes-1.1a4.tar.gz
! (992,533 bytes,
! sig)
! sb-1.1a4.zip
! (1,128,015 bytes,
! sig)
Index: index.ht
===================================================================
RCS file: /cvsroot/spambayes/website/index.ht,v
retrieving revision 1.41
retrieving revision 1.42
diff -C2 -d -r1.41 -r1.42
*** index.ht 7 Aug 2006 22:23:26 -0000 1.41
--- index.ht 25 Jun 2007 22:51:01 -0000 1.42
***************
*** 8,13 ****
archives and a Windows binary installer).
See the download page for more.
! SpamBayes 1.1a2 is also now available! (This includes both the source
! archives and a Windows binary installers). This is an alpha
release, so you should only try it if you are willing to try out
experimental releases - otherwise stick with 1.0.4.
--- 8,13 ----
archives and a Windows binary installer).
See the download page for more.
! SpamBayes 1.1a4 is also now available! (This currently includes only the
! source archives). This is an alpha
release, so you should only try it if you are willing to try out
experimental releases - otherwise stick with 1.0.4.
From montanaro at users.sourceforge.net Tue Jun 26 00:52:23 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Mon, 25 Jun 2007 15:52:23 -0700
Subject: [Spambayes-checkins] website/sigs sb11a4.zip.asc, NONE,
1.1 spambayes-1.1a4.tar.gz.asc, NONE, 1.1
Message-ID: <20070625225226.406371E4005@bag.python.org>
Update of /cvsroot/spambayes/website/sigs
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv27037
Added Files:
sb11a4.zip.asc spambayes-1.1a4.tar.gz.asc
Log Message:
1.1a4 sigs
--- NEW FILE: sb11a4.zip.asc ---
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.1 (Darwin)
iD8DBQBGgCiM3WE5XNCqNv4RAlRfAJsFSLY/3Nra2346bSWD/G28CSY/CQCgmHOx
auQYWiLiTcuBSqdktMjq+sg=
=xbyw
-----END PGP SIGNATURE-----
--- NEW FILE: spambayes-1.1a4.tar.gz.asc ---
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.1 (Darwin)
iD8DBQBGgCh+3WE5XNCqNv4RAmIsAJwKuMFBC4R4GGPu/H7sM4ziXNe2bQCdGQJf
TBpkq9CD66rFBdE7tDg8OmA=
=cYIy
-----END PGP SIGNATURE-----
From montanaro at users.sourceforge.net Wed Jun 27 03:59:52 2007
From: montanaro at users.sourceforge.net (Skip Montanaro)
Date: Tue, 26 Jun 2007 18:59:52 -0700
Subject: [Spambayes-checkins] spambayes README.txt,1.66,1.67
Message-ID: <20070627015956.904EE1E400A@bag.python.org>
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv7047
Modified Files:
README.txt
Log Message:
Add some directions about starting the XML-RPC (core) server.
Index: README.txt
===================================================================
RCS file: /cvsroot/spambayes/spambayes/README.txt,v
retrieving revision 1.66
retrieving revision 1.67
diff -C2 -d -r1.66 -r1.67
*** README.txt 12 Apr 2004 01:59:26 -0000 1.66
--- README.txt 27 Jun 2007 01:59:49 -0000 1.67
***************
*** 179,182 ****
--- 179,253 ----
+ XML-RPC Server
+ --------------
+
+ The XML-RPC server (new in 1.1a4) web interface is almost identical to the
+ POP3 proxy user interface. Instead of proxying POP3 communications though
+ it provides an XML-RPC server your (typically non-mail) applications can use
+ to score content submissions.
+
+ To install and configure it:
+
+ 1. Unpack and install the distribution:
+
+ tar xvfz spambayes-1.1a4.tar.gz
+ cd spambayes-1.1a4
+ python setup.py install
+
+ 2. Devote a runtime directory to it:
+
+ SBDIR=/usr/local/spambayes/core_server # or whatever...
+ mkdir -p $SBDIR
+
+ 3. Create an INI file:
+
+ cd $SBDIR
+ cat > bayescustomize.ini <
Update of /cvsroot/spambayes/spambayes/scripts
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30139/scripts
Modified Files:
sb_server.py
Log Message:
Assign None to a Message instance's message_info_db
Index: sb_server.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/scripts/sb_server.py,v
retrieving revision 1.51
retrieving revision 1.52
diff -C2 -d -r1.51 -r1.52
*** sb_server.py 7 Apr 2006 02:35:34 -0000 1.51
--- sb_server.py 27 Jun 2007 10:33:23 -0000 1.52
***************
*** 799,803 ****
self.mdb.close()
self.mdb = None
! spambayes.message.Message.message_info_db = None
self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
--- 799,803 ----
self.mdb.close()
self.mdb = None
! spambayes.message.Message().message_info_db = None
self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
From mhammond at users.sourceforge.net Fri Jun 29 03:06:01 2007
From: mhammond at users.sourceforge.net (Mark Hammond)
Date: Thu, 28 Jun 2007 18:06:01 -0700
Subject: [Spambayes-checkins] spambayes/Outlook2000 config.py,1.38,1.39
Message-ID: <20070629010606.7506F1E400E@bag.python.org>
Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv20708/Outlook2000
Modified Files:
config.py
Log Message:
Fix some string localization problems - strings such as "Untouched", "Moved"
and "Copied" are used internally as flags, not as literals displayed to the
user.
Index: config.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/config.py,v
retrieving revision 1.38
retrieving revision 1.39
diff -C2 -d -r1.38 -r1.39
*** config.py 11 Feb 2005 21:05:49 -0000 1.38
--- config.py 29 Jun 2007 01:05:58 -0000 1.39
***************
*** 26,31 ****
FOLDER_ID = r"\(\'[a-fA-F0-9]+\', \'[a-fA-F0-9]+\'\)"
FIELD_NAME = r"[a-zA-Z0-9 ]+"
! FILTER_ACTION = _("Untouched"), _("Moved"), _("Copied")
! MSG_READ_STATE = _("None"), _("Read"), _("Unread")
from spambayes.OptionsClass import OptionsClass, Option
--- 26,34 ----
FOLDER_ID = r"\(\'[a-fA-F0-9]+\', \'[a-fA-F0-9]+\'\)"
FIELD_NAME = r"[a-zA-Z0-9 ]+"
! # These are stored in the INI file. They must not be localized - we can't
! # have all option settings being unrecognized just because a new localization
! # becomes available for users. The dialogs manage this.
! FILTER_ACTION = "Untouched", "Moved", "Copied"
! MSG_READ_STATE = "None", "Read", "Unread"
from spambayes.OptionsClass import OptionsClass, Option
From mhammond at users.sourceforge.net Fri Jun 29 03:06:55 2007
From: mhammond at users.sourceforge.net (Mark Hammond)
Date: Thu, 28 Jun 2007 18:06:55 -0700
Subject: [Spambayes-checkins] spambayes/Outlook2000/dialogs dialog_map.py,
1.50, 1.51
Message-ID: <20070629010657.89BF01E4009@bag.python.org>
Update of /cvsroot/spambayes/spambayes/Outlook2000/dialogs
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv21117/Outlook2000/dialogs
Modified Files:
dialog_map.py
Log Message:
Fix localization of some strings in the dialogs.
Index: dialog_map.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/dialogs/dialog_map.py,v
retrieving revision 1.50
retrieving revision 1.51
diff -C2 -d -r1.50 -r1.51
*** dialog_map.py 11 Feb 2005 21:05:51 -0000 1.50
--- dialog_map.py 29 Jun 2007 01:06:53 -0000 1.51
***************
*** 477,481 ****
"Filter.watch_folder_ids",
"Filter.watch_include_sub"),
! (ComboProcessor, "IDC_ACTION_CERTAIN", "Filter.spam_action"),
(FolderIDProcessor, "IDC_FOLDER_CERTAIN IDC_BROWSE_CERTAIN",
"Filter.spam_folder_id"),
--- 477,482 ----
"Filter.watch_folder_ids",
"Filter.watch_include_sub"),
! (ComboProcessor, "IDC_ACTION_CERTAIN", "Filter.spam_action",
! _("Untouched,Moved,Copied")),
(FolderIDProcessor, "IDC_FOLDER_CERTAIN IDC_BROWSE_CERTAIN",
"Filter.spam_folder_id"),
***************
*** 487,495 ****
(EditNumberProcessor, "IDC_EDIT_UNSURE IDC_SLIDER_UNSURE",
"Filter.unsure_threshold"),
! (ComboProcessor, "IDC_ACTION_UNSURE", "Filter.unsure_action"),
(BoolButtonProcessor, "IDC_MARK_UNSURE_AS_READ", "Filter.unsure_mark_as_read"),
(FolderIDProcessor, "IDC_FOLDER_HAM IDC_BROWSE_HAM",
"Filter.ham_folder_id"),
! (ComboProcessor, "IDC_ACTION_HAM", "Filter.ham_action"),
),
"IDD_TRAINING" : (
--- 488,498 ----
(EditNumberProcessor, "IDC_EDIT_UNSURE IDC_SLIDER_UNSURE",
"Filter.unsure_threshold"),
! (ComboProcessor, "IDC_ACTION_UNSURE", "Filter.unsure_action",
! _("Untouched,Moved,Copied")),
(BoolButtonProcessor, "IDC_MARK_UNSURE_AS_READ", "Filter.unsure_mark_as_read"),
(FolderIDProcessor, "IDC_FOLDER_HAM IDC_BROWSE_HAM",
"Filter.ham_folder_id"),
! (ComboProcessor, "IDC_ACTION_HAM", "Filter.ham_action",
! _("Untouched,Moved,Copied")),
),
"IDD_TRAINING" : (
From mhammond at users.sourceforge.net Fri Jun 29 03:08:45 2007
From: mhammond at users.sourceforge.net (Mark Hammond)
Date: Thu, 28 Jun 2007 18:08:45 -0700
Subject: [Spambayes-checkins] spambayes/spambayes i18n.py,1.7,1.8
Message-ID: <20070629010849.692B21E4009@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv21706/spambayes
Modified Files:
i18n.py
Log Message:
To help in testing localizations, let SPAMBAYES_LANG in the environment
override getdefaultlocale()[0]
Index: i18n.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/i18n.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** i18n.py 8 Mar 2007 23:21:30 -0000 1.7
--- i18n.py 29 Jun 2007 01:08:43 -0000 1.8
***************
*** 115,120 ****
"""Get the default language for the locale."""
# Note that this may return None.
! return getdefaultlocale()[0]
!
def add_language(self, lang_code=None):
"""Add a language to the current languages list.
--- 115,123 ----
"""Get the default language for the locale."""
# Note that this may return None.
! try:
! return os.environ["SPAMBAYES_LANG"]
! except KeyError:
! return getdefaultlocale()[0]
!
def add_language(self, lang_code=None):
"""Add a language to the current languages list.
From mhammond at users.sourceforge.net Fri Jun 29 04:03:19 2007
From: mhammond at users.sourceforge.net (Mark Hammond)
Date: Thu, 28 Jun 2007 19:03:19 -0700
Subject: [Spambayes-checkins] spambayes/windows/py2exe setup_all.py, 1.28,
1.29
Message-ID: <20070629020323.401B61E4009@bag.python.org>
Update of /cvsroot/spambayes/spambayes/windows/py2exe
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv11161
Modified Files:
setup_all.py
Log Message:
include gocr.exe and gocr.txt in the binary distribution.
Index: setup_all.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/windows/py2exe/setup_all.py,v
retrieving revision 1.28
retrieving revision 1.29
diff -C2 -d -r1.28 -r1.29
*** setup_all.py 26 Mar 2007 08:03:14 -0000 1.28
--- setup_all.py 29 Jun 2007 02:03:16 -0000 1.29
***************
*** 178,181 ****
--- 178,186 ----
["", [os.path.join(sb_top_dir, r"windows\resources\sbicon.ico")]],
["", [os.path.join(sb_top_dir, r"LICENSE.txt")]],
+ # We insist gocr.exe is in the 'spambayes' package dir (we can make
+ # this smarter as necessary)
+ ["bin", [os.path.join(sb_top_dir, "spambayes", "gocr.exe")]],
+ # Our .txt file with info on gocr itself.
+ ["bin", [os.path.join(sb_top_dir, "windows", "py2exe", "gocr.txt")]],
]