[Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.3, 1.1.2.4
Skip Montanaro
montanaro at users.sourceforge.net
Mon Jun 4 14:28:35 CEST 2007
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv23471/spambayes
Modified Files:
Tag: CORESVR
XMLRPCPlugin.py
Log Message:
+ docstring, refine API a bit
Index: XMLRPCPlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v
retrieving revision 1.1.2.3
retrieving revision 1.1.2.4
diff -C2 -d -r1.1.2.3 -r1.1.2.4
*** XMLRPCPlugin.py 2 Jun 2007 21:42:08 -0000 1.1.2.3
--- XMLRPCPlugin.py 4 Jun 2007 12:28:33 -0000 1.1.2.4
***************
*** 1,6 ****
import threading
from email import Message, message_from_string
-
from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
--- 1,61 ----
+ """
+ XML-RPC plugin for SpamBayes core server.
+
+ This plugin opens an XML-RPC server in a separate thread listening to the
+ given host and port (default localhost:5001). In Python 2.5 and later it
+ also enforces a path (default /sbrpc).
+
+ SECURITY NOTE: The XML-RPC plugin provides *NO SECURITY*. It would be
+ unwise to listen to anything besides 'localhost'. Similarly, when
+ running the core_server configured with the XML-RPC plugin it's quite
+ likely that the main core_server interface will have to listen to
+ something other than localhost to allow administrators to administer
+ it remotely. Access to that URL should only be available to a set of
+ trusted administrators, probably by proxy through some other webserver
+ which provides the necessary authentication support.
+
+ The XML-RPC server exposes the following methods:
+
+ score(form_dict, extra_tokens) -> (score, evidence)
+ Scores a dictionary representing the contents of a web
+ submission form and a list of any extra tokens provided
+ by the caller. The return value is a list containing
+ the spam probability of the input and a set of (token,
+ probability) pairs for the most significant tokens.
+
+ score_mime(msg, encoding) -> (score, evidence)
+ Scores a MIME message (a string encoded using encoding).
+ The return value is as for the score method.
+
+ train(form_dict, extra_tokens, is_spam) -> ''
+ Trains the given form and tokens as ham or spam.
+
+ train_mime(msg, encoding, is_spam) -> ''
+ Trains the given MIME message as ham or spam.
+
+ retrain() -> (nham, nspam)
+ Retrain from scratch on all saved MIME messages.
+
+ get_corpus(is_spam) -> string
+ Retrieve the current ham or spam corpus (in Unix mbox format).
+
+ set_corpus(string, is_spam) -> ''
+ Set the current ham or spam corpus (string in Unix mbox format).
+ Should normally be followed by a call to retrain().
+
+ The following options are available in the Plugin section of the options.
+
+ xmlrpc_host - host to listen to (default: localhost)
+ xmlrpc_port - port to listen to (default: 5001)
+ xmlrpc_path - path to support (default: /sbrpc)
+ hambox - path on server to ham corpus (default: TBD...)
+ spambox - path on server to spam corpus (default: TBD...)
+
+ """
+
import threading
+ import xmlrpclib
from email import Message, message_from_string
from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
***************
*** 16,19 ****
--- 71,76 ----
('Plugin', 'xmlrpc_host'),
('Plugin', 'xmlrpc_port'),
+ ('Plugin', 'hambox'),
+ ('Plugin', 'spambox'),
)
***************
*** 27,53 ****
# Path is only enforced in Python 2.5 and later but we set it anyway.
self.server.RequestHandlerClass.rpc_paths = (path,)
! self.server.register_function(self.score)
! self.server.register_function(self.score_mime)
self.thread = threading.Thread(target=self.server.serve_forever)
self.thread.start()
! # placeholders
! def score(self, form, attachments, extra_tokens):
! mime_message = form_to_mime(form, attachments, tokens)
return self.score_mime(mime_message)
! def score_mime(self, msg, mime_type):
if self.state.bayes is None:
self.state.create_workers()
! msg = unicode(msg, mime_type)
msg = message_from_string(msg)
! tokens = tokenize(msg)
! return self.state.bayes.spamprob(tokens, evidence=True)
! def form_to_mime(form, attachments, extra_tokens):
msg = Message.Message()
msg.set_type("multipart/digest")
main = Message.Message()
! main.set_payload(" ".join([str(v) for v in form.values()]))
msg.attach(main)
for msg_type, content in attachments:
--- 84,145 ----
# Path is only enforced in Python 2.5 and later but we set it anyway.
self.server.RequestHandlerClass.rpc_paths = (path,)
! self.server.register_instance(self)
self.thread = threading.Thread(target=self.server.serve_forever)
self.thread.start()
! def _dispatch(self, method, params):
! if method in ("score", "score_mime", "train", "train_mime"):
! return getattr(self, method)(*params)
! elif method in ("retrain", "get_corpus", "set_corpus"):
! return "%s not yet implemented" % method
! else:
! raise xmlrpclib.Fault(404, '"%s" is not supported' % method)
!
! def score(self, form_dict, extra_tokens):
! """Score a dictionary + extra tokens."""
! mime_message = form_to_mime(form_dict, extra_tokens)
return self.score_mime(mime_message)
! def score_mime(self, msg, encoding):
! """Score a message representing a MIME document.
!
! The msg argument will be a string in the given encoding.
! """
! tokens = self.tokenize(msg, encoding)
! return self.state.bayes.spamprob(tokens, evidence=True)
!
! def train(self, form_dict, extra_tokens, is_spam):
! """Train the form and extra tokens."""
! mime_message = form_to_mime(form_dict, extra_tokens)
! return self.train_mime(mime_message, is_spam)
!
! def train_mime(self, msg, is_spam):
! """Train the message."""
! tokens = self.tokenize(msg, encoding)
! return self.state.bayes.learn(tokens, is_spam)
!
! def tokenize(self, msg, encoding):
! """Tokenize the message. Make sure the bayes instance is available."""
if self.state.bayes is None:
self.state.create_workers()
! msg = unicode(msg, encoding)
msg = message_from_string(msg)
! return tokenize(msg)
! def form_to_mime(form, mime_type, extra_tokens):
! """Encode submission form bits as a MIME message.
!
! form - a dictionary of key/value pairs representing the form's contents
! extra_tokens - a sequence of synthetic tokens generated by the caller.
! For example, if you include a honeypot hidden field in your form, you
! might generate a synthetic token which tells if it was filled in or not.
! You might also generate tokens which indicate how long a submitting
! username has existed or how many successful posts that username has
! submitted.
! """
msg = Message.Message()
msg.set_type("multipart/digest")
main = Message.Message()
! main.set_payload(" ".join(["%s:%s" % (k, v) for (k, v) in form.items()]))
msg.attach(main)
for msg_type, content in attachments:
More information about the Spambayes-checkins
mailing list