[Spambayes-checkins] spambayes/spambayes XMLRPCPlugin.py, 1.1.2.3, 1.1.2.4

Mon Jun 4 14:28:35 CEST 2007

Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv23471/spambayes

Modified Files:
      Tag: CORESVR
	XMLRPCPlugin.py 
Log Message:
+ docstring, refine API a bit

Index: XMLRPCPlugin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Attic/XMLRPCPlugin.py,v
retrieving revision 1.1.2.3
retrieving revision 1.1.2.4
diff -C2 -d -r1.1.2.3 -r1.1.2.4
*** XMLRPCPlugin.py	2 Jun 2007 21:42:08 -0000	1.1.2.3
--- XMLRPCPlugin.py	4 Jun 2007 12:28:33 -0000	1.1.2.4
***************
*** 1,6 ****
  
  import threading
  from email import Message, message_from_string
- 
  from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
  
--- 1,61 ----
  
+ """
+ XML-RPC plugin for SpamBayes core server.
+ 
+ This plugin opens an XML-RPC server in a separate thread listening to the
+ given host and port (default localhost:5001).  In Python 2.5 and later it
+ also enforces a path (default /sbrpc).
+ 
+ SECURITY NOTE: The XML-RPC plugin provides *NO SECURITY*.  It would be
+ unwise to listen to anything besides 'localhost'.  Similarly, when
+ running the core_server configured with the XML-RPC plugin it's quite
+ likely that the main core_server interface will have to listen to
+ something other than localhost to allow administrators to administer
+ it remotely.  Access to that URL should only be available to a set of
+ trusted administrators, probably by proxy through some other webserver
+ which provides the necessary authentication support.
+ 
+ The XML-RPC server exposes the following methods:
+ 
+     score(form_dict, extra_tokens) -> (score, evidence)
+         Scores a dictionary representing the contents of a web
+         submission form and a list of any extra tokens provided
+         by the caller.  The return value is a list containing
+         the spam probability of the input and a set of (token,
+         probability) pairs for the most significant tokens.
+ 
+     score_mime(msg, encoding) -> (score, evidence)
+         Scores a MIME message (a string encoded using encoding).
+         The return value is as for the score method.
+ 
+     train(form_dict, extra_tokens, is_spam) -> ''
+         Trains the given form and tokens as ham or spam.
+ 
+     train_mime(msg, encoding, is_spam) -> ''
+         Trains the given MIME message as ham or spam.
+ 
+     retrain() -> (nham, nspam)
+         Retrain from scratch on all saved MIME messages.
+ 
+     get_corpus(is_spam) -> string
+         Retrieve the current ham or spam corpus (in Unix mbox format).
+ 
+     set_corpus(string, is_spam) -> ''
+         Set the current ham or spam corpus (string in Unix mbox format).
+         Should normally be followed by a call to retrain().
+ 
+ The following options are available in the Plugin section of the options.
+ 
+     xmlrpc_host - host to listen to (default: localhost)
+     xmlrpc_port - port to listen to (default: 5001)
+     xmlrpc_path - path to support (default: /sbrpc)
+     hambox - path on server to ham corpus (default: TBD...)
+     spambox - path on server to spam corpus (default: TBD...)
+ 
+ """
+ 
  import threading
+ import xmlrpclib
  from email import Message, message_from_string
  from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler
  
***************
*** 16,19 ****
--- 71,76 ----
          ('Plugin',            'xmlrpc_host'),
          ('Plugin',            'xmlrpc_port'),
+         ('Plugin',            'hambox'),
+         ('Plugin',            'spambox'),
          )
  
***************
*** 27,53 ****
          # Path is only enforced in Python 2.5 and later but we set it anyway.
          self.server.RequestHandlerClass.rpc_paths = (path,)
!         self.server.register_function(self.score)
!         self.server.register_function(self.score_mime)
          self.thread = threading.Thread(target=self.server.serve_forever)
          self.thread.start()
  
!     # placeholders
!     def score(self, form, attachments, extra_tokens):
!         mime_message = form_to_mime(form, attachments, tokens)
          return self.score_mime(mime_message)
  
!     def score_mime(self, msg, mime_type):
          if self.state.bayes is None:
              self.state.create_workers()
!         msg = unicode(msg, mime_type)
          msg = message_from_string(msg)
!         tokens = tokenize(msg)
!         return self.state.bayes.spamprob(tokens, evidence=True)
  
! def form_to_mime(form, attachments, extra_tokens):
      msg = Message.Message()
      msg.set_type("multipart/digest")
      main = Message.Message()
!     main.set_payload(" ".join([str(v) for v in form.values()]))
      msg.attach(main)
      for msg_type, content in attachments:
--- 84,145 ----
          # Path is only enforced in Python 2.5 and later but we set it anyway.
          self.server.RequestHandlerClass.rpc_paths = (path,)
!         self.server.register_instance(self)
          self.thread = threading.Thread(target=self.server.serve_forever)
          self.thread.start()
  
!     def _dispatch(self, method, params):
!         if method in ("score", "score_mime", "train", "train_mime"):
!             return getattr(self, method)(*params)
!         elif method in ("retrain", "get_corpus", "set_corpus"):
!             return "%s not yet implemented" % method
!         else:
!             raise xmlrpclib.Fault(404, '"%s" is not supported' % method)
! 
!     def score(self, form_dict, extra_tokens):
!         """Score a dictionary + extra tokens."""
!         mime_message = form_to_mime(form_dict, extra_tokens)
          return self.score_mime(mime_message)
  
!     def score_mime(self, msg, encoding):
!         """Score a message representing a MIME document.
! 
!         The msg argument will be a string in the given encoding.
!         """
!         tokens = self.tokenize(msg, encoding)
!         return self.state.bayes.spamprob(tokens, evidence=True)
! 
!     def train(self, form_dict, extra_tokens, is_spam):
!         """Train the form and extra tokens."""
!         mime_message = form_to_mime(form_dict, extra_tokens)
!         return self.train_mime(mime_message, is_spam)
! 
!     def train_mime(self, msg, is_spam):
!         """Train the message."""
!         tokens = self.tokenize(msg, encoding)
!         return self.state.bayes.learn(tokens, is_spam)
! 
!     def tokenize(self, msg, encoding):
!         """Tokenize the message.  Make sure the bayes instance is available."""
          if self.state.bayes is None:
              self.state.create_workers()
!         msg = unicode(msg, encoding)
          msg = message_from_string(msg)
!         return tokenize(msg)
  
! def form_to_mime(form, mime_type, extra_tokens):
!     """Encode submission form bits as a MIME message.
! 
!     form - a dictionary of key/value pairs representing the form's contents
!     extra_tokens - a sequence of synthetic tokens generated by the caller.
!     For example, if you include a honeypot hidden field in your form, you
!     might generate a synthetic token which tells if it was filled in or not.
!     You might also generate tokens which indicate how long a submitting
!     username has existed or how many successful posts that username has
!     submitted.
!     """
      msg = Message.Message()
      msg.set_type("multipart/digest")
      main = Message.Message()
!     main.set_payload(" ".join(["%s:%s" % (k, v) for (k, v) in form.items()]))
      msg.attach(main)
      for msg_type, content in attachments: