[Python-checkins] r56541 - in tracker/instances/python-dev-spambayes-integration: extensions/spambayes.py html/file.item.html html/msg.item.html schema.py

erik.forsberg python-checkins at python.org
Wed Jul 25 17:40:24 CEST 2007


Author: erik.forsberg
Date: Wed Jul 25 17:40:23 2007
New Revision: 56541

Added:
   tracker/instances/python-dev-spambayes-integration/extensions/spambayes.py
Modified:
   tracker/instances/python-dev-spambayes-integration/html/file.item.html
   tracker/instances/python-dev-spambayes-integration/html/msg.item.html
   tracker/instances/python-dev-spambayes-integration/schema.py
Log:

The beginnings of advanced spambayes integration:

 - Schema modified to include two new attributes on the file and msg
   class:

   * spambayes_score
   * spambayes_misclassified

 - New action added by extensions/spambayes.py for training msg/file as
   spam or ham.

 - item pages for file and msg classes modified to allow training by
   clicking on a button.


Added: tracker/instances/python-dev-spambayes-integration/extensions/spambayes.py
==============================================================================
--- (empty file)
+++ tracker/instances/python-dev-spambayes-integration/extensions/spambayes.py	Wed Jul 25 17:40:23 2007
@@ -0,0 +1,68 @@
+from roundup.cgi.actions import Action
+from roundup.cgi.exceptions import *
+
+import xmlrpclib, socket
+
+def extract_classinfo(db, classname, nodeid):
+    node = db.getnode(classname, nodeid)
+
+    authorage = node['creation'].timestamp() - \
+                db.getnode('user', node['author'])['creation'].timestamp()
+
+    tokens = ["klass:%s" % classname,
+              "author:%s" % node['author'],
+              "authorage:%d" % int(authorage)]
+
+    klass = db.getclass(classname)
+    return (klass.get(nodeid, 'content'), tokens)
+
+def train_spambayes(db, content, tokens, is_spam):
+    spambayes_uri = db.config.detectors['SPAMBAYES_URI']
+    spam_cutoff = float(db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
+
+    server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
+    try:
+        server.train({'content':content}, tokens, {}, is_spam)
+        return (True, None)
+    except (socket.error, xmlrpclib.Error), e:
+        return (False, str(e))
+
+
+class SpambayesClassify(Action):
+    def handle(self):
+        (content, tokens) = extract_classinfo(self.db,
+                                              self.classname, self.nodeid)
+
+        if self.form.has_key("trainspam"):
+            is_spam = True
+        elif self.form.has_key("trainham"):
+            is_spam = False
+
+        (status, errmsg) = train_spambayes(self.db, content, tokens,
+                                           is_spam)
+
+        node = self.db.getnode(self.classname, self.nodeid)
+        props = {}
+
+        if status:
+            if node.get('spambayes_misclassified', False):
+                props['spambayes_misclassified':True]
+
+            props['spambayes_score'] = 1.0
+            
+            s = " SPAM"
+            if not is_spam:
+                props['spambayes_score'] = 0.0
+                s = " HAM"
+            self.client.ok_message.append(self._('Message classified as') + s)
+        else:
+            self.client.error_message.append(self._('Unable to classify message, got error:') + errmsg)
+
+        klass = self.db.getclass(self.classname)
+        klass.set(self.nodeid, **props)
+        self.db.commit()
+            
+
+def init(instance):
+    instance.registerAction("spambayes_classify", SpambayesClassify)
+    

Modified: tracker/instances/python-dev-spambayes-integration/html/file.item.html
==============================================================================
--- tracker/instances/python-dev-spambayes-integration/html/file.item.html	(original)
+++ tracker/instances/python-dev-spambayes-integration/html/file.item.html	Wed Jul 25 17:40:23 2007
@@ -29,6 +29,15 @@
   <td style="border: none" tal:condition="python: context.is_edit_ok()">Please note that
   for security reasons, it's not permitted to set content type to <i>text/html</i>.</td>
  </tr>
+ <tr>
+  <th i18n:translate="">SpamBayes Score</th>
+  <td tal:content="structure context/spambayes_score/plain"></td>
+ </tr>
+
+ <tr>
+  <th i18n:translate="">Marked as misclassified</th>
+  <td tal:content="structure context/spambayes_misclassified/plain"></td>
+ </tr>
 
  <tr>
   <td>
@@ -48,6 +57,15 @@
  tal:attributes="href string:file${context/id}/${context/name}"
  i18n:translate="">download</a>
 
+     <form method="POST" onSubmit="return submit_once()"
+       enctype="multipart/form-data"
+       tal:attributes="action context/designator">
+ 
+      <input type="hidden" name="@action" value="spambayes_classify">
+      <input type="submit" name="trainspam" value="Mark as SPAM" i18n:attributes="value">
+      <input type="submit" name="trainham" value="Mark as HAM (not SPAM)" i18n:attributes="value">
+     </form>
+
 <tal:block tal:condition="context/id" tal:replace="structure context/history" />
 
 </td>

Modified: tracker/instances/python-dev-spambayes-integration/html/msg.item.html
==============================================================================
--- tracker/instances/python-dev-spambayes-integration/html/msg.item.html	(original)
+++ tracker/instances/python-dev-spambayes-integration/html/msg.item.html	Wed Jul 25 17:40:23 2007
@@ -47,10 +47,34 @@
  <th i18n:translate="">Date</th>
  <td tal:content="context/date"></td>
 </tr>
+
+ <tr>
+  <th i18n:translate="">SpamBayes Score</th>
+  <td tal:content="structure context/spambayes_score/plain"></td>
+ </tr>
+
+ <tr>
+  <th i18n:translate="">Marked as misclassified</th>
+  <td tal:content="structure context/spambayes_misclassified/plain"></td>
+ </tr>
+
 </table>
 
 <table class="messages">
- <tr><th colspan=2 class="header" i18n:translate="">Content</th></tr>
+ <tr>
+   <th class="header" i18n:translate="">Content</th>
+   <th class="header">
+     <form method="POST" onSubmit="return submit_once()"
+       enctype="multipart/form-data"
+       tal:attributes="action context/designator">
+ 
+      <input type="hidden" name="@action" value="spambayes_classify">
+      <input type="submit" name="trainspam" value="Mark as SPAM" i18n:attributes="value">
+      <input type="submit" name="trainham" value="Mark as HAM (not SPAM)" i18n:attributes="value">
+     </form>
+   </th>
+
+</tr>
  <tr>
   <td class="content" colspan=2><pre tal:content="structure context/content/hyperlinked"></pre></td>
  </tr>

Modified: tracker/instances/python-dev-spambayes-integration/schema.py
==============================================================================
--- tracker/instances/python-dev-spambayes-integration/schema.py	(original)
+++ tracker/instances/python-dev-spambayes-integration/schema.py	Wed Jul 25 17:40:23 2007
@@ -97,10 +97,14 @@
                 summary=String(),
                 files=Multilink("file"),
                 messageid=String(),
-                inreplyto=String())
+                inreplyto=String(),
+                spambayes_score=Number(),
+                spambayes_misclassified=Boolean(),)
 
 file = FileClass(db, "file",
-                name=String())
+                name=String(),
+                spambayes_score=Number(),
+                spambayes_misclassified=Boolean(),)
 
 # IssueClass automatically gets these properties in addition to the Class ones:
 #   title = String()


More information about the Python-checkins mailing list