[Python-checkins] r56541 - in tracker/instances/python-dev-spambayes-integration: extensions/spambayes.py html/file.item.html html/msg.item.html schema.py
erik.forsberg
python-checkins at python.org
Wed Jul 25 17:40:24 CEST 2007
Author: erik.forsberg
Date: Wed Jul 25 17:40:23 2007
New Revision: 56541
Added:
tracker/instances/python-dev-spambayes-integration/extensions/spambayes.py
Modified:
tracker/instances/python-dev-spambayes-integration/html/file.item.html
tracker/instances/python-dev-spambayes-integration/html/msg.item.html
tracker/instances/python-dev-spambayes-integration/schema.py
Log:
The beginnings of advanced spambayes integration:
- Schema modified to include two new attributes on the file and msg
classes:
* spambayes_score
* spambayes_misclassified
- New action added by extensions/spambayes.py for training a msg/file as
spam or ham.
- Item pages for the file and msg classes modified to allow training by
clicking on a button.
Added: tracker/instances/python-dev-spambayes-integration/extensions/spambayes.py
==============================================================================
--- (empty file)
+++ tracker/instances/python-dev-spambayes-integration/extensions/spambayes.py Wed Jul 25 17:40:23 2007
@@ -0,0 +1,68 @@
+from roundup.cgi.actions import Action
+from roundup.cgi.exceptions import *
+
+import xmlrpclib, socket
+
+def extract_classinfo(db, classname, nodeid):
+    """Return ``(content, tokens)`` describing one node for SpamBayes.
+
+    ``content`` is the node's 'content' property, fetched through its
+    class.  ``tokens`` is a list of three "name:value" strings: the class
+    name, the node's author id, and the difference between the node's
+    creation timestamp and the creation timestamp of the author's user
+    node.
+    """
+    item = db.getnode(classname, nodeid)
+
+    # Node timestamp is read first, then the author's, as the subtraction
+    # needs them in that order.
+    item_created = item['creation'].timestamp()
+    author_node = db.getnode('user', item['author'])
+    authorage = item_created - author_node['creation'].timestamp()
+
+    tokens = [
+        "klass:%s" % classname,
+        "author:%s" % item['author'],
+        "authorage:%d" % int(authorage),
+    ]
+
+    content = db.getclass(classname).get(nodeid, 'content')
+    return (content, tokens)
+
+def train_spambayes(db, content, tokens, is_spam):
+    """Send one item to the SpamBayes XML-RPC server as a training sample.
+
+    The server URI is read from the SPAMBAYES_URI option of the detectors
+    configuration.  ``content`` and ``tokens`` are passed to the server's
+    ``train`` method together with ``is_spam`` (True to train as spam,
+    False to train as ham).
+
+    Returns ``(True, None)`` when the call succeeds, or
+    ``(False, message)`` when it fails with a socket or XML-RPC error.
+    """
+    spambayes_uri = db.config.detectors['SPAMBAYES_URI']
+    # SPAMBAYES_SPAM_CUTOFF is intentionally not read here: training never
+    # uses it, and parsing it could only add a way for training to fail.
+
+    server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
+    try:
+        server.train({'content':content}, tokens, {}, is_spam)
+    except (socket.error, xmlrpclib.Error), e:
+        return (False, str(e))
+    return (True, None)
+
+
+class SpambayesClassify(Action):
+    """Action that trains SpamBayes on the item addressed by the request.
+
+    The submitted form must carry either a "trainspam" or a "trainham"
+    field.  On successful training the item's spambayes_score is set to
+    1.0 (spam) or 0.0 (ham).
+    """
+
+    def handle(self):
+        """Train on the current item and store the outcome on it.
+
+        Appends to self.client.ok_message on success.  Appends to
+        self.client.error_message when neither training field was
+        submitted or when the training call reports an error.
+        """
+        if self.form.has_key("trainspam"):
+            is_spam = True
+        elif self.form.has_key("trainham"):
+            is_spam = False
+        else:
+            # Without this guard is_spam would be unbound below and the
+            # request would die with a NameError.
+            self.client.error_message.append(
+                self._('No training action specified'))
+            return
+
+        (content, tokens) = extract_classinfo(self.db,
+                                              self.classname, self.nodeid)
+
+        (status, errmsg) = train_spambayes(self.db, content, tokens,
+                                           is_spam)
+
+        node = self.db.getnode(self.classname, self.nodeid)
+        props = {}
+
+        if status:
+            # NOTE(review): this only re-asserts a flag that is already
+            # set on the node; confirm whether it was meant to be set
+            # when the manual verdict disagrees with the stored score.
+            if node.get('spambayes_misclassified', False):
+                props['spambayes_misclassified'] = True
+
+            props['spambayes_score'] = 1.0
+
+            s = " SPAM"
+            if not is_spam:
+                props['spambayes_score'] = 0.0
+                s = " HAM"
+            self.client.ok_message.append(self._('Message classified as') + s)
+        else:
+            self.client.error_message.append(self._('Unable to classify message, got error:') + errmsg)
+
+        klass = self.db.getclass(self.classname)
+        klass.set(self.nodeid, **props)
+        self.db.commit()
+
+
+def init(instance):
+    """Register the SpamBayes training action on the tracker instance."""
+    action_name = "spambayes_classify"
+    instance.registerAction(action_name, SpambayesClassify)
+
Modified: tracker/instances/python-dev-spambayes-integration/html/file.item.html
==============================================================================
--- tracker/instances/python-dev-spambayes-integration/html/file.item.html (original)
+++ tracker/instances/python-dev-spambayes-integration/html/file.item.html Wed Jul 25 17:40:23 2007
@@ -29,6 +29,15 @@
<td style="border: none" tal:condition="python: context.is_edit_ok()">Please note that
for security reasons, it's not permitted to set content type to <i>text/html</i>.</td>
</tr>
+ <tr>
+ <th i18n:translate="">SpamBayes Score</th>
+ <td tal:content="structure context/spambayes_score/plain"></td>
+ </tr>
+
+ <tr>
+ <th i18n:translate="">Marked as misclassified</th>
+ <td tal:content="structure context/spambayes_misclassified/plain"></td>
+ </tr>
<tr>
<td>
@@ -48,6 +57,15 @@
tal:attributes="href string:file${context/id}/${context/name}"
i18n:translate="">download</a>
+ <form method="POST" onSubmit="return submit_once()"
+ enctype="multipart/form-data"
+ tal:attributes="action context/designator">
+
+ <input type="hidden" name="@action" value="spambayes_classify">
+ <input type="submit" name="trainspam" value="Mark as SPAM" i18n:attributes="value">
+ <input type="submit" name="trainham" value="Mark as HAM (not SPAM)" i18n:attributes="value">
+ </form>
+
<tal:block tal:condition="context/id" tal:replace="structure context/history" />
</td>
Modified: tracker/instances/python-dev-spambayes-integration/html/msg.item.html
==============================================================================
--- tracker/instances/python-dev-spambayes-integration/html/msg.item.html (original)
+++ tracker/instances/python-dev-spambayes-integration/html/msg.item.html Wed Jul 25 17:40:23 2007
@@ -47,10 +47,34 @@
<th i18n:translate="">Date</th>
<td tal:content="context/date"></td>
</tr>
+
+ <tr>
+ <th i18n:translate="">SpamBayes Score</th>
+ <td tal:content="structure context/spambayes_score/plain"></td>
+ </tr>
+
+ <tr>
+ <th i18n:translate="">Marked as misclassified</th>
+ <td tal:content="structure context/spambayes_misclassified/plain"></td>
+ </tr>
+
</table>
<table class="messages">
- <tr><th colspan=2 class="header" i18n:translate="">Content</th></tr>
+ <tr>
+ <th class="header" i18n:translate="">Content</th>
+ <th class="header">
+ <form method="POST" onSubmit="return submit_once()"
+ enctype="multipart/form-data"
+ tal:attributes="action context/designator">
+
+ <input type="hidden" name="@action" value="spambayes_classify">
+ <input type="submit" name="trainspam" value="Mark as SPAM" i18n:attributes="value">
+ <input type="submit" name="trainham" value="Mark as HAM (not SPAM)" i18n:attributes="value">
+ </form>
+ </th>
+
+</tr>
<tr>
<td class="content" colspan=2><pre tal:content="structure context/content/hyperlinked"></pre></td>
</tr>
Modified: tracker/instances/python-dev-spambayes-integration/schema.py
==============================================================================
--- tracker/instances/python-dev-spambayes-integration/schema.py (original)
+++ tracker/instances/python-dev-spambayes-integration/schema.py Wed Jul 25 17:40:23 2007
@@ -97,10 +97,14 @@
summary=String(),
files=Multilink("file"),
messageid=String(),
- inreplyto=String())
+ inreplyto=String(),
+ spambayes_score=Number(),
+ spambayes_misclassified=Boolean(),)
file = FileClass(db, "file",
- name=String())
+ name=String(),
+ spambayes_score=Number(),
+ spambayes_misclassified=Boolean(),)
# IssueClass automatically gets these properties in addition to the Class ones:
# title = String()
More information about the Python-checkins
mailing list