[Spambayes-checkins] spambayes/spambayes/test test_sb_filter.py,
NONE, 1.1.2.1
Tony Meyer
anadelonbrin at users.sourceforge.net
Thu Jan 13 23:01:57 CET 2005
Update of /cvsroot/spambayes/spambayes/spambayes/test
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13184/spambayes/test
Added Files:
Tag: release_1_0-branch
test_sb_filter.py
Log Message:
Backport unittests for sb_filter.py
--- NEW FILE: test_sb_filter.py ---
# Test sb_filter script.
import os
import sys
import email
import unittest
import sb_test_support
sb_test_support.fix_sys_path()
from spambayes.Options import options
from spambayes.tokenizer import tokenize
from spambayes.storage import open_storage
import sb_filter
# We borrow the test messages that test_sb_server uses.
# I doubt it really makes much difference, but if we wanted more than
# one message of each type (the tests should all handle this ok) then
# Richie's hammer.py script has code for generating any number of
# randomly composed email messages.
from test_sb_server import good1, spam1
# Rebind the borrowed names to parsed email message objects; the tests
# below hand these objects to tokenize() and to the filter methods.
good1 = email.message_from_string(good1)
spam1 = email.message_from_string(spam1)
# __file__ may be undefined when this module is run as a script (the
# original note attributes this to Python 2.2); fall back to the script
# path from the command line.
try:
    __file__
except NameError:
    # Python 2.2
    __file__ = sys.argv[0]

# Path of the throwaway database the tests create beside this module.
TEMP_DBM_NAME = os.path.join(os.path.dirname(__file__), "temp.dbm")

# The chances of anyone having a file with this name in the test
# directory are minute, but we don't want to wipe anything, so make
# sure that it doesn't already exist.  Our tearDown code gets rid
# of our copy (whether the tests pass or fail) so it shouldn't
# be ours.
if os.path.exists(TEMP_DBM_NAME):
    # A single parenthesised expression prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("%s already exists. Please remove this file "
          "before running these tests (a file by that name will be "
          "created and destroyed as part of the tests)." % TEMP_DBM_NAME)
    sys.exit(1)
class HammieFilterTest(unittest.TestCase):
    """Unit tests for sb_filter.HammieFilter.

    Every test runs against a temporary dbm database (TEMP_DBM_NAME)
    that tearDown removes again, whether the test passed or failed.
    """

    def setUp(self):
        # Point a fresh filter at the throwaway dbm file.
        self.h = sb_filter.HammieFilter()
        self.h.dbname = TEMP_DBM_NAME
        self.h.usedb = "dbm"

    def tearDown(self):
        # Close the classifier if a test left it open, then delete the
        # temporary database; a missing file is not an error.
        if self.h.h:
            self.h.close()
        try:
            os.remove(TEMP_DBM_NAME)
        except OSError:
            pass

    def _fake_store(self):
        # Stand-in for the classifier's store() that only records
        # that it was called.
        self.done = True

    def test_open(self):
        mode = 'c'
        self.h.open(mode)
        self.assertEqual(self.h.mode, mode)
        # Check the underlying classifier exists.
        self.assert_(self.h.h is not None)
        # This can also be called when there is an
        # existing classifier, but we want to change
        # mode.  Verify that we store the old database
        # first if we were not in readonly mode.
        self.done = False
        self.h.h.store = self._fake_store
        mode = 'r'
        self.h.open(mode)
        self.assertEqual(self.h.mode, mode)
        self.assert_(self.done)

    def test_close_readonly(self):
        # Must open with 'c' first, because otherwise it doesn't exist.
        self.h.open('c')
        self.h.open('r')
        self.done = False
        self.h.h.store = self._fake_store
        # Verify that the classifier is not stored if we are
        # in readonly mode.
        self.h.close()
        self.assert_(not self.done)
        self.assertEqual(self.h.h, None)

    def test_close(self):
        self.h.open('c')
        self.done = False
        self.h.h.store = self._fake_store
        # Verify that the classifier is stored if we are
        # not in readonly mode.
        self.h.close()
        self.assert_(self.done)
        self.assertEqual(self.h.h, None)

    def test_newdb(self):
        # Create an existing classifier.
        b = open_storage(TEMP_DBM_NAME, "dbm")
        b.learn(tokenize(spam1), True)
        b.learn(tokenize(good1), False)
        b.store()
        b.close()
        # Create the fresh classifier.
        self.h.newdb()
        # Verify that the classifier isn't open.
        self.assertEqual(self.h.h, None)
        # Verify that any existing classifier with the same name
        # is overwritten.
        b = open_storage(TEMP_DBM_NAME, "dbm")
        self.assertEqual(b.nham, 0)
        self.assertEqual(b.nspam, 0)
        b.close()

    def test_filter(self):
        # Verify that the msg has the classification header added.
        self.h.open('c')
        self.h.h.bayes.learn(tokenize(good1), False)
        self.h.h.bayes.learn(tokenize(spam1), True)
        self.h.h.store()
        result = email.message_from_string(self.h.filter(spam1))
        header_name = options["Headers", "classification_header_name"]
        self.assert_(result[header_name].startswith(
            options["Headers", "header_spam_string"]))
        result = email.message_from_string(self.h.filter(good1))
        header_name = options["Headers", "classification_header_name"]
        self.assert_(result[header_name].startswith(
            options["Headers", "header_ham_string"]))

    def test_filter_train(self):
        # Verify that the msg has the classification header
        # added, and that it was correctly trained.
        self.h.open('c')
        self.h.h.bayes.learn(tokenize(good1), False)
        self.h.h.bayes.learn(tokenize(spam1), True)
        self.h.h.store()
        result = email.message_from_string(self.h.filter_train(spam1))
        header_name = options["Headers", "classification_header_name"]
        self.assert_(result[header_name].startswith(
            options["Headers", "header_spam_string"]))
        self.assertEqual(self.h.h.bayes.nspam, 2)
        result = email.message_from_string(self.h.filter_train(good1))
        header_name = options["Headers", "classification_header_name"]
        self.assert_(result[header_name].startswith(
            options["Headers", "header_ham_string"]))
        self.assertEqual(self.h.h.bayes.nham, 2)

    def test_train_ham(self):
        # Verify that the classifier gets trained with the message.
        self.h.open('c')
        self.h.train_ham(good1)
        self.assertEqual(self.h.h.bayes.nham, 1)
        self.assertEqual(self.h.h.bayes.nspam, 0)
        for token in tokenize(good1):
            wi = self.h.h.bayes._wordinfoget(token)
            self.assertEqual(wi.hamcount, 1)
            self.assertEqual(wi.spamcount, 0)

    def test_train_spam(self):
        # Verify that the classifier gets trained with the message.
        self.h.open('c')
        self.h.train_spam(spam1)
        self.assertEqual(self.h.h.bayes.nham, 0)
        self.assertEqual(self.h.h.bayes.nspam, 1)
        for token in tokenize(spam1):
            wi = self.h.h.bayes._wordinfoget(token)
            self.assertEqual(wi.hamcount, 0)
            self.assertEqual(wi.spamcount, 1)

    def test_untrain_ham(self):
        self.h.open('c')
        # Put a message in the classifier to be removed.
        self.h.h.bayes.learn(tokenize(good1), False)
        # Verify that the classifier gets untrained with the message.
        self.h.untrain_ham(good1)
        self.assertEqual(self.h.h.bayes.nham, 0)
        self.assertEqual(self.h.h.bayes.nspam, 0)
        # Check the tokens of the message that was trained and then
        # untrained (good1); checking spam1's tokens would not show
        # that the ham tokens were removed.
        for token in tokenize(good1):
            wi = self.h.h.bayes._wordinfoget(token)
            self.assertEqual(wi, None)

    def test_untrain_spam(self):
        self.h.open('c')
        # Put a message in the classifier to be removed.
        self.h.h.bayes.learn(tokenize(spam1), True)
        # Verify that the classifier gets untrained with the message.
        self.h.untrain_spam(spam1)
        self.assertEqual(self.h.h.bayes.nham, 0)
        self.assertEqual(self.h.h.bayes.nspam, 0)
        for token in tokenize(spam1):
            wi = self.h.h.bayes._wordinfoget(token)
            self.assertEqual(wi, None)
def suite():
    """Return a TestSuite holding the tests of every test case class
    listed here (currently only HammieFilterTest)."""
    # Use a local name that does not shadow this function.
    tests = unittest.TestSuite()
    for cls in (HammieFilterTest,
                ):
        tests.addTest(unittest.makeSuite(cls))
    return tests
if __name__ == '__main__':
    # 'suite' is appended to the arguments; presumably this names the
    # suite() function as the test to run -- confirm against
    # sb_test_support.unittest_main.
    sb_test_support.unittest_main(argv=sys.argv + ['suite'])
More information about the Spambayes-checkins
mailing list