[Spambayes-checkins] spambayes/spambayes/test README.txt, NONE, 1.1 test_storage.py, NONE, 1.1

Thu Jul 24 21:22:06 EDT 2003

Update of /cvsroot/spambayes/spambayes/spambayes/test
In directory sc8-pr-cvs1:/tmp/cvs-serv29376

Added Files:
	README.txt test_storage.py 
Log Message:
A little unit test suite - currently checks the classifier's storage.


--- NEW FILE: README.txt ---
This is a directory for unit tests of spambayes itself.
It is NOT to test the performance of the classification algorithms,
but tests the semantics of the insfrastructure.

--- NEW FILE: test_storage.py ---
# Test the basic storage operations of the classifier.

import unittest, os, sys

# Hack sys.path - 2 levels up must be on sys.path.
try:
    this_file = __file__
except NameError:
    this_file = sys.argv[0]
sb_dir = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(this_file))))
if sb_dir not in sys.path:
    sys.path.append(sb_dir)

from spambayes.storage import DBDictClassifier, PickledClassifier

class _StorageTestBase(unittest.TestCase):
    StorageClass = None

    def setUp(self):
        import tempfile
        self.db_name = tempfile.mktemp("spambayestest")
        self.classifier = self.__class__.StorageClass(self.db_name)

    def tearDown(self):
        self.classifier = None
        if os.path.isfile(self.db_name):
            os.remove(self.db_name)
    
    def _checkWordCounts(self, word, expected_ham, expected_spam):
        assert word
        info = self.classifier._wordinfoget(word)
        if info is None:
            if expected_ham==0 and expected_spam==0:
                return
            self.fail("_CheckWordCounts for '%s' got None!")
        if info.hamcount != expected_ham:
            self.fail("Hamcount '%s' wrong - got %d, but expected %d" \
                        % (word, info.hamcount, expected_ham))
        if info.spamcount != expected_spam:
            self.fail("Spamcount '%s' wrong - got %d, but expected %d" \
                        % (word, info.spamcount, expected_spam))

    def _checkAllWordCounts(self, counts, do_persist):
        for info in counts:
            self._checkWordCounts(*info)
        if do_persist:
            self.classifier.store()
            self.classifier.load()
            self._checkAllWordCounts(counts, False)

    def testHapax(self):
        self._dotestHapax(False)
        self._dotestHapax(True)

    def _dotestHapax(self, do_persist):
        c = self.classifier
        c.learn(["common","nearly_hapax", "hapax", ], False)
        c.learn(["common","nearly_hapax"], False)
        c.learn(["common"], False)
        # All the words should be there.
        self._checkAllWordCounts( (("common", 3, 0),
                                   ("nearly_hapax", 2, 0),
                                   ("hapax", 1, 0)),
                                  do_persist)
        # Unlearn the complete set.
        c.unlearn(["common","nearly_hapax", "hapax", ], False)
        # 'hapax' removed, rest still there
        self._checkAllWordCounts( (("common", 2, 0),
                                   ("nearly_hapax", 1, 0),
                                   ("hapax", 0, 0)),
                                  do_persist)
        # Re-learn that set, so deleted hapax is reloaded
        c.learn(["common","nearly_hapax", "hapax", ], False)
        self._checkAllWordCounts( (("common", 3, 0),
                                   ("nearly_hapax", 2, 0),
                                   ("hapax", 1, 0)),
                                  do_persist)
        # Back to where we started - start unlearning all down to zero.
        c.unlearn(["common","nearly_hapax", "hapax", ], False)
        # 'hapax' removed, rest still there
        self._checkAllWordCounts( (("common", 2, 0),
                                   ("nearly_hapax", 1, 0),
                                   ("hapax", 0, 0)),
                                  do_persist)

        # Unlearn the next set.
        c.unlearn(["common","nearly_hapax"], False)
        self._checkAllWordCounts( (("common", 1, 0),
                                   ("nearly_hapax", 0, 0),
                                   ("hapax", 0, 0)),
                                  do_persist)

        
        c.unlearn(["common"], False)
        self._checkAllWordCounts( (("common", 0, 0),
                                   ("nearly_hapax", 0, 0),
                                   ("hapax", 0, 0)),
                                  do_persist)

# Test classes for each classifier.
class PickleStorageTestCase(_StorageTestBase):
    StorageClass = PickledClassifier
    def setUp(self):
        return _StorageTestBase.setUp(self)

class DBStorageTestCase(_StorageTestBase):
    StorageClass = DBDictClassifier
    def setUp(self):
        return _StorageTestBase.setUp(self)
    def tearDown(self):
        self.classifier.db.close()
        _StorageTestBase.tearDown(self)

def suite():
    # We dont want our base class run
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(PickleStorageTestCase))
    suite.addTest(unittest.makeSuite(DBStorageTestCase))
    return suite

if __name__=='__main__':
    unittest.main(argv=sys.argv + ['suite'])