[Spambayes-checkins] spambayes/utilities hammie2cdb.py,NONE,1.1

Neil Schemenauer nascheme at users.sourceforge.net
Tue Jun 29 16:38:50 EDT 2004


Update of /cvsroot/spambayes/spambayes/utilities
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11627

Added Files:
	hammie2cdb.py 
Log Message:
Simple script that converts hammie databases into cdb databases (usable
by CdbClassifier).


--- NEW FILE: hammie2cdb.py ---
#!/usr/bin/env python
"""
Convert a hammie database to a cdb database.

usage %(prog)s [ -h ] [ -d <file> | -p <file> ] <cdbfile>

-h      - Print this usage message and exit.

-d file - Use a database-based classifier named file.

-p file - Use a pickle-based classifier named file.

"""

import sys
import getopt
from spambayes import cdb
from spambayes import storage
from spambayes.cdb_classifier import CdbClassifier

def usage(msg=None):
    """Write an optional error message and the module usage text to stderr.

    msg -- object to report before the usage text (converted with str());
           nothing extra is written when it is None.

    The module docstring is used as the usage text; its ``%(prog)s``
    placeholder is filled in with the script name from sys.argv[0].
    """
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    # __doc__ is None when docstrings are stripped (python -OO); fall back
    # to an empty usage text rather than failing on None.strip().
    text = (__doc__ or "").strip()
    # The docstring refers to %(prog)s, which is not a module global, so
    # supply it explicitly instead of formatting with globals().
    sys.stderr.write(text % {"prog": sys.argv[0]} + "\n")

def main():
    """Convert a hammie database into a cdb file named on the command line.

    Parses sys.argv for -h/--help, -d/--database and -p/--pickle plus one
    positional argument (the cdb file to write).  For every word reported
    by the opened store, the word and the string form of its probability
    are written as a key/value pair via cdb.cdb_make().

    Returns 0 after printing help, 1 on a command-line error, and None
    once the conversion has completed.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:p:",
                                   ["help", "database=", "pickle="])
    except getopt.GetoptError:
        # sys.exc_info() keeps the syntax valid on both old and new Pythons.
        usage(sys.exc_info()[1])
        return 1

    # Handle help before validating positional arguments so that a bare
    # "-h" prints the usage text instead of an argument-count error.
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0

    if len(args) != 1:
        usage("exactly one <cdbfile> argument is required")
        return 1
    cdbname = args[0]

    dbname, usedb = storage.database_type(opts)
    store = storage.open_storage(dbname, usedb)

    # NOTE(review): this instance is never used below; kept in case its
    # construction matters -- confirm and remove.
    bayes = CdbClassifier()
    items = []
    for word in store._wordinfokeys():
        record = store._wordinfoget(word)
        prob = store.probability(record)
        items.append((word, str(prob)))

    cdbfile = open(cdbname, "wb")
    try:
        cdb.cdb_make(cdbfile, items)
    finally:
        # Close the output file even if writing the cdb fails.
        cdbfile.close()

# Run only when executed as a script, and propagate main()'s return value
# (0 for help, 1 for usage errors, None on success) as the exit status.
if __name__ == "__main__":
    sys.exit(main())




More information about the Spambayes-checkins mailing list