[Python-checkins] python/nondist/sandbox/spambayes mboxcount.py,NONE,1.1

tim_one@users.sourceforge.net tim_one@users.sourceforge.net
Thu, 22 Aug 2002 19:36:10 -0700


Update of /cvsroot/python/python/nondist/sandbox/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv29337

Added Files:
	mboxcount.py 
Log Message:
Utility to count and display the # of msgs in (one or more) Unix mboxes.


--- NEW FILE: mboxcount.py ---
#! /usr/bin/env python

"""Count the number of messages in Unix mboxes.

Usage: %(programs)s [-g] [-h] path1 ...
Options:

    -h
        Print this help message and exit
    -g
        Do globbing on each path.  This is helpful on Windows, where the
        native shells don't glob.
"""

"""
Stats for Barry's corpora, as of 22-Aug-2002:

\code\edu-sig-clean.mbox                               252
\code\python-dev-clean.mbox                           8326
\code\mailman-developers-clean.mbox                   2427
\code\python-list-clean.mbox                         85500
\code\zope3-clean.mbox                                2177
"""

import sys
import mailbox
import email
import getopt
import glob

program = sys.argv[0]

def usage(code, msg=''):
    """Print the module's help text to stderr, then exit with status `code`.

    If `msg` is non-empty it is printed to stderr after the help text.
    `msg` may be any object with a sensible str() (main() passes the
    getopt error instance directly).
    """
    # The Usage line in the module docstring carries a %(programs)s
    # placeholder; fill it in with the script name rather than printing
    # the placeholder literally.
    print >> sys.stderr, __doc__ % {'programs': program}
    if msg:
        print >> sys.stderr, msg
    sys.exit(code)

def _factory(fp):
    """Message factory handed to the mailbox class: parse one message.

    `fp` is a file-like object positioned at a single message.  Returns
    the parsed email message object, or '' if the message cannot be
    parsed.
    """
    try:
        return email.message_from_file(fp)
    except email.Errors.MessageParseError:
        # Don't return None: the mailbox iterator treats None as the
        # end-of-mailbox sentinel, so a single malformed message would
        # silently stop iteration and truncate the count.
        return ''

def count(fname):
    """Return the number of messages in the Unix mbox file `fname`.

    The file is opened in binary mode and walked with
    mailbox.PortableUnixMailbox, using _factory to build each message.
    The file is closed even if reading or parsing raises; errors from
    open() propagate to the caller.
    """
    fp = open(fname, 'rb')
    try:
        mbox = mailbox.PortableUnixMailbox(fp, _factory)
        # Named `total` rather than `count` so the local does not shadow
        # this function's own name.
        total = 0
        for msg in mbox:
            total += 1
    finally:
        fp.close()
    return total

def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hg', ['help'])
    except getopt.error, msg:
        usage(1, msg)

    doglob = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt == '-g':
            doglob = True

    for path in args:
        if doglob:
            fnames = glob.glob(path)
        else:
            fnames = [path]

        for fname in fnames:
            n = count(fname)
            print "%-50s %7d" % (fname, n)

if __name__ == '__main__':
    main()