[Python-checkins] python/nondist/sandbox/spambayes mboxcount.py,NONE,1.1
tim_one@users.sourceforge.net
tim_one@users.sourceforge.net
Thu, 22 Aug 2002 19:36:10 -0700
Update of /cvsroot/python/python/nondist/sandbox/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv29337
Added Files:
mboxcount.py
Log Message:
Utility to count and display the # of msgs in (one or more) Unix mboxes.
--- NEW FILE: mboxcount.py ---
#! /usr/bin/env python
"""Count the number of messages in Unix mboxes.
Usage: %(programs)s [-g] [-h] path1 ...
Options:
-h
Print this help message and exit
-g
Do globbing on each path. This is helpful on Windows, where the
native shells don't glob.
"""
"""
Stats for Barry's corpora, as of 22-Aug-2002:
\code\edu-sig-clean.mbox 252
\code\python-dev-clean.mbox 8326
\code\mailman-developers-clean.mbox 2427
\code\python-list-clean.mbox 85500
\code\zope3-clean.mbox 2177
"""
import sys
import mailbox
import email
import getopt
import glob
# Name the script was invoked as (sys.argv[0]); presumably intended for the
# program-name placeholder in the usage text -- TODO confirm.
program = sys.argv[0]
def usage(code, msg=''):
    """Write the module usage text to stderr, then exit.

    code is handed to sys.exit() as the process exit status.
    msg, when non-empty, is written on its own line after the usage text
    (main() passes the getopt error here).
    """
    text = __doc__ or ''
    # The Usage line of the module docstring holds a "%(programs)s"
    # placeholder that used to be printed verbatim.  Fill it in with the
    # script name.  str.replace() is used instead of "%" formatting so a
    # stray "%" elsewhere in the help text cannot raise; the singular
    # spelling is accepted too, since the module-level name is `program`.
    for placeholder in ('%(programs)s', '%(program)s'):
        text = text.replace(placeholder, program)
    sys.stderr.write(text + '\n')
    if msg:
        sys.stderr.write('%s\n' % (msg,))
    sys.exit(code)
def _factory(fp):
try:
return email.message_from_file(fp)
except email.Errors.MessageParseError:
return None
def count(fname):
    """Return how many messages the mbox file fname yields.

    The file is opened in binary mode and wrapped in a
    mailbox.PortableUnixMailbox that uses _factory to build each message;
    the result is the number of items the mailbox iterator produces.
    """
    fp = open(fname, 'rb')
    try:
        mbox = mailbox.PortableUnixMailbox(fp, _factory)
        total = 0
        for _msg in mbox:
            total += 1
    finally:
        # Release the file handle even if reading or parsing fails midway.
        fp.close()
    return total
def main():
    """Command-line entry point: print a message count for each mbox path.

    Recognizes -h/--help (show usage, exit 0) and -g (expand each path
    argument with glob.glob).  A getopt error shows usage and exits 1.
    One line per file is written to stdout: the file name left-justified
    in 50 columns, followed by the count right-justified in 7.
    """
    try:
        options, paths = getopt.getopt(sys.argv[1:], 'hg', ['help'])
    except getopt.error:
        # sys.exc_info()[1] is the exception instance being handled.
        usage(1, sys.exc_info()[1])

    expand = False
    for flag, _value in options:
        if flag == '-g':
            expand = True
        elif flag in ('-h', '--help'):
            usage(0)

    for pattern in paths:
        if expand:
            matches = glob.glob(pattern)
        else:
            matches = [pattern]
        for mbox_path in matches:
            line = "%-50s %7d" % (mbox_path, count(mbox_path))
            sys.stdout.write(line + '\n')
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()