[Python-checkins] python/dist/src/Tools/scripts byext.py,NONE,1.1

gvanrossum@users.sourceforge.net gvanrossum@users.sourceforge.net
Wed, 01 Jan 2003 06:41:27 -0800


Update of /cvsroot/python/python/dist/src/Tools/scripts
In directory sc8-pr-cvs1:/tmp/cvs-serv5893

Added Files:
	byext.py 
Log Message:
Silly little script to print statistics (files, lines, words) by
extension.  Could use some work, but already very useful.


--- NEW FILE: byext.py ---
#! /usr/bin/env python

"""Show file statistics by extension."""

import os
import sys

class Stats:

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % file)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except os.error, err:
            sys.stderr.write("Can't list %s: %s\n" % (file, err))
            self.addstats(ext, "unlistable", 1)
            return
        names.sort()
        for name in names:
            full = os.path.join(dir, name)
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, file):
        head, ext = os.path.splitext(file)
        head, base = os.path.split(file)
        if ext == base:
            ext = "" # .cvsignore is deemed not to have an extension
        self.addstats(ext, "files", 1)
        try:
            f = open(file, "rb")
        except IOError, err:
            sys.stderr.write("Can't open %s: %s\n" % (file, err))
            self.addstats(ext, "unopenable", 1)
            return
        data = f.read()
        f.close()
        self.addstats(ext, "bytes", len(data))
        if '\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        #self.addstats(ext, "chars", len(data))
        lines = data.splitlines()
        self.addstats(ext, "lines", len(lines))
        del lines
        words = data.split()
        self.addstats(ext, "words", len(words))

    def addstats(self, ext, key, n):
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        totals = {}
        exts = self.stats.keys()
        exts.sort()
        # Get the column keys
        columns = {}
        for ext in exts:
            columns.update(self.stats[ext])
        cols = columns.keys()
        cols.sort()
        minwidth = 7
        extwidth = max([len(ext) for ext in exts])
        print "%*s" % (extwidth, "ext"),
        for col in cols:
            width = max(len(col), minwidth)
            print "%*s" % (width, col),
        print
        for ext in exts:
            print "%*s" % (extwidth, ext),
            for col in cols:
                width = max(len(col), minwidth)
                value = self.stats[ext].get(col)
                if value is None:
                    s = ""
                else:
                    s = "%d" % value
                    totals[col] = totals.get(col, 0) + value
                print "%*s" % (width, s),
            print
        print "%*s" % (extwidth, "TOTAL"),
        for col in cols:
            width = max(len(col), minwidth)
            print "%*s" % (width, totals[col]),
        print

def main():
    args = sys.argv[1:]
    if not args:
        args = [os.curdir]
    s = Stats()
    s.statargs(args)
    s.report()

if __name__ == "__main__":
    main()