How to get the size of a file?

Bengt Richter bokr at oz.net
Sun Oct 17 05:45:17 EDT 2004


On Sun, 17 Oct 2004 06:29:36 GMT, bokr at oz.net (Bengt Richter) wrote:
[...]
>
>This should list duplicate files in the specified directory:
>You can hack to suit. Not very tested. Just what you see ;-)
>------------------------------------------------
># get_dupes.py
[... version which only worked for current working directory...]
Phooey. Hopefully better:

----------------------------------------------------------------------------
# get_dupes.py
import os, md5
def _md5_hexdigest(path, chunk_size=65536):
    """Return the MD5 hex digest of the file at *path*, read in chunks."""
    # hashlib supersedes the old md5 module (deprecated in 2.5, removed in
    # 3.0); fall back to md5 only where hashlib does not exist.
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5

    hasher = new_md5()
    fh = open(path, 'rb')
    try:
        # Chunked reads keep memory use flat regardless of file size.
        while True:
            chunk = fh.read(chunk_size)
            if not chunk:
                break
            hasher.update(chunk)
    finally:
        # Close explicitly rather than relying on garbage collection.
        fh.close()
    return hasher.hexdigest()


def get_dupes(thedir):
    """Find files directly inside *thedir* that have identical content.

    Files are first grouped by size; only files that share a size with at
    least one other file are read and hashed.  Two files are reported as
    duplicates when both their size and their MD5 digest match (no
    byte-by-byte comparison is performed).

    Args:
        thedir: Path of the directory to scan.  Only regular files directly
            inside it are considered; subdirectories are not descended into.

    Returns:
        A list of ``(size, digest, names)`` tuples, one per group of two or
        more matching files.  ``size`` is the file size in bytes, ``digest``
        the MD5 hex digest, and ``names`` the list of file names (relative
        to *thedir*).  The order of the groups is not specified.

    Errors raised by ``os.listdir``, ``os.path.getsize`` or ``open`` are not
    caught here and propagate to the caller.
    """
    by_size = {}
    for f in os.listdir(thedir):
        p = os.path.join(thedir, f)
        if os.path.isfile(p):
            by_size.setdefault(os.path.getsize(p), []).append(f)

    result = []
    for size, flist in by_size.items():
        # A file with a unique size cannot have a duplicate: skip hashing it.
        if len(flist) < 2:
            continue
        by_digest = {}
        for name in flist:
            digest = _md5_hexdigest(os.path.join(thedir, name))
            by_digest.setdefault(digest, []).append(name)
        for digest, names in by_digest.items():
            if len(names) > 1:
                result.append((size, digest, names))
    return result

if __name__ == '__main__':
    # Command-line entry point: print a table of the duplicate files found in
    # the directory named by the first argument.
    import sys

    usage = 'Usage: python get_dupes.py directory'

    # Only a missing argument is a usage error.  A blanket "except:" around
    # the whole body would also relabel Ctrl-C and genuine bugs as one.
    if len(sys.argv) < 2:
        raise SystemExit(usage)
    target = sys.argv[1]

    try:
        dupes = get_dupes(target)
    except (OSError, IOError):
        # Bad or unreadable directory/file: exit with the real reason and the
        # usage line.  sys.exc_info() is used instead of "except ... as err"
        # so the statement parses on both old and new interpreters.
        raise SystemExit('%s\n%s' % (sys.exc_info()[1], usage))

    # print(<single string>) behaves the same as a print statement and as a
    # print() call, so the output below is identical on either.
    row = '%8s %32s %s'
    if dupes:
        print('')
        print(row % ('size', 'md5 digest', 'files with the given size, digest'))
        print(row % ('----', '-' * 32, '---------------------------------'))
        for duped in dupes:
            print(row % duped)
    else:
        print('No duplicate files in %r' % target)
----------------------------------------------------------------------------------------------
Regards,
Bengt Richter



More information about the Python-list mailing list