How to get the size of a file?
Bengt Richter
bokr at oz.net
Sun Oct 17 05:45:17 EDT 2004
On Sun, 17 Oct 2004 06:29:36 GMT, bokr at oz.net (Bengt Richter) wrote:
[...]
>
>This should list duplicate files in the specified directory:
>You can hack to suit. Not very tested. Just what you see ;-)
>------------------------------------------------
># get_dupes.py
[... earlier version, which only worked for the current working directory ...]
Phooey. Hopefully better:
----------------------------------------------------------------------------
# get_dupes.py
import os, md5
def _md5_hexdigest(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in blocks of *chunk_size* bytes so that large files
    are never held in memory in one piece, and the file object is always
    closed, even if reading fails.
    """
    # Imported locally so this block does not depend on the file-level
    # import line: hashlib exists on Python 2.5+ and 3.x, the old md5
    # module only on Python 2.
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5
    hasher = new_md5()
    fh = open(path, 'rb')
    # try/finally rather than "with" keeps this usable on old interpreters.
    try:
        while True:
            chunk = fh.read(chunk_size)
            if not chunk:
                break
            hasher.update(chunk)
    finally:
        fh.close()
    return hasher.hexdigest()


def get_dupes(thedir):
    """Find files directly inside *thedir* that have identical content.

    Entries of *thedir* for which os.path.isfile() is true are first
    grouped by size; only files that share a size with at least one other
    file are hashed, and files with the same size and the same MD5 digest
    are reported as duplicates. Subdirectories are not descended into.

    Returns a list of (size, md5_hexdigest, names) tuples, one per group
    of two or more duplicates. *names* holds bare file names relative to
    *thedir*. The list is empty when there are no duplicates.

    Errors from os.listdir() or from opening/reading a file propagate to
    the caller.
    """
    by_size = {}
    for f in os.listdir(thedir):
        p = os.path.join(thedir, f)
        if os.path.isfile(p):
            by_size.setdefault(os.path.getsize(p), []).append(f)
    result = []
    for size, flist in by_size.items():
        # A file with a unique size cannot have a duplicate; skip hashing.
        if len(flist) < 2:
            continue
        by_digest = {}
        for name in flist:
            digest = _md5_hexdigest(os.path.join(thedir, name))
            by_digest.setdefault(digest, []).append(name)
        for digest, names in by_digest.items():
            if len(names) > 1:
                result.append((size, digest, names))
    return result
if __name__ == '__main__':
    # Command-line entry point: report duplicate files in the directory
    # named by the first argument.  Written in forms that behave the same
    # on Python 2.6+ and Python 3 (single-argument print(...) calls and
    # raise SystemExit(...)).
    import sys
    usage = 'Usage: python get_dupes.py directory'
    # Only a missing argument is a usage error.  Previously a bare except
    # around the whole body reported every failure as a usage problem.
    try:
        target = sys.argv[1]
    except IndexError:
        raise SystemExit(usage)
    try:
        dupes = get_dupes(target)
    except (OSError, IOError) as e:
        # Bad directory or unreadable file: show the real cause, then usage.
        raise SystemExit('%s\n%s' % (e, usage))
    if dupes:
        print('')
        print('%8s %32s %s' % ('size', 'md5 digest', 'files with the given size, digest'))
        print('%8s %32s %s' % ('----', '-' * 32, '---------------------------------'))
        for duped in dupes:
            # Each entry is a (size, digest, names) tuple matching the format.
            print('%8s %32s %s' % duped)
    else:
        print('No duplicate files in %r' % target)
----------------------------------------------------------------------------------------------
Regards,
Bengt Richter
More information about the Python-list mailing list