Script Optimization

lev levlozhkin at gmail.com
Sun May 4 16:01:15 EDT 2008


> * Remove newlines introduced by email
> * Move imports to start of file
    used imports of the edited script you sent.
> * Change indentation from 8 spaces to 4
    I like using tabs because of the text editor I use, the script at
the end is with 4 though.
> * Move main() to bottom of script
> * Remove useless "pass" and "return" lines
    I replaced the return nothing lines with passes, but I like
keeping them in case the indentation is ever lost - makes it easy to
go back to original indentation
> * Temporarily change broken "chdir" line
    removed as many instances of chdir as possible (a few useless ones
to accomodate the functions - changed functions to not chdir as much),
that line seems to work... I made it in case the script is launched
with say: 'python somedir\someotherdir\script.py' rather than 'python
script.py', because I need it to work in it's own and parent
directory.
> * Split lines so they fit into 80 chars
> * Add spaces after commas
> * Use path.join instead of string interpolation
    in all cases when possible - done
> * rename rename() to rename_md5() because rename() shadows a function
> imported from os.
    renamed all functions to more understandable names (without
collisions)
> * Rename vars shadowing imported names
    renamed almost all vars to more understandable names
> * Improve logic for checking when to print help
    the example you gave me does pretty much the exact same thing as
before... (the options are either false or true depending on if the
argument was used, if false for both then no logic was done and help
is shown, which would be exactly the same if the did_something var
remained false.
> * Create emtpy md5 listing file if one doesn't exist
    I intended it to be a script to help ripping a specific mp3cd to
disk, not necessarily create checksum files, because i intend to
include the checksums file.
> * Add a comment for a dodgy-looking section
    The 4 folders to be renamed are intentional (this is for a
specific mp3cd with 4 album folders)

I added comments to explain what I was doing with the dictionary[x][1]
[1][0], and also what the indexes for the strings are used for ([3:]
to remove the 001 in 001Track.mp3, etc.)


Thanks for the advice so far,
lev

#!/usr/bin/env python

import md5
from glob import glob
from optparse import OptionParser
from os import chdir, path, rename, remove
from sys import argv, exit

def verify_checksum_set(checksums):
    checksums = open(checksums, 'r')
    changed_files = {}
    missing_files = []
    for fline in checksums.readlines():
        line = fline.split(' *')
        original_sum = line[0].upper()
        try:
            new_sum = calculate_checksum(line[1].strip())
            if  new_sum == original_sum:
                print '.',
                pass
            else:
                changed_files[line[1]] = (original_sum, new_sum)
                pass
        except IOError:
            missing_files.append(line[1])
            pass
        pass
    checksums.close()
    changed_files_keys = changed_files.keys()
    changed_files_keys.sort()
    missing_files.sort()
    print '\n'
    if len(changed_files) != 0:
        print 'File(s) changed:'
        for key in changed_files_keys:
            print key.strip('\n'), 'changed from:\n\t',
changed_files[key][0], \
                'to\n\t', changed_files[key][1]
            pass
        print '\n\t', len(changed_files), 'file(s) changed.\n'
        pass
    if len(missing_files) != 0:
        print 'File(s) not found:'
        for x in range(len(missing_files)):
            print '\t', missing_files[x]
            pass
        print '\n\t', len(missing_files), 'file(s) not found.\n'
        pass
    if not len(changed_files) and not len(missing_files):
        print "\n\tChecksums Verified\n"
        pass
    pass

def calculate_checksum(file_name):
    file_to_check = open(file_name, 'rb')
    chunk = 8196
    checksum = md5.new()
    while (True):
        chunkdata = file_to_check.read(chunk)
        if not chunkdata:
            break
        checksum.update(chunkdata)
        pass
    file_to_check.close()
    return checksum.hexdigest().upper()

def rename_file_set(new_dir_names, checksums):
    file_info = md5format(checksums)
    dirlist = glob('00[1-4]Volume [1-4]')
    dirlist.sort()
    for x in range(4):
        rename(dirlist[x], new_dir_names[x])
        print '\t', dirlist[x], 'renamed to:', new_dir_names[x]
        chdir(new_dir_names[x])
        for old_file_name in glob ('*.mp3'):
            # old_file_name[3:] is part of removing numbering:
'001Track ...'
            new_file_name = old_file_name[3:]
            rename(old_file_name, new_file_name)
            print '\t\t', old_file_name, 'renamed to:', new_file_name
            pass
        chdir('..')
        file_info = md5file_name_edit(file_info,dirlist[x],
            new_dir_names[x])
        pass
    md5write(file_info, checksums)
    replace_strings('The American Century.htm', dirlist,
new_dir_names)
    print '\n\tDirectories and Files renamed.'
    pass

def md5format(checksums):
    file_info = {}
    checksums = open(checksums, 'r')
    for line in checksums.readlines():
        splitline = line.split(' *')
        #original full filename = (checksum, [directory name, file
name])
        file_info[splitline[1]] = (splitline[0],splitline[1].split('\
\'))
        pass
    checksums.close()
    return file_info

def md5file_name_edit(file_info, old_dir_name, new_dir_name):
    for x in file_info.keys():
        dir_name_from_file = file_info[x][1][0]
        if dir_name_from_file == old_dir_name:
            checksum = file_info[x][0]
            file_name_from_file = file_info[x][1][1]
            #md5 format: 'C8109BF6B0EF724770A66CF4ED6251A7 *001Album
1\001Track.mp3'
            file_info[x] = (checksum, [new_dir_name,
file_name_from_file])
            #mp3cd numbering: '001Track.mp3, 002Track.mp3...'
            if file_name_from_file[0] == '0':
                file_info[x] =(checksum, [new_dir_name,
                    file_name_from_file[3:]])
                pass
            pass
        pass
    return file_info

def md5write(file_info, checksums):
    keys = file_info.keys()
    keys.sort()
    checksums = open(checksums, 'w')
    for x in keys:
        checksum = file_info[x][0]
        try:
            #when the file is one directory deep:
            #'C8109BF6B0EF724770A66CF4ED6251A7 *001Album
1\001Track.mp3'
            dir_name = file_info[x][1][0]
            file_name = file_info[x][1][1]
            checksums.writelines('%s *%s' % (checksum,
os.path.join(dir_name, \
                file_name)))
            pass
        except IndexError:
            #when the file is in root dir:
'007CC9C12342017709A2F19AF75247BD *010Track.mp3'
            file_name = file_info[x][1][0]
            checksums.writelines('%s *%s' % (checksum, file_name))
            pass
        pass
    checksums.close()
    pass

def replace_strings(file_name, oldlist, newlist):
    try:
        new_file = open(file_name, 'r').read();
        for x in range(4):
            new_file = new_file.replace(oldlist[x], newlist[x], 1)
            pass
        remove(file_name)
        file_name = open(file_name, 'w', len(new_file))
        file_name.write(new_file)
        file_name.close()
        pass
    except IOError:
        print file_name, 'not found'
        pass
    pass

def main():
    full_path = path.abspath(path.dirname(argv[0]))
    chdir(full_path)
    chdir('..')
    checksums = path.join(full_path, 'checksums.md5')
    new_dir_names = ('Volume 1 - 1889-1929', 'Volume 2 - 1929-1945', \
        'Volume 3 - 1945-1965', 'Volume 4 - 1963-1989')
    parser = OptionParser()
    parser.add_option ('-v', '--verify', action = 'store_true', \
        dest = 'verify', help = 'verify checksums')
    parser.add_option ('-r', '--rename', action = 'store_true', dest =
\
        'rename', help = \
        'rename files to a more usable form (write rights needed)')
    (options, args) = parser.parse_args()
    if options.verify:
        verify_checksum_set(checksums)
        pass
    if options.rename:
        rename_file_set(new_dir_names, checksums)
        pass
    if not options.verify and not options.rename:
        parser.print_help()
        pass
    pass

if __name__ == '__main__':
    main()



More information about the Python-list mailing list