Python example: possible speedup?

Hrvoje Niksic hniksic at srce.hr
Wed Sep 8 14:01:27 EDT 1999


Hrvoje Niksic <hniksic at srce.hr> writes:

> But I'll try it [slurp all the lines at once and iterate over them],
> for the sake of argument, and see if the speedup is worth it.

Yes, this does speed things up.  Instead of 27 CPU seconds, the
program now spends 19.5 seconds of CPU time.  Along with:

> > Also, instead of calling next_header on a line-by-line basis, how
> > about calling it once per package and returning a dict containing
> > the header names as keys?

This buys me an additional 2.5 CPU seconds.

The problem is that the thing is still twice as slow as the
equivalent Perl program, which I haven't bothered to optimize at all.
Oh well.

In case anyone is still interested, after these two optimizations, the 
code looks like this:

#!/usr/bin/python

import string

class Dpkg_Reader:
    """Read a file of "Name: value" stanzas one package at a time.

    The constructor reads the whole file into memory.  Within a stanza,
    a line starting with a space or a tab continues the previous header,
    and an empty line ends the stanza.
    """

    def __init__(self, file):
        """Load every line of the file named *file*.

        Any error raised by open() or readlines() propagates to the caller.
        """
        self.__current = ''     # raw text of the header being accumulated
        self.__index = 0        # position of the next unread line
        handle = open(file)
        # Close the handle explicitly instead of leaving it to the garbage
        # collector; everything needed afterwards is held in self.__lines.
        try:
            self.__lines = handle.readlines()
        finally:
            handle.close()

    def __store_header(self, package):
        """Parse the buffered header text and record it in *package*.

        Raises ValueError if the buffered text contains no colon.
        """
        # The header name ends at the FIRST colon.  Splitting on ": " would
        # pick a colon inside a continuation line when the header's own
        # colon is immediately followed by the newline.
        pieces = self.__current.split(':', 1)
        if len(pieces) != 2:
            raise ValueError("malformed header: %r" % (self.__current,))
        name, value = pieces
        if value[:1] == ' ':
            value = value[1:]       # drop the single separator space
        if value[-1:] == "\n":
            value = value[:-1]      # the last line of a file may lack it
        package[name] = value

    def next_package(self):
        """Return the next stanza as a dict, or None at end of file.

        Keys are header names; values are the header text with the
        separator and the final newline removed.  Continuation lines stay
        embedded in the value, separated by newlines.
        """
        package = {}
        lines = self.__lines
        total = len(lines)
        while self.__index < total:
            line = lines[self.__index]
            self.__index = self.__index + 1
            blank = (line == "\n")
            if not self.__current:
                # Nothing buffered: a blank line here is a stray separator
                # (leading or repeated) and must not start a header.
                if not blank:
                    self.__current = line
                continue
            if blank or line[0] not in (' ', "\t"):
                # The buffered header is complete.
                self.__store_header(package)
                if blank:
                    self.__current = ''
                    return package
                self.__current = line
            else:
                # Continuation line: append to the header being built.
                self.__current = self.__current + line
        # End of file: flush a final stanza that was not terminated by a
        # blank line instead of discarding it.
        if self.__current:
            self.__store_header(package)
            self.__current = ''
        if package:
            return package
        return None             # EOF

    def close(self):
        """Do nothing; the file is already closed by the constructor.

        Kept so that callers can treat the reader like a file object.
        """
        pass

def process_status(file):
    """Return the installed packages listed in the status file *file*.

    The result is a dict used as a set: each key is the 'Package' value of
    a stanza whose 'Status' header has 'installed' as its third word, and
    every value is 1.  Stanzas with a missing or shorter 'Status' header
    are skipped rather than raising.
    """
    reader = Dpkg_Reader(file)
    installed = {}
    try:
        while True:
            package = reader.next_package()
            if package is None:
                break
            # Split on any whitespace and check the length first, so a
            # short or absent Status line cannot raise IndexError/KeyError.
            status = package.get('Status', '').split()
            if len(status) > 2 and status[2] == 'installed':
                installed[package['Package']] = 1
    finally:
        # Release the reader even if a stanza turns out to be malformed.
        reader.close()
    return installed

def process_available(file, installed):
    """Return a dict mapping installed package names to installed sizes.

    *file* names a file readable by Dpkg_Reader; *installed* is any
    container supporting ``in`` (for example the dict returned by
    process_status).  A package is included only when its name is in
    *installed* and its stanza has an 'Installed-Size' header; the size is
    converted with int(), which raises ValueError on non-numeric text.
    """
    reader = Dpkg_Reader(file)
    sizes = {}
    try:
        while True:
            package = reader.next_package()
            if package is None:
                break
            pname = package['Package']
            # Skip stanzas without a size instead of failing with KeyError.
            if pname in installed and 'Installed-Size' in package:
                sizes[pname] = int(package['Installed-Size'])
    finally:
        # Release the reader even if a stanza turns out to be malformed.
        reader.close()
    return sizes

def main(status_file='/var/lib/dpkg/status',
         available_file='/var/lib/dpkg/available'):
    """Print installed packages and their sizes, largest first.

    *status_file* is passed to process_status and *available_file* to
    process_available; the defaults are the paths this script has always
    used.  Each output line has the form "name: size".
    """
    installed = process_status(status_file)
    sizes = process_available(available_file, installed)
    # A key function with reverse=True keeps the sort stable, so packages
    # of equal size come out in the same relative order as a descending
    # cmp-style comparison would give.
    ordered = sorted(sizes.keys(), key=lambda pack: sizes[pack],
                     reverse=True)
    for pack in ordered:
        # Parenthesised single argument: valid as both the print statement
        # and the print function.
        print("%s: %d" % (pack, sizes[pack]))

# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()




More information about the Python-list mailing list