Python example: possible speedup?

Martin von Loewis loewis at informatik.hu-berlin.de
Wed Sep 8 15:56:24 EDT 1999


Hrvoje Niksic <hniksic at srce.hr> writes:

> The problem is that the thing is still twice as slow as the
> equivalent Perl program, which I haven't bothered to optimize at all.
> Oh well.
> 
> In case anyone is still interested, after these two optimizations, the 
> code looks like this:

Starting from your last version, I performed a number of additional
optimizations:
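- get rid of instance-attribute accesses in the inner loop; cache the
  remaining attributes and module functions in local variables
- use string.find instead of string.split to separate field name and value
- reduce the number of string slicings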

With that, I've reduced the time from 7s to 3.7s on my machine, with
my local data. Please find the code below.

Regards,
Martin

#!/usr/local/bin/python

import string

class Dpkg_Reader:
    """Sequential reader for dpkg-style files of "Name: value" stanzas.

    The whole file is read into memory up front.  Stanzas are separated
    by blank lines; a line starting with a space or a tab continues the
    field on the line before it.
    """

    def __init__(self, file):
        """Read every line of the file named *file* into memory."""
        stream = open(file)
        try:
            self.lines = stream.readlines()
        finally:
            # Close explicitly instead of relying on garbage collection.
            stream.close()
        self.index = 0

    def next_package(self):
        """Return the next stanza as a dict, or None when none is left.

        Keys are field names.  Values are the field text with the
        separating colon, one optional following space and the final
        newline removed; continuation lines stay embedded in the value,
        newlines included.  Lines without a colon are ignored.
        """
        lines = self.lines              # Localize everything: this is the
        total = len(lines)              # hot loop of the whole program.
        index = self.index
        # Skip separator lines so that repeated or trailing blank lines
        # do not produce a bogus stanza.
        while index < total and lines[index] == "\n":
            index = index + 1
        if index >= total:
            self.index = index
            return None                 # EOF
        package = {}
        current = lines[index]          # Get the first line
        index = index + 1
        while 1:
            try:
                line = lines[index]
            except IndexError:
                # The file ended without a closing blank line: pretend
                # one is there so the pending field is still stored.
                line = "\n"
                if current[-1:] != "\n":
                    # Last line had no newline; add one so the [:-1]
                    # slices below do not eat a real character.
                    current = current + "\n"
            else:
                index = index + 1
                if line[0] == ' ' or line[0] == '\t':
                    current = current + line
                    continue
            pos = current.find(':')
            if pos >= 0:
                # current always ends in a newline here, so pos + 1 is
                # a valid index.
                if current[pos + 1] == ' ':     # Skip space following :
                    value = current[pos + 2:-1]
                else:
                    value = current[pos + 1:-1]
                package[current[:pos]] = value
            if line == "\n":
                self.index = index      # write back index before returning
                return package
            current = line

    def close(self):
        """Do nothing; the file is already closed by __init__."""
        pass

def process_status(file):
    """Return a dict whose keys are the names of installed packages.

    *file* is the name of a dpkg status file.  A package counts as
    installed when the third space-separated word of its 'Status' field
    is 'installed'.  Stanzas without a 'Package' field, or with a
    missing or shorter 'Status' field, are skipped.  Every value in the
    returned dict is 1.
    """
    reader = Dpkg_Reader(file)
    installed = {}
    while 1:
        package = reader.next_package()
        if package is None:
            break
        # .get() with defaults keeps a malformed stanza from aborting
        # the whole scan with KeyError.
        status = package.get('Status', '').split(' ')
        if len(status) > 2 and status[2] == 'installed':
            name = package.get('Package')
            if name is not None:
                installed[name] = 1
    reader.close()
    return installed

def process_available(file, installed):
    """Return a dict mapping installed package names to their sizes.

    *file* is the name of a dpkg available file and *installed* is a
    mapping whose keys are the package names of interest.  The size is
    the 'Installed-Size' field converted to an integer.  Stanzas without
    a 'Package' field, and installed packages without an
    'Installed-Size' field, are skipped.

    Raises ValueError if an 'Installed-Size' value is not an integer.
    """
    reader = Dpkg_Reader(file)
    sizes = {}
    while 1:
        package = reader.next_package()
        if package is None:
            break
        pname = package.get('Package')
        if pname is None or pname not in installed:
            continue
        # Only look the size up for packages we care about.
        size = package.get('Installed-Size')
        if size is not None:
            sizes[pname] = int(size)
    reader.close()
    return sizes

def main():
    """Time the three phases of the installed-size report.

    Reads the files 'status' and 'available' from the current
    directory, builds a list of (size, name) pairs sorted by size, and
    prints the elapsed wall-clock seconds of each phase.
    """
    import time
    start = time.time()
    installed = process_status('status')
    end = time.time()
    # print(...) with one pre-formatted string gives the same output on
    # Python 2 and Python 3.
    print("status: %s" % (end - start))
    start = end
    sizes = process_available('available', installed)
    end = time.time()
    print("available: %s" % (end - start))
    start = end
    # Put the size first so that a plain sort orders by size.
    lst = [(size, name) for name, size in sizes.items()]
    lst.sort()
    end = time.time()
    print("sort: %s" % (end - start))
    # The report itself is disabled so that only the timings are shown:
    #for pack in lst:
    #    print("%s: %d" % (pack[1], pack[0]))

# Run the timing driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()





More information about the Python-list mailing list