Help with script with performance problems

Peter Otten __peter__ at web.de
Sun Nov 23 18:36:55 EST 2003


Peter Otten wrote:

> However, it took 143 seconds to process 10**7 lines generated by

I just downloaded psycho, oops, keep misspelling the name :-) and it brings
down the time to 92 seconds - almost for free. I must say I'm impressed,
the psycologist(s) did an excellent job.

Peter

#!/usr/bin/python -u
# NOTE(review): -u asks the interpreter for unbuffered stdio, presumably so
# the progress dots written by main() show up as they are emitted - confirm.
import psyco, sys
# Turn psyco on for the whole program. The message this script was posted
# with reports the run over 10**7 lines dropping from 143 to 92 seconds.
psyco.full()

def main():
    """Tally clients and queries in the log file named by sys.argv[1].

    Every line must split on whitespace into exactly nine fields, in the
    order: month, day, timestr, stype, source, qtype, query, ctype, record.
    Only `source` (cut at its first '#') and `query` are counted.

    Output on stdout: a header, one progress dot per `pointmod` lines, a
    blank line, the number of lines processed, then a "name,count" row for
    each client and then each query seen more than `threshold` times.

    Raises:
        Exception: if a line does not have exactly nine fields. The message
            carries the zero-based line index and the offending line.
        IndexError: if no file name was given on the command line.
    """
    clients = {}
    queries = {}
    lineNo = -1  # stays -1 for an empty file, so the summary reports 0

    threshold = 100
    pointmod = 100000
    fieldcount = 9  # month day timestr stype source qtype query ctype record

    f = open(sys.argv[1])
    try:
        print("Each dot is %d lines..." % pointmod)
        for lineNo, line in enumerate(f):
            if lineNo % pointmod == 0:
                sys.stdout.write(".")

            # Check the field count explicitly rather than unpacking into
            # nine names: only two of the fields are used below.
            fields = line.split()
            if len(fields) != fieldcount:
                raise Exception("problem splitting line %d\n%s"
                                % (lineNo, line))

            # Drop everything from the first '#' on, so the same client
            # is counted once regardless of what follows the '#'.
            source = fields[4].split('#', 1)[0]
            query = fields[6]

            clients[source] = clients.get(source, 0) + 1
            queries[query] = queries.get(query, 0) + 1
    finally:
        f.close()

    # Single-argument print(...) produces the same text whether it is parsed
    # as a print statement or as a call to the print function.
    print("")
    print("%d lines processed" % (lineNo + 1))

    # Clients are reported first, then queries. Iterating the dict directly
    # avoids building a list of all items for a large tally.
    for tally in (clients, queries):
        for name in tally:
            count = tally[name]
            if count > threshold:
                print("%s,%s" % (name, count))

import time

# Time one complete run of main() by the wall clock and report the elapsed
# seconds on stdout once it has finished.
starttime = time.time()
main()
elapsed = time.time() - starttime
print("time: %s" % elapsed)





More information about the Python-list mailing list