Implementing file reading in C/Python

Marc 'BlackJack' Rintsch bj_666 at gmx.net
Fri Jan 9 05:32:34 EST 2009


On Fri, 09 Jan 2009 04:04:41 +0100, Johannes Bauer wrote:

> As this was horribly slow (20 Minutes for a 2GB file) I coded the whole
> thing in C also:

Yours took ~37 minutes for 2 GiB here.  This one takes "just" ~15 minutes:

#!/usr/bin/env python
from __future__ import division, with_statement
import os
import sys
from collections import defaultdict
from functools import partial
from itertools import imap


def iter_max_values(blocks, block_count):
    """Yield the most frequent byte of every block in *blocks*.

    blocks      -- iterable of non-empty byte strings.
    block_count -- expected total number of blocks; only used to compute
                   the percentage shown in the progress output.

    If several bytes share the highest count within a block, the one with
    the greatest value is yielded.  A progress line is printed for every
    1024th block, starting with the first.
    """
    for i, block in enumerate(blocks):
        histogram = defaultdict(int)
        for byte in block:
            histogram[byte] += 1

        # Rank by (count, value).  The tuple has to be built from the
        # histogram key itself -- reusing the loop variable ``byte`` here
        # would always select the last byte of the block.
        yield max((histogram[value], value) for value in histogram)[1]

        if i % 1024 == 0:
            # A single parenthesised argument prints identically whether
            # ``print`` is a statement or a function.
            print('Progresss: %.1f%%' % (100 * i / block_count))


def write_pgm(filename, width, height, pixel_values):
    """Write *pixel_values* to *filename* as a plain-text (P2) PGM image.

    filename     -- path of the file to create or overwrite.
    width        -- image width written into the header.
    height       -- image height written into the header.
    pixel_values -- iterable of integer gray values; each one is written
                    on its own line.  The header declares 255 as the
                    maximum gray value.
    """
    with open(filename, 'w') as out:
        magic = 'P2\n'
        comment = '# CREATOR: Crappyass Python Script\n'
        dimensions = '%d %d\n' % (width, height)
        max_gray = '255\n'
        out.write(magic + comment + dimensions + max_gray)

        # One decimal sample per line, in the order they are supplied.
        for sample in pixel_values:
            out.write('%d\n' % sample)


def main():
    """Render the file named by ``sys.argv[1]`` as a 1024x1024 PGM image.

    The input is read in equally sized blocks, one block per pixel.  Each
    pixel gets the ordinal of the byte that ``iter_max_values()`` yields
    for its block.  The image is written to ``<filename>.pgm``.

    Exits with an error message when the file contains fewer bytes than
    the image has pixels, because no block could be formed then.
    """
    # Imported locally: the module header only pulls ``imap`` from itertools.
    from itertools import islice

    filename = sys.argv[1]
    filesize = os.path.getsize(filename)

    width = 1024
    height = 1024
    pixels = width * height
    blocksize = filesize // pixels

    print('Filesize       : %d' % filesize)
    print('Image size     : %dx%d' % (width, height))
    print('Bytes per Pixel: %d' % blocksize)

    if blocksize == 0:
        # read(0) returns '' straight away, which equals the sentinel below;
        # the result would be a header-only file claiming a full image.
        sys.exit('%s: need at least %d bytes, got %d'
                 % (filename, pixels, filesize))

    with open(filename, 'rb') as data_file:
        # Call read(blocksize) repeatedly until it returns '' at EOF.
        all_blocks = iter(partial(data_file.read, blocksize), '')
        # A file size that is not a multiple of the pixel count leaves a
        # remainder that would produce surplus blocks; cap the stream so the
        # number of samples matches the dimensions declared in the header.
        blocks = islice(all_blocks, pixels)
        pixel_values = imap(ord, iter_max_values(blocks, pixels))
        write_pgm(filename + '.pgm', width, height, pixel_values)


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Ciao,
	Marc 'BlackJack' Rintsch



More information about the Python-list mailing list