Newbie - converting csv files to arrays in NumPy - Matlab vs. Numpy comparison

skip at pobox.com skip at pobox.com
Sat Jan 13 19:58:45 EST 2007


    oyekomova> def read_test():
    oyekomova>     start  = time.clock()
    oyekomova>     reader = csv.reader( file('data.txt') )
    oyekomova>     data   = [ map(float, row) for row in reader ]
    oyekomova>     data   = array(data, dtype = float)
    oyekomova>     print 'Data size', len(data)
    oyekomova>     print 'Elapsed', time.clock() - start

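Your list comprehension builds the entire file in memory as a list of rows,
and then array() makes a second copy of it, so you are holding both at once.
Try operating line-by-line, appending each row to the array as you read it.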

    #!/usr/bin/env python

    import array
    import time
    import random
    import csv

    def make_data(nrows=1000000, cols=6):
        """Write `nrows` rows of `cols` random floats to 'data.txt' as CSV.

        Every value comes from random.random().  An existing 'data.txt' in
        the current directory is overwritten.
        """
        counter = range(cols)
        # The Python 2 csv documentation asks for binary mode; in text mode
        # Windows would turn the writer's '\r\n' terminator into '\r\r\n'.
        outfile = open('data.txt', 'wb')
        try:
            writer = csv.writer(outfile)
            for row in xrange(nrows):
                writer.writerow([random.random() for x in counter])
        finally:
            # Close explicitly so all rows are flushed to disk before the
            # file is read back, instead of relying on garbage collection.
            outfile.close()

    def read_test():
        reader = csv.reader( file('data.txt') )
        data = array.array('f')
        for row in reader:
            data.extend(map(float, row))
        print 'Data size', len(data)

    start = time.clock()
    make_data()
    print "generate data:", (time.clock()-start)

    start = time.clock()
    read_test()
    print "read data:", (time.clock()-start)

Skip



More information about the Python-list mailing list