[SciPy-user] What can be improved ?
David M. Cooke
cookedm at physics.mcmaster.ca
Wed May 16 11:54:09 EDT 2007
On Wed, May 16, 2007 at 11:38:53AM +0200, Stef Mientki wrote:
> hello,
>
> I've just written a function,
> (with a lot of trial and error,
> converting strings to float, reshaping arrays etc)
> to read a tab delimited file, exported from Excel,
> and I'm glad it's working ok now.
>
> But I've the unpleasant feeling, that this function is written in a very
> clumsy way,
> so may I ask some guru for some comment about improvements.
First thing I would do would be to look at the csv module included with
Python :) That'll handle most of the parsing of the file, and you just
then have to worry about converting the columns from strings to arrays.
Also,
- "from ... import *" inside functions is deprecated; you'll get a warning.
- Instead of appending to arrays using r_, build a list and convert it to an
array at the end.
import csv
from datetime import datetime

import numpy as N
def _sfloat(s):
    """Convert one cell to float, treating an empty string as 0.0."""
    if s == '':
        return 0.0
    return float(s)


def readSenseWearTabFile(filename, print_info=False, sample_reduction=5):
    """Read a tab-delimited SenseWear export into a 2-D float array.

    The first line of the file holds the column names.  The second line is
    used only to establish the start time and is otherwise discarded.  For
    every following line the timestamp in the first column is replaced by
    the number of whole minutes elapsed since that start time, and all
    remaining cells are converted to float (empty cells become 0.0).

    Parameters
    ----------
    filename : str
        Path of the tab-delimited file (as exported from Excel).
    print_info : bool, optional
        If true, print every column name, one per line.
    sample_reduction : int, optional
        Divisor applied to the length of a time gap (in minutes) to decide
        how many all-zero rows are inserted for missing samples.
        NOTE(review): the default of 5 mirrors the ``SR = 5`` constant of
        the function this one replaces, which also averaged the rows in
        groups of ``SR``.  No such averaging happens here, so confirm
        whether 1 would be the more appropriate value.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number_of_columns, number_of_rows)``; row 0 holds
        the elapsed minutes.

    Raises
    ------
    ValueError
        If ``sample_reduction`` is smaller than 1, or if a timestamp or a
        cell cannot be parsed.
    StopIteration
        If the file has fewer than two lines.
    """
    if sample_reduction < 1:
        raise ValueError("sample_reduction must be a positive integer")

    time_format = "%Y-%m-%d %H:%M"
    data = []
    # newline='' is what the csv module asks for when it is handed a file
    # object; the with-block guarantees the file is closed again.
    with open(filename, newline='') as fo:
        reader = csv.reader(fo, dialect='excel-tab')
        column_names = next(reader)
        if print_info:
            for name in column_names:
                print(name)
        # Not called N: that name is the numpy module alias.
        n_columns = len(column_names)

        # The first data line only provides the start time.
        start = datetime.strptime(next(reader)[0][0:16], time_format)
        prev_tyd = 0  # time of the previous sample
        for vals in reader:
            if not vals:
                # csv yields an empty list for a blank line
                continue
            # calculate number of minutes from start
            elapsed = datetime.strptime(vals[0][0:16], time_format) - start
            tyd = elapsed.seconds // 60 + elapsed.days * 24 * 60
            # if there are sample-sets missing, fill them with empty
            # sample-sets (beware of sample reduction)
            if tyd - prev_tyd > 1:
                missing = (tyd - prev_tyd) // sample_reduction
                # one complete row of zeros per missing sample-set, so that
                # data stays a list of equally long rows
                data.extend([0.0] * n_columns for _ in range(missing))
            prev_tyd = tyd  # remember the time of this sample-set
            vals[0] = tyd   # replace the datetime with number of minutes
            # be sure all lines are of equal length
            # (sometimes Excel omits the last columns if they are empty)
            while len(vals) < n_columns:
                vals.append(0)
            data.append([_sfloat(v) for v in vals])

    # The reshape keeps the (n_columns, 0) shape when no data rows were read.
    return N.array(data, dtype=float).reshape(-1, n_columns).T
>
> thanks,
> Stef Mientki
>
>
> #
> ******************************************************************************
> #
> ******************************************************************************
> def Read_SenseWear_Tab_File (filename, Print_Info = False):
> from scipy import *
> from time import strptime
>
> # open the data file and read the column names (and print if desired)
> Datafile = open(filename,'r')
> line = Datafile.readline()
> column_names = line.rstrip('\n').split('\t')
> if Print_Info:
> for items in column_names: print items
>
> # initialize Number of columns and an empty sample-set
> N = len(column_names)
> zero_vals = N * [0]
> SR = 5
>
> # read the first dataline, to determine the start time
> # (we forget this first sampleset)
> line = Datafile.readline()
> vals = line.rstrip('\n').split('\t')
> start = datetime(*strptime(vals[0][0:16], "%Y-%m-%d %H:%M")[0:6])
> prev_tyd = 0 # time of the previous sample
>
> # create an empty array
> data = asarray([])
> sample_reduction = asarray([])
>
> # read and interpret all lines in file
> for line in Datafile:
> # remove EOL, split the line on tabs
> vals = line.rstrip('\n').split('\t')
>
> # calculate number of minutes from start
> tyd = datetime(*strptime(vals[0][0:16], "%Y-%m-%d %H:%M")[0:6])
> s = tyd - start
> tyd = s.seconds/60 + s.days*24*60
>
> # if there are sample-sets missing, fill them with empty sample-sets
> # (beware of sample reduction)
> if tyd - prev_tyd > 1:
> zero_vals = (( tyd - prev_tyd )/SR) * N * [0]
> data = r_[data, zero_vals]
>
> prev_tyd = tyd # remember the time of this sample-set
> vals[0] = tyd # replace the datetime with number of minutes
>
> # be sure all lines are of equal length
> # (sometimes Excel omits the last columns if they are empty)
> if len(vals) < N:
> vals = vals + ( N- len(vals) )*[0]
>
> # replace empty strings, otherwise float conversion raises an error
> for i in range(len(vals)):
> if vals[i] == '' : vals[i] = '0'
>
> # convert the string vector to a float vector
> # VERY STRANGE: the next 2 operation may not be done at once
> vals = asarray(vals)
> vals = vals.astype(float)
>
> # append new sampleset, with a sample reduction of 5
> sample_reduction = r_ [ sample_reduction, vals ]
> if len(sample_reduction) == SR * N:
>
> # reshape sample array, for easy ensemble average
> sample_reduction = sample_reduction.reshape(SR, N)
> sample_reduction = sample_reduction.mean(0)
>
> # add mean value of SAMPLE_REDUCTION sample-sets to the total array
> # and clear the averaging sample-set
> data = r_[data, sample_reduction]
> sample_reduction = asarray([])
>
> # reshape into N signal vectors
> data = data.reshape(size(data)/N,N)
> data = transpose(data)
>
> return data
> #
> ******************************************************************************
>
>
> Kamer van Koophandel - handelsregister 41055629 / Netherlands Chamber of Commerce - trade register 41055629
>
>
> _______________________________________________
> SciPy-user mailing list
> SciPy-user at scipy.org
> http://projects.scipy.org/mailman/listinfo/scipy-user
>
--
|>|\/|<
/--------------------------------------------------------------------------\
|David M. Cooke http://arbutus.physics.mcmaster.ca/dmc/
|cookedm at physics.mcmaster.ca
More information about the SciPy-User
mailing list