[Tutor] Cluster analysis script error
Mike Armson
michael.armson at gmail.com
Tue Oct 11 17:13:20 EDT 2016
Hi there,
I am trying to run a script (attached) which determines the number of
clusters of fixations within a set of eye movement data. The script says to
enter the following command on python:
"python fixationClusters.py outputPrefix input clusterDistance"
I am getting a syntax error with the output command after entering the
following:
"python /fixationClusters.py ETv2_cluster_output ETv2_cluster_input 50"
The syntax error is indicated with an arrow under the last "t" in "output".
I realize that "/fixationClusters.py" looks different from the
"fixationClusters.py" in the instructions of the script but this part
actually works. At least, it was giving me a syntax error for
"fixationClusters.py" but not for "/fixationClusters.py".
Also, the "ETv2_cluster" portion of the output command seems to be working,
as there were originally errors here, but I fixed them.
Thanks very much for your help!
Mike
--
*Mike Armson, PhD Candidate*
University of Toronto, Department of Psychology
Levine Lab, Rotman Research Institute, Baycrest
*p:* (416) 785-2500 x.2826
*e: marmson at research.baycrest.org <marmson at research.baycrest.org>*
<http://www.baycrest.org/research25years%20>
-------------- next part --------------
#!/usr/bin/env python
#
# Script to find and report clusters of fixations
#
# Clusters are defined automatically using a simple Euclidean distance metric
# All fixations that are within D pixel units of each other are considered to
# be in the same cluster.
#
# Script Usage: ./fixationClusters.py outputPrefix input clusterDistance
# or python fixationClusters.py outputPrefix input clusterDistance
#
# input must be a csv file with 4 columns: Trial#, x-loc, y-loc and duration,
# or a csv file with 5 columns: Subject#, Trial#, x-loc, y-loc and duration.
#
# The script returns a text file named outputPrefix.txt
#
# The output is similar to the input, but with an additional column labeling
# clusters instead of fixation durations. Each cluster is given a unique label
#
# clusterDistance is the value (in pixel units) for which fixations are deemed
# to be in the same cluster
#
# Mark Chiew
# Aug 2012
#
################################################################################
# Imports (don't touch this stuff)
################################################################################
import numpy as np
import sys
################################################################################
# Module Variables
################################################################################
# List of subject numbers
# NOTE(review): no code visible in this file reads or fills this list.
subjects = []
# Screen boundaries: exclusive upper limits for the x and y coordinates, used
# by __checkOutofBoundsFixations (the lower limit there is 0, also exclusive).
xBound = 1024
yBound = 768
# Sort fixations (1), or keep original temporal ordering (0)
# When 1, the output rows of each trial are ordered by cluster label.
sortFix = 1
# Use alphabetic (1), instead of numeric (0) cluster labels
alphaLabels = 1
# Letters indexed by label() when alphaLabels is set.
alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
# Verbose controls whether extra output is printed to screen or not
verbose = 1
################################################################################
# Functions
################################################################################
def __detectColumns(inputFile):
    """Detects and returns the number of columns in the input file.

    inputFile is a path or file-like object holding comma-separated numeric
    data (anything np.loadtxt accepts).

    Returns the column count as an int.
    """
    # ndmin=2 keeps a one-row file two-dimensional; without it loadtxt returns
    # a 1-D array and the column count cannot be read from the first row.
    data = np.loadtxt(inputFile, skiprows=0, delimiter=',', ndmin=2)
    return int(data.shape[1])
def __readData(inputFile, cols):
    """Reads the fixation file and groups its rows by subject and trial.

    Assumes the input file is 4 or 5-column numeric. If 5, first column is
    subject number, second column is trial number, third column is x-coord,
    fourth column is y-coord, and fifth column is fixation duration. If 4,
    omit subject number (all rows belong to a single subject).

    Returns a list with one entry per subject (in ascending subject-number
    order). Each entry is a list with one item per trial number 1..T, where T
    is the highest trial number found for that subject. Each item is either
    a 2-D array with columns (x, y, duration), or, for a trial number that
    has no rows, the placeholder array [['NoTrial']].

    Raises ValueError if cols is neither 4 nor 5.
    """
    # ndmin=2 keeps a one-row file two-dimensional so column slicing works.
    data = np.loadtxt(inputFile, skiprows=0, delimiter=',', ndmin=2)
    if cols == 5:
        # sorted() makes the subject order deterministic; callers identify
        # subjects by their position in the returned list.
        subjectIds = sorted(set(int(s) for s in data[:, 0]))
        # Select each subject's rows and drop the subject column.
        perSubject = [data[data[:, 0] == s][:, 1:] for s in subjectIds]
    elif cols == 4:
        perSubject = [data]
    else:
        raise ValueError('Input file must have 4 or 5 columns, found %s' % cols)

    result = []
    for rows in perSubject:
        trials = []
        # Use the largest trial number rather than the last row's, so rows
        # that are not sorted by trial are not silently dropped.
        lastTrial = rows[:, 0].max() if len(rows) else 0
        for trialNum in np.arange(1, lastTrial + 1):
            fixations = rows[rows[:, 0] == trialNum]
            if len(fixations) == 0:
                # Placeholder keeps list position == trial number - 1.
                trials.append(np.array([['NoTrial']]))
            else:
                # Drop the trial column, leaving (x, y, duration).
                trials.append(fixations[:, 1:])
        result.append(trials)
    return result
def __checkOutofBoundsFixations(data):
    """Ignore fixations that occur outside of screen boundaries.

    data is an iterable of rows whose first two items are the x and y
    coordinates. A row is kept only when 0 < x < xBound and 0 < y < yBound
    (all limits exclusive).

    Returns the surviving rows as a numpy array.
    """
    # Build a real list in a single pass: np.array() needs a sequence, and
    # filter() only returns one on Python 2.
    inside = [z for z in data
              if 0 < z[0] < xBound and 0 < z[1] < yBound]
    return np.array(inside)
def prepData(inputFile):
    """Loads the fixation data stored in inputFile.

    The column count is detected first, a progress message is emitted through
    printVerbose, and the result of __readData for that column count is
    returned unchanged.
    """
    columnCount = __detectColumns(inputFile)
    message = 'Reading data from ' + inputFile
    printVerbose(message)
    parsed = __readData(inputFile, columnCount)
    return parsed
def calcDist(a, b):
    """Returns the straight-line (Euclidean) distance from point a to point b.

    Only the first two items of a and b are used, as x and y."""
    dx = b[0] - a[0]
    dy = b[1] - a[1]
    squared = dx**2 + dy**2
    return np.sqrt(squared)
def updateClusters(clusters, pivot, indices):
    """Merges the pivot's cluster with the clusters of its neighbours.

    clusters -- mutable list of cluster labels, one per fixation
    pivot    -- position of the fixation currently being processed
    indices  -- positions of the fixations found to be close to the pivot

    Every fixation whose label belongs to the pivot or to any neighbour is
    relabelled with the smallest of those labels. The list is modified in
    place and also returned. If indices is empty the list is returned
    untouched.
    """
    if len(indices) == 0:
        return clusters
    merged = set(clusters[i] for i in indices)
    # The pivot's current label must be merged too. Otherwise fixations that
    # already share the pivot's label are left behind when the pivot joins a
    # lower-numbered cluster.
    merged.add(clusters[pivot])
    index = min(merged)
    for i, value in enumerate(clusters):
        if value in merged:
            clusters[i] = index
    return clusters
def prettyClusterNames(clusters):
    """Minimizes cluster labels, so that labels go from 0 to N-1 for N clusters.

    Each label is replaced by its rank among the distinct labels present, so
    relative ordering of the labels is preserved.

    Returns a new list; the input is not modified.
    """
    clusters = list(clusters)
    # One dictionary lookup per label instead of a list search per label.
    ranks = dict((value, rank)
                 for rank, value in enumerate(sorted(set(clusters))))
    # Return a real list so the result can be indexed and sorted on both
    # Python 2 and Python 3.
    return [ranks[value] for value in clusters]
def label(cluster):
    """Turns cluster labels into alphabetic characters, if necessary.

    cluster is a 0-based integer cluster label.

    When alphaLabels is set, labels 0-25 become 'A'-'Z'. Larger labels become
    two letters, alpha[cluster // 26] followed by alpha[cluster % 26], so
    label 26 is 'BA'. Labels of 676 and above raise IndexError because only
    one prefix letter is available.

    When alphaLabels is not set, the 1-based number is returned as a string.
    """
    if not alphaLabels:
        return str(cluster + 1)
    # divmod uses floor division, so the indices stay integers on Python 3
    # as well as Python 2.
    over, idx = divmod(int(cluster), 26)
    if over:
        return alpha[over] + alpha[idx]
    return alpha[idx]
def printVerbose(text):
    """Prints text to the screen, but only when the verbose flag is set."""
    if verbose:
        # Parenthesised single-argument form prints the same thing on
        # Python 2 and Python 3.
        print(text)
def main(argv):
    """Runs the whole clustering pipeline for one command line.

    argv is the full argument list including the script name:
    [script, outputPrefix, input, clusterDistance].

    Reads the fixation file, clusters the fixations of every trial, and
    writes outputPrefix + '.txt' with one row per fixation
    (subject, trial, x, y, cluster label).

    Returns 0 on success and 1 when the arguments or input file are unusable.
    """
    # argv[0] is the script name, so three user arguments means four items.
    if len(argv) < 4:
        print("Wrong number of arguments.\n\nUsage:python fixationClusters.py "
              "outputPrefix input clusterDistance\n\nPlease try again.")
        return 1
    outPrefix = argv[1]
    inFile = argv[2]
    try:
        cDistance = float(argv[3])
    except ValueError:
        print("clusterDistance must be a number. Please try again.")
        return 1

    # Check up front that the input can be opened, to give a readable message.
    try:
        with open(inFile):
            pass
    except IOError:
        print("Error importing fixation file. Check file name and try again.")
        return 1

    # Materialise each subject's trials so they can be indexed again when
    # the output is written.
    data = [list(trials) for trials in prepData(inFile)]

    clusters = []
    for subject, datum in enumerate(data):
        # Report the same 1-based subject number that the output file uses.
        printVerbose('\nSubject %i' % (subject + 1))
        subjectClusters = []
        for fixs in datum:
            # Every fixation starts in its own cluster; updateClusters edits
            # this list in place, so it must be a real list.
            cTemp = list(range(len(fixs)))
            for index, coords in enumerate(fixs):
                # Only later fixations need checking; earlier pairs were
                # already compared when the earlier fixation was the pivot.
                temp = [i for i in range(index + 1, len(fixs))
                        if calcDist(fixs[i], coords) < cDistance]
                cTemp = updateClusters(cTemp, index, temp)
            subjectClusters.append(list(prettyClusterNames(cTemp)))
        clusters.append(subjectClusters)

    printVerbose('Writing data...\n')
    outName = outPrefix + '.txt'
    rowFormat = '%7d, %7d, %7.1f, %7.1f, %7s\n'
    # The with-block closes the file even if writing a row fails.
    with open(outName, 'w') as out:
        out.write('%-7s, %-7s, %-7s, %-7s, %-7s\n'
                  % ('Subject', ' Trial', ' x-pos', ' y-pos', 'Cluster'))
        for i, subject in enumerate(clusters):
            for j, trial in enumerate(subject):
                fixs = data[i][j]
                # Trials with no fixations are stored as a string-typed
                # 'NoTrial' placeholder array; real data is numeric.
                if np.asarray(fixs).dtype.kind in ('S', 'U'):
                    continue
                if sortFix:
                    # A stable sort keeps the original temporal order of the
                    # fixations inside each cluster.
                    order = np.argsort(trial, kind='mergesort')
                else:
                    order = range(len(trial))
                for k in order:
                    out.write(rowFormat % (i + 1, j + 1, fixs[k][0],
                                           fixs[k][1], label(trial[k])))
    printVerbose('Output file %s written successfully\n' % outName)
    return 0


if __name__ == "__main__":
    # Execute main functions directly, without importing
    sys.exit(main(sys.argv))
More information about the Tutor
mailing list