[Tutor] Cluster analysis script error

Mike Armson michael.armson at gmail.com
Tue Oct 11 17:13:20 EDT 2016


Hi there,

I am trying to run a script (attached) which determines the number of
clusters of fixations within a set of eye movement data. The script says to
enter the following command on python:

"python fixationClusters.py outputPrefix input clusterDistance"

I am getting a syntax error with the output command after entering the
following:

"python /fixationClusters.py ETv2_cluster_output ETv2_cluster_input 50"

The syntax error is indicated with an arrow under the last "t" in "output".

I realize that "/fixationClusters.py" looks different from the
"fixationClusters.py" in the instructions of the script but this part
actually works. At least, it was giving me a syntax error for
"fixationClusters.py" but not for "/fixationClusters.py".

Also, the "ETv2_cluster" portion of the output command seems to be working,
as there were originally errors here, but I fixed them.

Thanks very much for your help!

Mike

-- 
*Mike Armson, PhD Candidate*
University of Toronto, Department of Psychology
Levine Lab, Rotman Research Institute, Baycrest
*p:*  (416) 785-2500 x.2826
*e:  marmson at research.baycrest.org <marmson at research.baycrest.org>*

<http://www.baycrest.org/research25years%20>
-------------- next part --------------
#!/usr/bin/env python
#
#   Script to find and report clusters of fixations
#
#   Clusters are defined automatically using a simple Euclidean distance metric
#   All fixations that are within D pixel units of each other are considered to
#   be in the same cluster.
#
#   Script Usage:   ./fixationClusters.py outputPrefix input clusterDistance
#           or      python fixationClusters.py outputPrefix input clusterDistance
#
#   input must be a csv file with either
#       4 columns: Trial#, x-loc, y-loc and duration, or
#       5 columns: Subject#, Trial#, x-loc, y-loc and duration.
#       
#   The script returns a text file named outputPrefix.txt
#
#   The output is similar to the input, but with an additional column labeling
#   clusters instead of fixation durations. Each cluster is given a unique label
#
#   clusterDistance is the value (in pixel units) for which fixations are deemed
#   to be in the same cluster
#
#   Mark Chiew
#   Aug 2012
#
################################################################################
#   Imports (don't touch this stuff)
################################################################################
import numpy as np
import sys

################################################################################
#   Module Variables
################################################################################

#   Subject numbers (starts out empty)
subjects = []

#   Screen limits along x and y; see __checkOutofBoundsFixations
xBound = 1024
yBound = 768

#   1: write each trial's fixations ordered by cluster label
#   0: write them in their original temporal order
sortFix = 1

#   1: alphabetic cluster labels, drawn from `alpha`
#   0: numeric cluster labels
alphaLabels = 1
alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

#   1: print progress messages to the screen; 0: stay silent
verbose = 1

################################################################################
#   Functions
################################################################################
def __detectColumns(inputFile):
    """Detects and returns the number of columns in the input file.

    inputFile is handed straight to numpy.loadtxt, so it must be a
    comma-separated, purely numeric file (a file name or an open file-like
    object). The whole file is parsed; the column count of the resulting
    table is returned as an int.
    """

    # ndmin=2 keeps the result two-dimensional even for a single-row file.
    # Without it loadtxt collapses that row to a 1-D array, data[0] becomes a
    # scalar and len() of it raises TypeError.
    data    =   np.loadtxt(inputFile, skiprows=0, delimiter=',', ndmin=2)
    return data.shape[1]

def __readData(inputFile, cols):
    """Reads the fixation file and groups its rows by subject and by trial.

    inputFile   comma-separated numeric file (name or file-like object),
                passed to numpy.loadtxt
    cols        number of columns in the file, 4 or 5. With 5 columns the
                first one is the subject number; with 4 columns all rows are
                treated as one subject. The next column is the trial number
                and the remaining ones are x-coord, y-coord and duration.

    Returns a list with one entry per subject (in ascending subject number).
    Each entry is a list with one element per trial number, counting from 1 up
    to the largest trial number found for that subject. An element is a 2-D
    array of that trial's rows with the subject and trial columns removed, or
    the placeholder array [['NoTrial']] when the trial has no rows. A subject
    whose largest trial number is below 1 gets an empty trial list.

    Raises ValueError if cols is neither 4 nor 5.
    """

    # ndmin=2: a file with a single row must still be a 2-D table, otherwise
    # the column slicing below fails
    data    =   np.loadtxt(inputFile, skiprows=0, delimiter=',', ndmin=2)

    if cols ==  5:
        # Sorted so the subject order never depends on set iteration order
        subjectIds  =   sorted(set(int(s) for s in data[:, 0]))
        # Rows of each subject, with the subject column stripped off
        perSubject  =   [data[data[:, 0] == s][:, 1:] for s in subjectIds]
    elif cols == 4:
        perSubject  =   [data]
    else:
        raise ValueError('Input file must have 4 or 5 columns, found %d' % cols)

    grouped =   []
    for rows in perSubject:
        trials  =   []
        # Use the largest trial number rather than the last row's, so trials
        # are not dropped when the rows are not sorted by trial
        for trialNum in np.arange(1, rows[:, 0].max() + 1):
            fixations   =   rows[rows[:, 0] == trialNum]
            if len(fixations) == 0:
                # Placeholder that keeps list position == trial number - 1
                trials.append(np.array([['NoTrial']]))
            else:
                # Drop the trial column; what is left is x, y, duration
                trials.append(fixations[:, 1:])
        grouped.append(trials)

    return grouped

def __checkOutofBoundsFixations(data, xMax=None, yMax=None):
    """Ignore fixations that occur outside of screen boundaries.

    data    iterable of fixations; entry 0 of each is read as x and entry 1
            as y
    xMax    exclusive upper limit for x; defaults to the module-level xBound
    yMax    exclusive upper limit for y; defaults to the module-level yBound

    Returns a numpy array holding only the fixations with 0 < x < xMax and
    0 < y < yMax, in their original order.
    """

    if xMax is None:
        xMax    =   xBound
    if yMax is None:
        yMax    =   yBound

    # A list comprehension rather than chained filter() calls: on Python 3
    # filter() is lazy and np.array() would wrap the iterator object itself
    # instead of collecting the surviving rows
    inside  =   [fix for fix in data
                 if 0 < fix[0] < xMax and 0 < fix[1] < yMax]

    return np.array(inside)


def prepData(inputFile):
    """Loads the fixation data stored in inputFile.

    The column count is detected first and then handed to the reader, so the
    file is parsed once for detection and once for the actual read. Returns
    whatever __readData returns for this file."""

    nColumns        =   __detectColumns(inputFile)
    printVerbose('Reading data from '+inputFile)

    fixationData    =   __readData(inputFile, nColumns)
    return fixationData
  
def calcDist(a, b):
    """Returns the straight-line (Euclidean) distance between a and b.

    Only entries 0 and 1 of each point are read, as its x and y coordinates;
    any further entries are ignored."""

    xDiff   =   b[0] - a[0]
    yDiff   =   b[1] - a[1]

    return np.sqrt(xDiff**2 + yDiff**2)


def updateClusters(clusters, pivot, indices):
    """Merges the pivot fixation's cluster with the clusters of its neighbours.

    clusters    sequence of cluster labels, one per fixation
    pivot       position of the fixation currently being examined
    indices     positions of the fixations lying within the cluster distance
                of the pivot

    Every fixation carrying the pivot's label or the label of any neighbour is
    relabelled with the smallest of those labels. Returns the updated label
    sequence; a mutable sequence is modified in place, an immutable one (such
    as a Python 3 range) is copied into a list first.
    """

    # range objects on Python 3 cannot be assigned to, so work on a list
    if not hasattr(clusters, '__setitem__'):
        clusters    =   list(clusters)

    if len(indices) == 0:
        return clusters

    pivotLabel  =   clusters[pivot]
    merged      =   set(clusters[i] for i in indices)
    index       =   min(min(merged), pivotLabel)

    # The pivot's previous label has to be merged as well. Relabelling only
    # the pivot itself would leave earlier members of its cluster behind and
    # split a chain of neighbouring fixations into two clusters.
    merged.add(pivotLabel)

    for i, value in enumerate(clusters):
        if value in merged:
            clusters[i] =   index

    return clusters

def prettyClusterNames(clusters):
    """Minimizes cluster labels, so that labels go from 0 to N-1 for N clusters.

    clusters    sequence of (hashable, sortable) cluster labels

    Returns a new list of the same length in which every label is replaced by
    its rank among the distinct labels, so the smallest label becomes 0. The
    result is always a real list, which callers can index and sort on both
    Python 2 and Python 3.
    """

    # One dictionary lookup per fixation instead of a list.index() scan
    rank    =   {}
    for position, name in enumerate(sorted(set(clusters))):
        rank[name]  =   position

    return [rank[name] for name in clusters]

def label(cluster):
    """Turns cluster labels into alphabetic characters, if necessary.

    cluster     zero-based, non-negative integer cluster label

    With alphaLabels switched off the result is the one-based number as a
    string. Otherwise the label is written with the letters of `alpha` as
    digits (first letter = 0) and no padding: 0 -> 'A', 25 -> 'Z', 26 -> 'BA',
    27 -> 'BB', 676 -> 'BAA'. Any number of letters is produced as needed.

    Raises ValueError for a negative label when alphabetic labels are used.
    """

    if not alphaLabels:
        return str(cluster+1)

    cluster =   int(cluster)
    if cluster < 0:
        raise ValueError('cluster label must be non-negative, got %d' % cluster)

    base    =   len(alpha)
    text    =   alpha[cluster % base]
    # Floor division: plain "/" yields a float on Python 3, which cannot be
    # used to index into alpha
    cluster //= base
    while cluster:
        text    =   alpha[cluster % base] + text
        cluster //= base

    return text


def printVerbose(text):
    """Prints text to the screen, but only when the verbose flag is set.

    print is called in function form with a single argument, which behaves
    the same on Python 2 and Python 3; the statement form is a syntax error
    on Python 3.
    """
    if verbose:
        print(text)
        

def main(argv=None):
    """Command-line entry point: cluster the fixations and write the result.

    argv    argument list laid out like sys.argv (script name, outputPrefix,
            input, clusterDistance); defaults to sys.argv

    Reads the input file, labels the fixations of every trial so that
    fixations closer than clusterDistance end up in the same cluster, and
    writes outputPrefix.txt with the columns Subject, Trial, x-pos, y-pos and
    Cluster. The Subject and Trial columns are the one-based positions of the
    subject and trial in the loaded data. Trials without fixations are
    skipped. Exits with status 1 when the arguments are unusable or the input
    file cannot be opened.
    """

    if argv is None:
        argv    =   sys.argv

    # Three arguments are read below, so the script name plus three are needed
    if len(argv) < 4:
        print("Wrong number of arguments.\n\nUsage:python fixationClusters.py outputPrefix input clusterDistance\n\nPlease try again.")
        sys.exit(1)

    outPrefix   =   argv[1]
    inFile      =   argv[2]
    try:
        cDistance   =   float(argv[3])
    except ValueError:
        print("clusterDistance must be a number. Check the value and try again.")
        sys.exit(1)

    # Fail early, with a readable message, when the input cannot be opened
    try:
        with open(inFile):
            pass
    except IOError:
        print("Error importing fixation file. Check file name and try again.")
        sys.exit(1)

    data    =   prepData(inFile)

    clusters=   []
    for subject, datum in enumerate(data):
        # Reported one-based, matching the Subject column of the output file
        printVerbose('\nSubject %i'%(subject+1))
        subjectClusters =   []
        for fixs in datum:
            # A real list: the labels are reassigned while clusters merge
            cTemp   =   list(range(len(fixs)))
            for index, coords in enumerate(fixs):
                # Only later fixations are compared, so each pair is tested once
                temp    =   [i for i in range(index+1, len(fixs)) if calcDist(fixs[i],coords) < cDistance]
                cTemp   =   updateClusters(cTemp, index, temp)
            subjectClusters.append(list(prettyClusterNames(cTemp)))
        clusters.append(subjectClusters)

    printVerbose('Writing data...\n')

    rowFormat   =   '%7d, %7d, %7.1f, %7.1f, %7s\n'
    with open(outPrefix+'.txt', 'w') as out:
        out.write('%-7s, %-7s, %-7s, %-7s, %-7s\n'%('Subject', '  Trial', '  x-pos', '  y-pos', 'Cluster'))
        for i, subject in enumerate(clusters):
            for j, trial in enumerate(subject):
                fixs    =   data[i][j]
                # Empty trials are stored as a string placeholder array.
                # Checking the dtype avoids comparing a numeric array with a
                # string, whose truth value is ambiguous on current numpy.
                if fixs.dtype.kind in 'SU':
                    continue
                if sortFix:
                    # Stable sort: fixations inside one cluster keep their
                    # temporal order
                    order   =   np.argsort(trial, kind='mergesort')
                else:
                    order   =   range(len(trial))
                for k in order:
                    out.write(rowFormat%(i+1, j+1, fixs[k][0], fixs[k][1], label(trial[k])))

    printVerbose('Output file %s written successfully\n'%(outPrefix+'.txt'))


if __name__ == "__main__":
    main()


More information about the Tutor mailing list