[SciPy-User] kmeans2 question/issue

James Abel j at abel.co
Wed Aug 8 23:53:19 EDT 2012


Thanks Gael.

 

BTW, I modified my code to loop until it gets the same clustering twice in a
row.  This yields more consistent results.  I don't know if this is a
general solution but it worked for my simple test case.  Code below.

 

James

 

import sys
import warnings

import numpy
import scipy
from scipy.cluster.vq import *

 

# Report the interpreter version. Every print below is written as a call with
# a single pre-formatted argument, so it produces the same text on Python 2
# and Python 3.
print(sys.version)

# Six 1-D observations that form three close pairs.
# numpy.array replaces the scipy.array alias, which SciPy has deprecated.
vals = numpy.array((0.0, 0.1, 0.5, 0.6, 1.0, 1.1))
print(vals)

# whiten() divides the observations by their standard deviation so they have
# unit variance before being handed to kmeans2.
white_vals = whiten(vals)
print("%s %s" % (white_vals.shape, white_vals))

 

# Check for same clustering
def clustering_test(a, b):
    """Return True when label sequences *a* and *b* describe the same clustering.

    Two labelings are the same clustering when they group the observations
    identically, regardless of which number each group was given.  That is
    the case exactly when the labels of *a* and *b* are in one-to-one
    correspondence position by position.

    Comparing sorted copies of the label vectors (the previous approach)
    only compares how often each label number occurs, so it accepts
    different groupings with equal counts and rejects identical groupings
    whose labels were merely renumbered.

    Parameters:
        a, b: one-dimensional sequences of hashable cluster labels (for
            example the index array returned by kmeans2).  Neither input
            is modified.

    Returns:
        bool: True if both labelings induce the same partition, else False.
        Sequences of different length never match.
    """
    labels_a = list(a)
    labels_b = list(b)

    r = len(labels_a) == len(labels_b)
    if r:
        a_to_b = {}
        b_to_a = {}
        for label_a, label_b in zip(labels_a, labels_b):
            # setdefault records the first pairing seen for a label and
            # returns it afterwards; any later disagreement means the
            # mapping is not one-to-one in that direction.
            if (a_to_b.setdefault(label_a, label_b) != label_b
                    or b_to_a.setdefault(label_b, label_a) != label_a):
                r = False
                break

    # Debug trace; formatted into one string so the output is the same on
    # Python 2 and Python 3.
    print("%s %s %s" % (a, b, r))
    return r

 

# Try it until we get the same clustering twice in a row.
# kmeans2 picks its initial centroids at random by default, so individual runs
# can disagree; two consecutive matching runs are taken as the stable answer.
# NOTE: there is no upper bound on the number of attempts, so this loops
# forever if consecutive runs never agree.
found = False
prior_idx = None
while not found:
    with warnings.catch_warnings():
        # Suppress the warning message (happens if it doesn't find the right
        # number of clusters).
        warnings.simplefilter("ignore")
        # Changing iter doesn't seem to matter.
        res, idx = kmeans2(white_vals, 3)

    # The first pass has nothing to compare against; from the second pass on,
    # stop as soon as this run matches the previous one.
    if prior_idx is not None and clustering_test(idx, prior_idx):
        found = True
    prior_idx = idx

# Single pre-formatted argument keeps the output identical on Python 2 and 3.
print("result %s %s" % (res, idx))

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.scipy.org/pipermail/scipy-user/attachments/20120808/e4a8b668/attachment.html>


More information about the SciPy-User mailing list