From scipy-svn at scipy.org Fri Jun 1 04:22:13 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 1 Jun 2007 03:22:13 -0500 (CDT) Subject: [Scipy-svn] r3066 - in trunk/Lib/sandbox/pyem: . tests Message-ID: <20070601082213.7FB4B39C02F@new.scipy.org> Author: cdavid Date: 2007-06-01 03:21:52 -0500 (Fri, 01 Jun 2007) New Revision: 3066 Removed: trunk/Lib/sandbox/pyem/kmean.py trunk/Lib/sandbox/pyem/tests/test_kmean.py Modified: trunk/Lib/sandbox/pyem/gmm_em.py trunk/Lib/sandbox/pyem/online_em.py trunk/Lib/sandbox/pyem/setup.py Log: Remove kmean as scipy.cluster.vq.kmeans2 does everything we need now Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2007-05-31 15:25:26 UTC (rev 3065) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-01 08:21:52 UTC (rev 3066) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Thu Nov 16 02:00 PM 2006 J +# Last Change: Fri Jun 01 05:00 PM 2007 J # TODO: # - which methods to avoid va shrinking to 0 ? There are several options, @@ -12,7 +12,8 @@ from numpy.random import randn #import _c_densities as densities import densities -from kmean import kmean +#from kmean import kmean +from scipy.cluster.vq import kmeans2 as kmean from gauss_mix import GM from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND Deleted: trunk/Lib/sandbox/pyem/kmean.py =================================================================== --- trunk/Lib/sandbox/pyem/kmean.py 2007-05-31 15:25:26 UTC (rev 3065) +++ trunk/Lib/sandbox/pyem/kmean.py 2007-06-01 08:21:52 UTC (rev 3066) @@ -1,76 +0,0 @@ -# /usr/bin/python -# Last Change: Thu Sep 28 01:00 PM 2006 J - -#TODO: -# - a demo for kmeans - -import numpy as N - -def _py_vq(data, code): - """ Please do not use directly. Use kmean instead""" - # No attempt to be efficient has been made... 
- (n, d) = data.shape - (k, d) = code.shape - - label = N.zeros(n, int) - for i in range(n): - d = N.sum((data[i, :] - code) ** 2, 1) - label[i] = N.argmin(d) - - return label - -# Try to import pyrex function for vector quantization. If not available, -# falls back on pure python implementation. -#%KMEANIMPORT% -#try: -# from scipy.cluster.vq import kmeans as kmean -#except ImportError: -# try: -# from c_gmm import _vq -# except: -# print """c_gmm._vq not found, using pure python implementation instead. -# Kmean will be REALLY slow""" -# _vq = _py_vq -try: - from scipy.cluster.vq import vq - print "using scipy.cluster.vq" - def _vq(*args, **kw): return vq(*args, **kw)[0] -except ImportError: - try: - from c_gmm import _vq - print "using pyrex vq" - except ImportError: - print """c_gmm._vq not found, using pure python implementation instead. - Kmean will be REALLY slow""" - _vq = _py_vq - -def kmean(data, init, iter = 10): - """Simple kmean implementation for EM. Runs iter iterations. - - returns a tuple (code, label), where code are the final - centroids, and label are the class label indec for each - frame (ie row) of data""" - - data = N.atleast_2d(data) - init = N.atleast_2d(init) - - (n, d) = data.shape - (k, d1) = init.shape - - if not d == d1: - msg = "data and init centers do not have same dimensions..." 
- raise GmmParamError(msg) - - code = N.asarray(init.copy()) - for i in range(iter): - # Compute the nearest neighbour for each obs - # using the current code book - label = _vq(data, code) - # Update the code by computing centroids using the new code book - for j in range(k): - code[j,:] = N.mean(data[N.where(label==j)], axis=0) - - return code, label - -if __name__ == "__main__": - pass Modified: trunk/Lib/sandbox/pyem/online_em.py =================================================================== --- trunk/Lib/sandbox/pyem/online_em.py 2007-05-31 15:25:26 UTC (rev 3065) +++ trunk/Lib/sandbox/pyem/online_em.py 2007-06-01 08:21:52 UTC (rev 3066) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Wed Dec 06 09:00 PM 2006 J +# Last Change: Fri Jun 01 05:00 PM 2007 J #--------------------------------------------- # This is not meant to be used yet !!!! I am @@ -23,7 +23,7 @@ from gmm_em import ExpMixtureModel, GMM, EM from gauss_mix import GM -from kmean import kmean +from scipy.cluster.vq import kmeans2 as kmean import densities2 as D import copy Modified: trunk/Lib/sandbox/pyem/setup.py =================================================================== --- trunk/Lib/sandbox/pyem/setup.py 2007-05-31 15:25:26 UTC (rev 3065) +++ trunk/Lib/sandbox/pyem/setup.py 2007-06-01 08:21:52 UTC (rev 3066) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Wed Dec 06 08:00 PM 2006 J +# Last Change: Fri Jun 01 05:00 PM 2007 J # TODO: # - check how to handle cmd line build options with distutils and use # it in the building process @@ -15,7 +15,6 @@ for estimating meta parameters of mixtures. 
""" from os.path import join -# This import from __init__ looks strange, should check whether there is no other way from info import version as pyem_version DISTNAME = 'pyem' @@ -32,12 +31,8 @@ config.add_data_dir('tests') config.add_data_dir('profile_data') config.add_extension('c_gden', - #define_macros=[('LIBSVM_EXPORTS', None), - # ('LIBSVM_DLL', None)], sources=[join('src', 'c_gden.c')]) config.add_extension('_rawden', - #define_macros=[('LIBSVM_EXPORTS', None), - # ('LIBSVM_DLL', None)], sources=[join('src', 'pure_den.c')]) return config @@ -47,108 +42,3 @@ #setup(**configuration(top_path='').todict()) #setup(**configuration(top_path='')) setup(configuration=configuration) -# from distutils.core import setup, Extension -# from pyem import version as pyem_version -# -# # distutils does not update MANIFEST correctly, removes it -# import os -# if os.path.exists('MANIFEST'): os.remove('MANIFEST') -# from os.path import join -# -# import re -# -# from numpy.distutils.misc_util import get_numpy_include_dirs -# NUMPYINC = get_numpy_include_dirs()[0] -# -# # General variables: -# # - DISTNAME: name of the distributed package -# # - VERSION: the version reference is in pyem/__init__.py file -# # - other upper cased variables are the same than the corresponding -# # keywords in setup call -# DISTNAME = 'pyem' -# VERSION = pyem_version -# DESCRIPTION ='A python module for Expectation Maximization learning of mixtures pdf', -# AUTHOR ='David Cournapeau', -# AUTHOR_EMAIL='david at ar.media.kyoto-u.ac.jp', -# URL ='http://ar.media.kyoto-u.ac.jp/members/david', -# -# # Source files for extensions -# -# # Functions used to substitute values in File. -# # Mainly use to replace config.h capabilities -# def do_subst_in_file(sourcefile, targetfile, dict): -# """Replace all instances of the keys of dict with their values. -# For example, if dict is {'%VERSION%': '1.2345', '%BASE%': 'MyProg'}, -# then all instances of %VERSION% in the file will be replaced with 1.2345 etc. 
-# """ -# try: -# f = open(sourcefile, 'rb') -# contents = f.read() -# f.close() -# except: -# raise IOError, "Can't read source file %s"%sourcefile -# -# for (k,v) in dict.items(): -# contents = re.sub(k, v, contents) -# try: -# f = open(targetfile, 'wb') -# f.write(contents) -# f.close() -# except: -# raise IOError, "Can't read source file %s"%sourcefile -# return 0 # success -# -# class SetupOption: -# def __init__(self): -# self.kmean = 'py' -# self.ext_modules= [Extension(join('pyem', 'c_gden'), -# sources=[join('pyem', 'src', 'c_gden.c')]) ] -# self.cmdclass = {} -# self.subsdic = {'%KMEANIMPORT%': []} -# -# def _config_kmean(self): -# # Check in this order: -# # - kmean in scipy.cluster, -# # - custom vq with pyrex -# # - custom pure python vq -# #try: -# # from scipy.cluster.vq import kmeans -# # self.kmean = 'scipy' -# # #self.subsdic['%KMEANIMPORT%'] = scipy_kmean -# #except ImportError: -# # try: -# # from Pyrex.Distutils import build_ext -# # self.kmean = 'pyrex' -# # self.ext_modules.append(Extension('pyem/c_gmm', -# # ['pyem/src/c_gmm.pyx'], include_dirs=[NUMPYINC])) -# # self.cmdclass['build_ext'] = build_ext -# # #self.subsdic['%KMEANIMPORT%'] = pyrex_kmean -# # except ImportError: -# # self.kmean = 'py' -# # #self.subsdic['%KMEANIMPORT%'] = pyrex_kmean -# try: -# from Pyrex.Distutils import build_ext -# self.kmean = 'pyrex' -# self.ext_modules.append(Extension('pyem/c_gmm', -# ['pyem/src/c_gmm.pyx'], include_dirs=[NUMPYINC])) -# self.cmdclass['build_ext'] = build_ext -# #self.subsdic['%KMEANIMPORT%'] = pyrex_kmean -# except ImportError: -# self.kmean = 'py' -# #self.subsdic['%KMEANIMPORT%'] = pyrex_kmean -# def setup(self): -# self._config_kmean() -# #import time -# #do_subst_in_file('pyem/kmean.py.in', 'pyem/kmean.py', self.subsdic) -# setup(name = DISTNAME, -# version = VERSION, -# description = DESCRIPTION, -# author = AUTHOR, -# author_email= AUTHOR_EMAIL, -# url = URL, -# packages = ['pyem', 'pyem.tests', 'pyem.profile_data'], -# ext_modules = 
self.ext_modules, -# cmdclass = self.cmdclass) -# -# stpobj = SetupOption() -# stpobj.setup() Deleted: trunk/Lib/sandbox/pyem/tests/test_kmean.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_kmean.py 2007-05-31 15:25:26 UTC (rev 3065) +++ trunk/Lib/sandbox/pyem/tests/test_kmean.py 2007-06-01 08:21:52 UTC (rev 3066) @@ -1,46 +0,0 @@ -#! /usr/bin/env python -# Last Change: Thu Sep 28 01:00 PM 2006 J - -import sys -from numpy.testing import * - -import numpy as N - -set_package_path() -from pyem.kmean import kmean -restore_path() - -#Optional: -set_local_path() -# import modules that are located in the same directory as this file. -restore_path() - -# Global data -X = N.array([[3.0, 3], [4, 3], [4, 2], - [9, 2], [5, 1], [6, 2], [9, 4], - [5, 2], [5, 4], [7, 4], [6, 5]]) - -codet1 = N.array([[3.0000, 3.0000], - [6.2000, 4.0000], - [5.8000, 1.8000]]) - -codet2 = N.array([[11.0/3, 8.0/3], - [6.7500, 4.2500], - [6.2500, 1.7500]]) - -class test_kmean(NumpyTestCase): - def check_iter1(self, level=1): - initc = N.concatenate(([[X[0]], [X[1]], [X[2]]])) - code = initc.copy() - code1 = kmean(X, code, 1)[0] - - assert_array_almost_equal(code1, codet1) - def check_iter2(self, level=1): - initc = N.concatenate(([[X[0]], [X[1]], [X[2]]])) - code = initc.copy() - code2 = kmean(X, code, 2)[0] - - assert_array_almost_equal(code2, codet2) - -if __name__ == "__main__": - NumpyTest().run() From scipy-svn at scipy.org Fri Jun 1 04:48:02 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 1 Jun 2007 03:48:02 -0500 (CDT) Subject: [Scipy-svn] r3067 - trunk/Lib/sandbox/lobpcg Message-ID: <20070601084802.8485139C16C@new.scipy.org> Author: rc Date: 2007-06-01 03:47:46 -0500 (Fri, 01 Jun 2007) New Revision: 3067 Modified: trunk/Lib/sandbox/lobpcg/lobpcg.py Log: fixed returning wrong eigenvectors Modified: trunk/Lib/sandbox/lobpcg/lobpcg.py =================================================================== --- 
trunk/Lib/sandbox/lobpcg/lobpcg.py 2007-06-01 08:21:52 UTC (rev 3066) +++ trunk/Lib/sandbox/lobpcg/lobpcg.py 2007-06-01 08:47:46 UTC (rev 3067) @@ -536,7 +536,7 @@ blockVectorBX = sc.dot( blockVectorBX, eigBlockVectorX ) + bpp blockVectorP, blockVectorAP, blockVectorBP = pp, app, bpp - + aux = blockVectorBX * _lambda[nm.newaxis,:] blockVectorR = blockVectorAX - aux @@ -550,14 +550,14 @@ if retLambdaHistory: if retResidualNormsHistory: - return _lambda, eigBlockVectorX, lambdaHistory, residualNormsHistory + return _lambda, blockVectorX, lambdaHistory, residualNormsHistory else: - return _lambda, eigBlockVectorX, lambdaHistory + return _lambda, blockVectorX, lambdaHistory else: if retResidualNormsHistory: - return _lambda, eigBlockVectorX, residualNormsHistory + return _lambda, blockVectorX, residualNormsHistory else: - return _lambda, eigBlockVectorX + return _lambda, blockVectorX ########################################################################### if __name__ == '__main__': @@ -600,3 +600,4 @@ print 'solution time:', time.clock() - tt print eigs + print vecs From scipy-svn at scipy.org Fri Jun 1 08:09:13 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 1 Jun 2007 07:09:13 -0500 (CDT) Subject: [Scipy-svn] r3068 - trunk/Lib/interpolate Message-ID: <20070601120913.6226539C00F@new.scipy.org> Author: oliphant Date: 2007-06-01 07:09:10 -0500 (Fri, 01 Jun 2007) New Revision: 3068 Modified: trunk/Lib/interpolate/__fitpack.h trunk/Lib/interpolate/interpolate.py Log: Add Lagrange interpolating polynomial Modified: trunk/Lib/interpolate/__fitpack.h =================================================================== --- trunk/Lib/interpolate/__fitpack.h 2007-06-01 08:47:46 UTC (rev 3067) +++ trunk/Lib/interpolate/__fitpack.h 2007-06-01 12:09:10 UTC (rev 3068) @@ -829,7 +829,7 @@ "integer-spaced, or cardinal spline matrix a bit faster."; static PyObject *_bsplmat(PyObject *dummy, PyObject *args) { int k,N,i,numbytes,j, equal; - int dims[2]; + 
npy_intp dims[2]; PyObject *x_i_py=NULL; PyArrayObject *x_i=NULL, *BB=NULL; double *t=NULL, *h=NULL, *ptr; @@ -970,7 +970,7 @@ "then it produces the result as if the sample distance were dx"; static PyObject *_bspldismat(PyObject *dummy, PyObject *args) { int k,N,i,j, equal, m; - int dims[2]; + npy_intp dims[2]; PyObject *x_i_py=NULL; PyArrayObject *x_i=NULL, *BB=NULL; double *t=NULL, *h=NULL, *ptr, *dptr; Modified: trunk/Lib/interpolate/interpolate.py =================================================================== --- trunk/Lib/interpolate/interpolate.py 2007-06-01 08:47:46 UTC (rev 3067) +++ trunk/Lib/interpolate/interpolate.py 2007-06-01 12:09:10 UTC (rev 3068) @@ -4,7 +4,7 @@ """ __all__ = ['interp1d', 'interp2d', 'spline', 'spleval', 'splmake', 'spltopp', - 'ppform'] + 'ppform', 'lagrange'] from numpy import shape, sometrue, rank, array, transpose, \ swapaxes, searchsorted, clip, take, ones, putmask, less, greater, \ @@ -23,6 +23,21 @@ all = sometrue(all,axis=0) return all +def lagrange(x, w): + """Return the Lagrange interpolating polynomial of the data-points (x,w) + """ + M = len(x) + p = poly1d(0.0) + for j in xrange(M): + pt = poly1d(w[j]) + for k in xrange(M): + if k == j: continue + fac = x[j]-x[k] + pt *= poly1d([1.0,-x[k]])/fac + p += pt + return p + + # !! Need to find argument for keeping initialize. If it isn't # !! found, get rid of it! From scipy-svn at scipy.org Sat Jun 2 18:12:47 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 2 Jun 2007 17:12:47 -0500 (CDT) Subject: [Scipy-svn] r3069 - trunk/Lib/ndimage/src Message-ID: <20070602221247.2FDAC39C08D@new.scipy.org> Author: stefan Date: 2007-06-02 17:12:32 -0500 (Sat, 02 Jun 2007) New Revision: 3069 Modified: trunk/Lib/ndimage/src/nd_image.c Log: Ensure clean memory de-allocation in Py_Histogram. 
Modified: trunk/Lib/ndimage/src/nd_image.c =================================================================== --- trunk/Lib/ndimage/src/nd_image.c 2007-06-01 12:09:10 UTC (rev 3068) +++ trunk/Lib/ndimage/src/nd_image.c 2007-06-02 22:12:32 UTC (rev 3069) @@ -1088,7 +1088,9 @@ &max_label, &n_results)) goto exit; - histograms = (PyArrayObject**)malloc(input->nd * n_results * + /* Set all pointers to NULL, so that freeing the memory */ + /* doesn't cause problems. */ + histograms = (PyArrayObject**)calloc(input->nd * n_results, sizeof(PyArrayObject*)); if (!histograms) { PyErr_NoMemory(); From scipy-svn at scipy.org Mon Jun 4 00:33:04 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 3 Jun 2007 23:33:04 -0500 (CDT) Subject: [Scipy-svn] r3070 - in trunk/Lib/sandbox/pyem: . profile_data Message-ID: <20070604043304.57CA539C120@new.scipy.org> Author: cdavid Date: 2007-06-03 23:32:56 -0500 (Sun, 03 Jun 2007) New Revision: 3070 Added: trunk/Lib/sandbox/pyem/data/ Modified: trunk/Lib/sandbox/pyem/densities2.py trunk/Lib/sandbox/pyem/profile_data/profile_densities.py Log: More benchmarking for basic operations in row vs col Modified: trunk/Lib/sandbox/pyem/densities2.py =================================================================== --- trunk/Lib/sandbox/pyem/densities2.py 2007-06-02 22:12:32 UTC (rev 3069) +++ trunk/Lib/sandbox/pyem/densities2.py 2007-06-04 04:32:56 UTC (rev 3070) @@ -1,7 +1,7 @@ #! /usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Wed Dec 06 09:00 PM 2006 J +# Last Change: Sat Jun 02 07:00 PM 2007 J # New version, with default numpy ordering. 
Modified: trunk/Lib/sandbox/pyem/profile_data/profile_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/profile_data/profile_densities.py 2007-06-02 22:12:32 UTC (rev 3069) +++ trunk/Lib/sandbox/pyem/profile_data/profile_densities.py 2007-06-04 04:32:56 UTC (rev 3070) @@ -1,42 +1,78 @@ import numpy as N from numpy.random import randn -from scipy.sandbox.pyem import densities as D -from scipy.sandbox.pyem import _c_densities as DC -#import tables +from numpy.ctypeslib import load_library, ndpointer +from ctypes import cdll, c_uint, c_int, c_double, POINTER + +lib = load_library("blop.so", "file") + +arg1 = ndpointer(dtype=N.float64) +arg2 = c_uint +arg3 = c_uint +arg4 = ndpointer(dtype=N.float64) +arg5 = ndpointer(dtype=N.float64) + +lib.compute.argtypes = [arg1, arg2, arg3, arg4, arg5] +lib.compute.restype = c_int +# Compare computing per component likelihood for frame per row vs frame per column +def component_likelihood(x, mu, va, log = False): + """expect one frame to be one row (rank 2). mu and var are rank 1 array.""" + d = mu.size + + return N.exp(N.sum((x - mu) ** 2, 1)) + +def component_likelihood2(x, mu, va, log = False): + """expect one frame to be one column (rank 2). mu and var are rank 1 array.""" + d = mu.size + + y = (x[0] - mu[0]) ** 2 + for i in range(1, d): + y += (x[i] - mu[i]) ** 2 + + return N.exp(y) + +def component_likelihood3(x, mu, va, log = False): + """expect one frame to be one row (rank 2). 
mu and var are rank 1 array.""" + d = mu.size + + y = N.empty(x.shape[0], x.dtype) + return lib.compute(x, x.shape[0], d, mu, y) + def bench(func, mode = 'diag'): - #=========================================== - # Diag Gaussian of dimension 20 - #=========================================== d = 30 n = 1e5 niter = 10 print "Compute %d times densities, %d dimension, %d frames" % (niter, d, n) - # Generate a model with k components, d dimensions - mu = randn(1, d) - if mode == 'diag': - va = abs(randn(1, d)) - elif mode == 'full': - va = randn(d, d) - va = N.dot(va, va.transpose()) - + mu = randn(d) + va = abs(randn(d)) + X = randn(n, d) for i in range(niter): Y = func(X, mu, va) +def bench2(func, mode = 'diag'): + d = 30 + n = 1e5 + niter = 10 + + print "Compute %d times densities, %d dimension, %d frames" % (niter, d, n) + mu = randn(d) + va = abs(randn(d)) + + X = randn(d, n) + for i in range(niter): + Y = func(X, mu, va) + def benchpy(): - bench(D.gauss_den) + bench(component_likelihood) -def benchc(): - bench(DC.gauss_den) +def benchpy3(): + bench(component_likelihood3) -def benchpyfull(): - bench(D.gauss_den, 'full') +def benchpy2(): + bench2(component_likelihood2) -def benchcfull(): - bench(DC.gauss_den, 'full') - if __name__ == "__main__": import hotshot, hotshot.stats profile_file = 'gdenpy.prof' @@ -48,7 +84,14 @@ profile_file = 'gdenc.prof' prof = hotshot.Profile(profile_file, lineevents=1) - prof.runcall(benchc) + prof.runcall(benchpy2) p = hotshot.stats.load(profile_file) print p.sort_stats('cumulative').print_stats(20) prof.close() + + profile_file = 'gdenc.prof' + prof = hotshot.Profile(profile_file, lineevents=1) + prof.runcall(benchpy3) + p = hotshot.stats.load(profile_file) + print p.sort_stats('cumulative').print_stats(20) + prof.close() From scipy-svn at scipy.org Mon Jun 4 07:32:07 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 4 Jun 2007 06:32:07 -0500 (CDT) Subject: [Scipy-svn] r3071 - trunk/Lib/cluster Message-ID: 
<20070604113207.771B239C1B8@new.scipy.org> Author: cdavid Date: 2007-06-04 06:32:00 -0500 (Mon, 04 Jun 2007) New Revision: 3071 Modified: trunk/Lib/cluster/vq.py Log: Add a TODO for kmeans. Modified: trunk/Lib/cluster/vq.py =================================================================== --- trunk/Lib/cluster/vq.py 2007-06-04 04:32:56 UTC (rev 3070) +++ trunk/Lib/cluster/vq.py 2007-06-04 11:32:00 UTC (rev 3071) @@ -13,12 +13,20 @@ Calculate code book membership of obs kmeans(obs,k_or_guess,iter=20,thresh=1e-5) -- Train a codebook for mimimum distortion using the kmeans algorithm + kmeans2 + Similar to kmeans, but with several initialization methods. """ __docformat__ = 'restructuredtext' __all__ = ['whiten', 'vq', 'kmeans', 'kmeans2'] +# TODO: +# - implements high level method for running several times kmeans with +# different initialialization +# - warning: what happens if different number of clusters ? For now, emit a +# warning, but it is not great, because I am not sure it really make sense to +# succeed in this case (maybe an exception is better ?) import warnings from numpy.random import randint From scipy-svn at scipy.org Thu Jun 7 22:23:33 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Jun 2007 21:23:33 -0500 (CDT) Subject: [Scipy-svn] r3072 - trunk/Lib/sandbox/pyem Message-ID: <20070608022333.E11F639C0C6@new.scipy.org> Author: cdavid Date: 2007-06-07 21:23:29 -0500 (Thu, 07 Jun 2007) New Revision: 3072 Modified: trunk/Lib/sandbox/pyem/gauss_mix.py Log: Refactor 1d computation for plotting Modified: trunk/Lib/sandbox/pyem/gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-04 11:32:00 UTC (rev 3071) +++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-08 02:23:29 UTC (rev 3072) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Thu Nov 16 08:00 PM 2006 J +# Last Change: Mon Jun 04 07:00 PM 2007 J # Module to implement GaussianMixture class. 
@@ -264,6 +264,7 @@ raise GmParamError("""Parameters of the model has not been set yet, please set them using self.set_param()""") + assert self.d > 1 k = self.k Xe, Ye = self.conf_ellipses(*args, **kargs) try: @@ -287,13 +288,15 @@ """ # This is not optimized at all, may be slow. Should not be # difficult to make much faster, but it is late, and I am lazy + # XXX separete the computation from the plotting if not self.d == 1: raise GmParamError("the model is not one dimensional model") from scipy.stats import norm nrm = norm(0, 1) pval = N.sqrt(self.va[:,0]) * nrm.ppf((1+level)/2) - # Compute reasonable min/max for the normal pdf + # Compute reasonable min/max for the normal pdf: [-mc * std, mc * std] + # gives the range we are taking in account for each gaussian mc = 3 std = N.sqrt(self.va[:,0]) m = N.amin(self.mu[:, 0] - mc * std) @@ -338,6 +341,17 @@ except ImportError: raise GmParamError("matplotlib not found, cannot plot...") + def _get_component_pdf(self, x): + """Returns a list of pdf, one for each component. Summing them gives + the pdf of the mixture.""" + std = N.sqrt(self.va[:,0]) + retval = N.empty((x.size, self.k)) + for c in range(self.k): + retval[:, c] = self.w[c]/(N.sqrt(2*N.pi) * std[c]) * \ + N.exp(-(x-self.mu[c][0])**2/(2*std[c]**2)) + + return retval + # Syntactic sugar def __repr__(self): repr = "" From scipy-svn at scipy.org Thu Jun 7 22:25:11 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Jun 2007 21:25:11 -0500 (CDT) Subject: [Scipy-svn] r3073 - in trunk/Lib/sandbox/pyem: . 
profile_data Message-ID: <20070608022511.75EA239C0C6@new.scipy.org> Author: cdavid Date: 2007-06-07 21:25:02 -0500 (Thu, 07 Jun 2007) New Revision: 3073 Added: trunk/Lib/sandbox/pyem/profile_data/blop.c trunk/Lib/sandbox/pyem/profile_data/gden.m trunk/Lib/sandbox/pyem/profile_data/mat_prof.m Removed: trunk/Lib/sandbox/pyem/.bzrignore trunk/Lib/sandbox/pyem/test_reg.py Modified: trunk/Lib/sandbox/pyem/TODO Log: Add some profiling scripts to compare likelihood computation with matlab. Deleted: trunk/Lib/sandbox/pyem/.bzrignore =================================================================== --- trunk/Lib/sandbox/pyem/.bzrignore 2007-06-08 02:23:29 UTC (rev 3072) +++ trunk/Lib/sandbox/pyem/.bzrignore 2007-06-08 02:25:02 UTC (rev 3073) @@ -1,29 +0,0 @@ -dist -pyem/src/c_gmm.c -MANIFEST -build -pyem/bench1prof -pyem/diag.dat -pyem/gdenprof -tmp.py -test.py -profile_gmm_em.py -data.h5 -gmmprof -valgrind-python.supp -valgrind-python.supp -pyem/ -pyem/matcode/ -pyem/tmp/ -pyem/tmp/kmean.py -pyem/tmp/blop.py -pyem/tmp/ -pyem/tmp -matcode -../MSG -MSG -exinfo.py -blop -*.prog -*.prof -test_storage.py Modified: trunk/Lib/sandbox/pyem/TODO =================================================================== --- trunk/Lib/sandbox/pyem/TODO 2007-06-08 02:23:29 UTC (rev 3072) +++ trunk/Lib/sandbox/pyem/TODO 2007-06-08 02:25:02 UTC (rev 3073) @@ -1,11 +1,12 @@ -# Last Change: Mon May 28 11:00 AM 2007 J +# Last Change: Mon Jun 04 07:00 PM 2007 J Things which must be implemented for a 1.0 version (in importante order) - A classifier + - handle rank 1 for 1d data - basic regularization - - Use scipy.cluster kmeans instead of our own, as it now provides all - necessary functionalities. 
+ - docstrings + - demo for pdf estimtation, discriminant analysis and clustering Things which would be nice (after 1.0 version): - Bayes prior (hard, suppose MCMC) Added: trunk/Lib/sandbox/pyem/profile_data/blop.c =================================================================== --- trunk/Lib/sandbox/pyem/profile_data/blop.c 2007-06-08 02:23:29 UTC (rev 3072) +++ trunk/Lib/sandbox/pyem/profile_data/blop.c 2007-06-08 02:25:02 UTC (rev 3073) @@ -0,0 +1,37 @@ +#include +#include + +int compute(const double *in, size_t n, size_t d, const double* mu, double* out) +{ + size_t i, j; + double acc; + + for (i = 0; i < n; ++i) { + acc = 0; + for (j = 0; j < d; ++j) { + acc += (in[i*d+j] - mu[j]) * (in[i*d+j] - mu[j]); + } + out[i] = exp(acc); + } + + return 0; +} + +#if 0 +int main(void) +{ + const size_t n = 1e5; + const size_t d = 30; + size_t iter = 10, i; + + double *in, *out; + + in = malloc(sizeof(*in) * n * d); + out = malloc(sizeof(*out) * n * d); + + for (i = 0; i < iter; ++i) { + } + free(in); + out(in); +} +#endif Added: trunk/Lib/sandbox/pyem/profile_data/gden.m =================================================================== --- trunk/Lib/sandbox/pyem/profile_data/gden.m 2007-06-08 02:23:29 UTC (rev 3072) +++ trunk/Lib/sandbox/pyem/profile_data/gden.m 2007-06-08 02:25:02 UTC (rev 3073) @@ -0,0 +1,10 @@ +function out = gden(x, mu) + +% Last Change: Mon Jun 04 10:00 AM 2007 J +[n, d] = size(x); +[nm, dm] = size(mu); +if nm ~= n + out = sum(x-repmat(mu, n, 1), 1); +else + out = sum(x-mu, 1); +end; Added: trunk/Lib/sandbox/pyem/profile_data/mat_prof.m =================================================================== --- trunk/Lib/sandbox/pyem/profile_data/mat_prof.m 2007-06-08 02:23:29 UTC (rev 3072) +++ trunk/Lib/sandbox/pyem/profile_data/mat_prof.m 2007-06-08 02:25:02 UTC (rev 3073) @@ -0,0 +1,11 @@ +% Last Change: Mon Jun 04 10:00 AM 2007 J + +n = 1e5; +d = 30; + +x = randn(n, d); +mu = randn(n, d); + +for i=1:10 + y = gden(x, mu); +end; Deleted: 
trunk/Lib/sandbox/pyem/test_reg.py =================================================================== --- trunk/Lib/sandbox/pyem/test_reg.py 2007-06-08 02:23:29 UTC (rev 3072) +++ trunk/Lib/sandbox/pyem/test_reg.py 2007-06-08 02:25:02 UTC (rev 3073) @@ -1,44 +0,0 @@ -import numpy as N - -from gauss_mix import GM -from gmm_em import GMM, EM - -from numpy.random import seed - -def test_reg(): - seed(0) - # Generate data with a few components - d = 2 - k = 1 - n = 500 - - w, mu, va = GM.gen_param(d, k) - gm = GM.fromvalues(w, mu, va) - - data = gm.sample(n) - - # Try to learn with an insane number of components - gmm = GMM(GM(d, 30), 'random') - - em = EM() - like= em.train(data, gmm, 20, 1e-20) - - # import pylab as P - # P.subplot(2, 1, 1) - # P.plot(data[:, 0], data[:, 1], '.') - # gmm.gm.plot() - # P.subplot(2, 1, 2) - # P.plot(like) - # print like - # P.show() - -if __name__ == "__main__": - # import hotshot, hotshot.stats - # profile_file = 'manyk.prof' - # prof = hotshot.Profile(profile_file, lineevents=1) - # prof.runcall(test_reg) - # p = hotshot.stats.load(profile_file) - # print p.sort_stats('cumulative').print_stats(20) - # prof.close() - test_reg() - From scipy-svn at scipy.org Thu Jun 7 22:36:20 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Jun 2007 21:36:20 -0500 (CDT) Subject: [Scipy-svn] r3074 - in trunk/Lib/sandbox/pyem/data: . 
oldfaithful oldfaithful/src Message-ID: <20070608023620.7606339C0C6@new.scipy.org> Author: cdavid Date: 2007-06-07 21:36:11 -0500 (Thu, 07 Jun 2007) New Revision: 3074 Added: trunk/Lib/sandbox/pyem/data/oldfaithful/ trunk/Lib/sandbox/pyem/data/oldfaithful/README trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py trunk/Lib/sandbox/pyem/data/oldfaithful/data.py trunk/Lib/sandbox/pyem/data/oldfaithful/oldfaithful.py trunk/Lib/sandbox/pyem/data/oldfaithful/src/ trunk/Lib/sandbox/pyem/data/oldfaithful/src/Oldfaithful.txt trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC1.txt trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC2.txt trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC3.txt trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC4.txt trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC5.txt trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC6.txt trunk/Lib/sandbox/pyem/data/oldfaithful/src/convert.py Log: Add faithful data in data. Added: trunk/Lib/sandbox/pyem/data/oldfaithful/README =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/README 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/README 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,6 @@ +Each OldfaithfulC*.txt is one column of the datasets as presented in Azzalini +and Bowman. The Oldfaithful.txt is simply a cat of all thos files: this is just +to make checking easier. The data in the txt are *exactly* the same than the +ones in Azzalini and Bowman: again, post processing them in python is easy +(converting the time in seconds, etc...), and having exactly the data of the +reference makes it easier to check. 
Added: trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,8 @@ +#! /usr/bin/env python +# Last Change: Wed Apr 25 06:00 PM 2007 J +import faith as _faith +__doc__ = _faith.DESCRSHORT +copyright = _faith.COPYRIGHT +source = _faith.SOURCE + +load = _faith.load Added: trunk/Lib/sandbox/pyem/data/oldfaithful/data.py =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/data.py 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/data.py 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,94 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright (C) 2007 David Cournapeau + +# The code and descriptive text is copyrighted and offered under the terms of +# the BSD License from the authors; see below. However, the actual dataset may +# have a different origin and intellectual property status. See the SOURCE and +# COPYRIGHT variables for this information. + +# Copyright (c) 2007 David Cournapeau +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the author nor the names of any contributors may be used +# to endorse or promote products derived from this software without +# specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Last Change: Fri Jun 08 11:00 AM 2007 J + +"""Old faithful dataset.""" + +__docformat__ = 'restructuredtext' + +COPYRIGHT = """See SOURCE. Pr Azzalini has explicitely given his consent for +the use of those data in scipy.""" +TITLE = "Old Faithful Geyser Data" +SOURCE = """AZZALINI A., BOWMAN A. W. (1990). A look at some data on the +Old Faithful Geyser. Applied Statistics (Journal of the Royal Statistical +Society series C), vol. 39, pp. 357-365. Data collected by the Yellowstone Park +geologist, R. A. Hutchinson. + +References: + - Härdle, W. (1991) Smoothing Techniques with Implementation in S. New + York: Springer. + - Azzalini, A. and Bowman, A. W. (1990). A look at some data on the Old +Faithful geyser. Applied Statistics, 39, 357--365. + +Those data are exactly the ones from Azzalini and Bowman's article.""" + +DESCRSHORT = """Waiting time between eruptions and the duration of the +eruption for the Old Faithful geyser in Yellowstone National Park, Wyoming, +USA. Waiting times and duration time are in seconds""" + +DESCRLONG = """According to Azzalini and Bowman's article, those data +were recorded continuously from 1th August to 15th August 1985. 
+ +Some of the durations times are labelled as L, M or S (Large, Small, Medium). +According to Azzalini and Bowman's paper: "because the unbroken sequence +required measurements to be taken at night, some duration times are recorded as +L (long), S (short) and M (medium). Other data sets do not contain a con- +tinuous stream of data, making it difficult to deal with time series features." +""" + +NOTE = """Eruptions time in seconds, waiting time to next eruption (in +seconds)""" + +def load(): + """load the actual data and returns them. + + :returns: + data: recordarray + a record array of the data. + """ + import numpy + from oldfaithful import waiting, duration + assert len(waiting) == len(duration) == 299 + data = numpy.empty(len(waiting), \ + [('duration', '|S5'), ('waiting', 'int')]) + data['waiting'] = waiting + data['duration'] = duration + return data Added: trunk/Lib/sandbox/pyem/data/oldfaithful/oldfaithful.py =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/oldfaithful.py 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/oldfaithful.py 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,54 @@ +duration = [ '241', '129', 'L', 'L', 'L', 'S', '263', '257', '122', '290', + '110', '327', '97', '292', '263', '106', '280', 'S', '284', '253', + '114', '298', 'S', 'L', 'S', 'L', '170', '270', '244', '223', '211', + '268', '133', '293', '156', '249', '132', '286', '110', '276', '136', + '248', 'S', 'L', 'S', 'L', '113', '256', '125', '268', '133', '240', + '106', '260', '131', '269', '233', '200', '224', '240', '117', '316', + 'S', 'L', 'S', 'L', 'S', 'L', '212', '130', '270', '121', '249', '252', + '260', '116', '279', '229', '242', '250', '280', '109', 'L', 'M', 'L', + 'S', '267', '123', '255', '115', '280', '104', '263', '106', '276', + '112', '267', '98', '302', '109', '306', '98', '257', 'S', 'L', 'S', + '272', '120', 'L', '176', '284', '234', '117', '247', '108', '280', + 
'110', '282', '127', '287', '109', '246', '279', 'L', 'S', 'L', 'L', + '253', '248', '236', '225', '265', '148', '250', '228', '259', '232', + '281', '102', '298', '256', '275', 'L', 'L', 'L', 'L', '119', '276', + '50', '295', '104', '275', '102', '285', '110', '270', '112', '267', + '267', '240', '288', 'L', 'L', 'S', 'L', '116', '275', '120', '222', + '172', '290', '207', '263', '108', '264', '149', '271', '126', '261', + '262', '107', '295', '109', 'L', 'L', 'L', '232', '111', '282', '121', + '268', '112', '250', '114', '255', '195', '253', '113', '299', '111', + '240', '118', '286', 'L', 'S', 'L', 'L', '143', '265', '253', '262', + '120', '267', '105', '270', '97', '282', '154', '222', '254', '116', + '261', 'L', 'L', 'L', '253', '240', '248', '113', '268', '117', '253', + '103', '267', '255', '238', '263', '118', '267', '256', '115', '265', + 'M', 'L', 'S', 'L', '197', '110', '277', '110', '277', '276', '255', + '116', '299', '118', '258', '252', '272', '264', '277', 'S', 'L', 'L', + '235', 'S', '270', '108', '240', '165', '284', '238', '117', '298', + '111', '288', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'S', 'L', '116', + '260', '100', '286', '117', '281', '116', '265', '128', '245', '124', + 'L', 'L', 'S'] +waiting = [ 4800, 4260, 3420, 4800, 4500, 4620, 3600, 5160, 4620, 3360, 4860, + 3000, 5340, 3240, 5400, 4380, 3600, 4980, 3900, 4920, 5040, 3240, 5100, + 3480, 4740, 3420, 5280, 4080, 4560, 4680, 4440, 5100, 4500, 3900, 4560, + 3480, 5460, 3000, 5220, 2880, 5580, 3240, 5160, 3180, 4680, 3120, 4980, + 3600, 5220, 2940, 4800, 3600, 5520, 2580, 5340, 3600, 5040, 4140, 4440, + 4260, 6480, 3000, 4620, 3420, 4800, 3660, 4920, 2880, 4860, 4380, 3720, + 4740, 3240, 4800, 4380, 4860, 3720, 4860, 4260, 4740, 4860, 4440, 3540, + 4860, 3960, 5220, 3180, 4800, 3000, 5220, 3060, 4920, 3480, 4860, 2940, + 5520, 3000, 5280, 3720, 5580, 3360, 5340, 3060, 4740, 3480, 4920, 3120, + 5280, 3120, 4680, 4140, 4500, 4620, 3180, 4800, 3300, 5220, 3180, 5100, + 3660, 5580, 3240, 4560, 
4800, 4860, 3540, 5160, 4680, 4260, 4620, 4560, + 5640, 4500, 3000, 4980, 4920, 4320, 4620, 4500, 3900, 4740, 4320, 4680, + 4620, 4740, 4500, 4680, 3840, 4800, 2940, 5280, 3240, 5100, 3060, 5760, + 3000, 4800, 4680, 4860, 4320, 4500, 4680, 5220, 4140, 3300, 4980, 2940, + 4920, 3420, 5040, 3420, 5040, 4380, 4680, 3420, 4740, 3420, 5400, 3720, + 5220, 4680, 3120, 5880, 2880, 4680, 4740, 3900, 5040, 3000, 4980, 3600, + 4800, 3000, 5280, 3000, 5040, 4440, 4560, 3900, 5340, 2940, 5280, 3060, + 4680, 5100, 3900, 4500, 4620, 4140, 5520, 4080, 5220, 3660, 4860, 3300, + 5580, 3180, 5040, 4200, 4380, 5580, 3000, 5220, 4620, 4440, 4320, 4920, + 4440, 4800, 2940, 5460, 3180, 5160, 2940, 4740, 5340, 5220, 4560, 3540, + 4800, 5340, 2700, 5580, 4320, 4260, 3240, 4740, 4440, 3900, 4680, 3420, + 5220, 4320, 5040, 2820, 5040, 3420, 5220, 4080, 5160, 4500, 4380, 3180, + 4920, 5580, 4620, 3240, 5760, 2880, 5340, 3780, 5040, 4560, 3720, 4980, + 3000, 5100, 4680, 4680, 4860, 4680, 4560, 4440, 4860, 3960, 5040, 2880, + 5580, 2820, 5220, 3060, 4680, 3240, 5220, 3120, 5100, 3480, 5280, 4740] Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/Oldfaithful.txt =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/src/Oldfaithful.txt 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/Oldfaithful.txt 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,299 @@ +4:01,80 +2:09 ,71 +L,57 +L,80 +L,75 +S,77 +4:23,60 +4:17,86 +2:02,77 +4:50,56 +1:50,81 +5:27,50 +1:37,89 +4:52,54 +4:23,90 +1:46,73 +4:40,60 +S,83 +4:44,65 +4:13,82 +1:54,84 +4:58,54 +S,85 +L,58 +S,79 +L,57 +2:50,88 +4:30,68 +4:04,76 +3:43,78 +3:31,74 +4:28,85 +2:13,75 +4:53,65 +2:36,76 +4:09,58 +2:12 ,91 +4:46,50 +1:50,87 +4:36,48 +2:16,93 +4:08,54 +S,86 +L,53 +S,78 +L,52 +1:53,83 +4:16,60 +2:05,87 +4:28,49 +2:13,80 +4:00,60 +1:46,92 +4:20,43 +2:11,89 +4:29,60 +3:53,84 +3:20,69 +3:44,74 +4:00,71 +1:57,108 +5:16,50 +S,77 +L,57 +S,80 +L,61 +S,82 +L,48 
+3:32,81 +2:10,73 +4:30,62 +2:01,79 +4:09,54 +4:12,80 +4:20,73 +1:56,81 +4:39,62 +3:49,81 +4:02,71 +4:10,79 +4:40,81 +1:49,74 +L,59 +M,81 +L,66 +S,87 +4:27,53 +2:03,80 +4:15,50 +1:55,87 +4:40,51 +1:44,82 +4:23,58 +1:46,81 +4:36,49 +1:52,92 +4:27,50 +1:38,88 +5:02,62 +1:49,93 +5:06,56 +1:38,89 +4:17,51 +S,79 +L,58 +S,82 +4:32,52 +2:00,88 +L,52 +2:56,78 +4:44,69 +3:54,75 +1:57,77 +4:07,53 +1:48,80 +4:40,55 +1:50,87 +4:42,53 +2:07,85 +4:47,61 +1:49,93 +4:06,54 +4:39,76 +L,80 +S,81 +L,59 +L,86 +4:13,78 +4:08,71 +3:56,77 +3:45,76 +4:25,94 +2:28,75 +4:10,50 +3:48,83 +4:19,82 +3:52,72 +4:41,77 +1:42,75 +4:58,65 +4:16,79 +4:35,72 +L,78 +L,77 +L,79 +L,75 +1:59,78 +4:36,64 +0:50,80 +4:55,49 +1:44,88 +4:35,54 +1:42,85 +4:45,51 +1:50,96 +4:30,50 +1:52,80 +4:27,78 +4:27,81 +4:00,72 +4:48,75 +L,78 +L,87 +S,69 +L,55 +1:56,83 +4:35,49 +2:00,82 +3:42,57 +2:52,84 +4:50,57 +3:27,84 +4:23,73 +1:48,78 +4:24,57 +2:29,79 +4:31,57 +2:06,90 +4:21,62 +4:22,87 +1:47,78 +4:55,52 +1:49,98 +L,48 +L,78 +L,79 +3:52,65 +1:51,84 +4:42,50 +2:01,83 +4:28,60 +1:52,80 +4:10,50 +1:54,88 +4:15,50 +3:15,84 +4:13,74 +1:53,76 +4:59,65 +1:51,89 +4:00,49 +1:58,88 +4:46,51 +L,78 +S,85 +L,65 +L,75 +2:23,77 +4:25,69 +4:13,92 +4:22,68 +2:00,87 +4:27,61 +1:45,81 +4:30,55 +1:37,93 +4:42,53 +2:34,84 +3:42,70 +4:14,73 +1:56,93 +4:21,50 +L,87 +L,77 +L,74 +4:13,72 +4:00,82 +4:08,74 +1:53,80 +4:28,49 +1:57,91 +4:13,53 +1:43,86 +4:27,49 +4:15,79 +3:58,89 +4:23,87 +1:58,76 +4:27,59 +4:16,80 +1:55,89 +4:25,45 +M,93 +L,72 +S,71 +L,54 +3:17,79 +1:50,74 +4:37,65 +1:50,78 +4:37,57 +4:36,87 +4:15,72 +1:56,84 +4:59,47 +1:58,84 +4:18,57 +4:12,87 +4:32,68 +4:24,86 +4:37,75 +S,73 +L,53 +L,82 +3:55,93 +S,77 +4:30,54 +1:48,96 +4:00,48 +2:45,89 +4:44,63 +3:58,84 +1:57,76 +4:58,62 +1:51,83 +4:48,50 +L,85 +L,78 +L,78 +L,81 +L,78 +L,76 +L,74 +S,81 +L,66 +1:56,84 +4:20,48 +1:40,93 +4:46,47 +1:57,87 +4:41,51 +1:56,78 +4:25,54 +2:08,87 +4:05,52 +2:04,85 +L,58 +L,88 +S,79 Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC1.txt 
=================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC1.txt 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC1.txt 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,50 @@ +4:01,80 +2:09 ,71 +L,57 +L,80 +L,75 +S,77 +4:23,60 +4:17,86 +2:02,77 +4:50,56 +1:50,81 +5:27,50 +1:37,89 +4:52,54 +4:23,90 +1:46,73 +4:40,60 +S,83 +4:44,65 +4:13,82 +1:54,84 +4:58,54 +S,85 +L,58 +S,79 +L,57 +2:50,88 +4:30,68 +4:04,76 +3:43,78 +3:31,74 +4:28,85 +2:13,75 +4:53,65 +2:36,76 +4:09,58 +2:12 ,91 +4:46,50 +1:50,87 +4:36,48 +2:16,93 +4:08,54 +S,86 +L,53 +S,78 +L,52 +1:53,83 +4:16,60 +2:05,87 +4:28,49 Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC2.txt =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC2.txt 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC2.txt 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,50 @@ +2:13,80 +4:00,60 +1:46,92 +4:20,43 +2:11,89 +4:29,60 +3:53,84 +3:20,69 +3:44,74 +4:00,71 +1:57,108 +5:16,50 +S,77 +L,57 +S,80 +L,61 +S,82 +L,48 +3:32,81 +2:10,73 +4:30,62 +2:01,79 +4:09,54 +4:12,80 +4:20,73 +1:56,81 +4:39,62 +3:49,81 +4:02,71 +4:10,79 +4:40,81 +1:49,74 +L,59 +M,81 +L,66 +S,87 +4:27,53 +2:03,80 +4:15,50 +1:55,87 +4:40,51 +1:44,82 +4:23,58 +1:46,81 +4:36,49 +1:52,92 +4:27,50 +1:38,88 +5:02,62 +1:49,93 Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC3.txt =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC3.txt 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC3.txt 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,50 @@ +5:06,56 +1:38,89 +4:17,51 +S,79 +L,58 +S,82 +4:32,52 +2:00,88 +L,52 +2:56,78 +4:44,69 +3:54,75 +1:57,77 +4:07,53 +1:48,80 +4:40,55 +1:50,87 +4:42,53 +2:07,85 
+4:47,61 +1:49,93 +4:06,54 +4:39,76 +L,80 +S,81 +L,59 +L,86 +4:13,78 +4:08,71 +3:56,77 +3:45,76 +4:25,94 +2:28,75 +4:10,50 +3:48,83 +4:19,82 +3:52,72 +4:41,77 +1:42,75 +4:58,65 +4:16,79 +4:35,72 +L,78 +L,77 +L,79 +L,75 +1:59,78 +4:36,64 +0:50,80 +4:55,49 Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC4.txt =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC4.txt 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC4.txt 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,50 @@ +1:44,88 +4:35,54 +1:42,85 +4:45,51 +1:50,96 +4:30,50 +1:52,80 +4:27,78 +4:27,81 +4:00,72 +4:48,75 +L,78 +L,87 +S,69 +L,55 +1:56,83 +4:35,49 +2:00,82 +3:42,57 +2:52,84 +4:50,57 +3:27,84 +4:23,73 +1:48,78 +4:24,57 +2:29,79 +4:31,57 +2:06,90 +4:21,62 +4:22,87 +1:47,78 +4:55,52 +1:49,98 +L,48 +L,78 +L,79 +3:52,65 +1:51,84 +4:42,50 +2:01,83 +4:28,60 +1:52,80 +4:10,50 +1:54,88 +4:15,50 +3:15,84 +4:13,74 +1:53,76 +4:59,65 +1:51,89 Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC5.txt =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC5.txt 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC5.txt 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,50 @@ +4:00,49 +1:58,88 +4:46,51 +L,78 +S,85 +L,65 +L,75 +2:23,77 +4:25,69 +4:13,92 +4:22,68 +2:00,87 +4:27,61 +1:45,81 +4:30,55 +1:37,93 +4:42,53 +2:34,84 +3:42,70 +4:14,73 +1:56,93 +4:21,50 +L,87 +L,77 +L,74 +4:13,72 +4:00,82 +4:08,74 +1:53,80 +4:28,49 +1:57,91 +4:13,53 +1:43,86 +4:27,49 +4:15,79 +3:58,89 +4:23,87 +1:58,76 +4:27,59 +4:16,80 +1:55,89 +4:25,45 +M,93 +L,72 +S,71 +L,54 +3:17,79 +1:50,74 +4:37,65 +1:50,78 Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC6.txt =================================================================== --- 
trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC6.txt 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC6.txt 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,49 @@ +4:37,57 +4:36,87 +4:15,72 +1:56,84 +4:59,47 +1:58,84 +4:18,57 +4:12,87 +4:32,68 +4:24,86 +4:37,75 +S,73 +L,53 +L,82 +3:55,93 +S,77 +4:30,54 +1:48,96 +4:00,48 +2:45,89 +4:44,63 +3:58,84 +1:57,76 +4:58,62 +1:51,83 +4:48,50 +L,85 +L,78 +L,78 +L,81 +L,78 +L,76 +L,74 +S,81 +L,66 +1:56,84 +4:20,48 +1:40,93 +4:46,47 +1:57,87 +4:41,51 +1:56,78 +4:25,54 +2:08,87 +4:05,52 +2:04,85 +L,58 +L,88 +S,79 Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/convert.py =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/src/convert.py 2007-06-08 02:25:02 UTC (rev 3073) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/convert.py 2007-06-08 02:36:11 UTC (rev 3074) @@ -0,0 +1,43 @@ +#! /usr/bin/env python +# Last Change: Fri Jun 08 11:00 AM 2007 J + +# This script generates a python file from the txt data +import csv + +dataname = 'Oldfaithful.txt' +f = open(dataname, 'r') +a = csv.reader(f) +el = [i for i in a] +duration = [i[0] for i in el] +waiting = [i[1] for i in el] + +# Convert duration and waiting times in second +duration2 = [] +for i in range(len(duration)): + if duration[i] == 'L': + duration2.append('L') + elif duration[i] == 'M': + duration2.append('M') + elif duration[i] == 'S': + duration2.append('S') + else: + m, s = duration[i].split(':') + m = int(m) + s = int(s) + assert s >= 0 and s < 60 + duration2.append(m * 60 + s) +waiting2 = [int(i) * 60 for i in waiting] + +# Write the data in oldfaitful.py +a = open("oldfaithful.py", "w") + +a.write("duration = [\n") +for i in range(len(duration2) - 1): + a.write("'%s', " % duration2[i]) +a.write("'%s']\n" % duration2[-1]) + +a.write("waiting = [\n") +for i in range(len(waiting2) - 1): + a.write("%s, " % waiting2[i]) +a.write("%s]\n" % waiting2[-1]) 
+a.close() Property changes on: trunk/Lib/sandbox/pyem/data/oldfaithful/src/convert.py ___________________________________________________________________ Name: svn:executable + * From scipy-svn at scipy.org Fri Jun 8 00:09:31 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Jun 2007 23:09:31 -0500 (CDT) Subject: [Scipy-svn] r3075 - in trunk/Lib/cluster: . tests Message-ID: <20070608040931.881BB39C167@new.scipy.org> Author: cdavid Date: 2007-06-07 23:09:24 -0500 (Thu, 07 Jun 2007) New Revision: 3075 Modified: trunk/Lib/cluster/tests/test_vq.py trunk/Lib/cluster/vq.py Log: Modify kmeans2 arguments order so that they conform to the ones from kmeans Modified: trunk/Lib/cluster/tests/test_vq.py =================================================================== --- trunk/Lib/cluster/tests/test_vq.py 2007-06-08 02:36:11 UTC (rev 3074) +++ trunk/Lib/cluster/tests/test_vq.py 2007-06-08 04:09:24 UTC (rev 3075) @@ -1,7 +1,7 @@ #! /usr/bin/env python # David Cournapeau -# Last Change: Sat May 05 06:00 PM 2007 J +# Last Change: Fri Jun 08 12:00 PM 2007 J # For now, just copy the tests from sandbox.pyem, so we can check that # kmeans works OK for trivial examples. 
@@ -72,6 +72,7 @@ # print _py_vq_1d(data, initc) class test_kmean(NumpyTestCase): + #def check_kmeans def check_kmeans_simple(self, level=1): initc = N.concatenate(([[X[0]], [X[1]], [X[2]]])) code = initc.copy() @@ -93,8 +94,8 @@ """Testing simple call to kmeans2 and its results.""" initc = N.concatenate(([[X[0]], [X[1]], [X[2]]])) code = initc.copy() - code1 = kmeans2(X, code, niter = 1)[0] - code2 = kmeans2(X, code, niter = 2)[0] + code1 = kmeans2(X, code, iter = 1)[0] + code2 = kmeans2(X, code, iter = 2)[0] assert_array_almost_equal(code1, CODET1) assert_array_almost_equal(code2, CODET2) Modified: trunk/Lib/cluster/vq.py =================================================================== --- trunk/Lib/cluster/vq.py 2007-06-08 02:36:11 UTC (rev 3074) +++ trunk/Lib/cluster/vq.py 2007-06-08 04:09:24 UTC (rev 3075) @@ -466,7 +466,7 @@ _valid_init_meth = {'random': _krandinit, 'points': _kpoints} -def kmeans2(data, k, minit='random', niter=10): +def kmeans2(data, k, iter = 10, thresh = 1e-5, minit='random'): """Classify a set of points into k clusters using kmean algorithm. The algorithm works by minimizing the euclidian distance between data points @@ -481,6 +481,10 @@ k : int or ndarray Number of clusters. If a ndarray is given instead, it is interpreted as initial cluster to use instead. + niter : int + Number of iterations to run. + niter : float + (not used yet). minit : string Method for initialization. Available methods are random, points and uniform: @@ -493,9 +497,6 @@ uniform choses k points from the data such are they form a uniform grid od the dataset. - niter : int - Number of iterations to run. - :Returns: clusters : ndarray the found clusters (one cluster per row). 
@@ -535,8 +536,8 @@ raise ValueError("unknown init method %s" % str(minit)) clusters = init(data, k) - assert not niter == 0 - return _kmeans2(data, clusters, niter, nc) + assert not iter == 0 + return _kmeans2(data, clusters, iter, nc) def _kmeans2(data, code, niter, nc): """ "raw" version of kmeans2. Do not use directly. From scipy-svn at scipy.org Fri Jun 8 00:14:20 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Jun 2007 23:14:20 -0500 (CDT) Subject: [Scipy-svn] r3076 - trunk/Lib/cluster Message-ID: <20070608041420.782BB39C167@new.scipy.org> Author: cdavid Date: 2007-06-07 23:14:14 -0500 (Thu, 07 Jun 2007) New Revision: 3076 Modified: trunk/Lib/cluster/vq.py Log: Fix typo in kmeans2 docstring. Modified: trunk/Lib/cluster/vq.py =================================================================== --- trunk/Lib/cluster/vq.py 2007-06-08 04:09:24 UTC (rev 3075) +++ trunk/Lib/cluster/vq.py 2007-06-08 04:14:14 UTC (rev 3076) @@ -483,7 +483,7 @@ interpreted as initial cluster to use instead. niter : int Number of iterations to run. - niter : float + thresh : float (not used yet). minit : string Method for initialization. Available methods are random, points and From scipy-svn at scipy.org Fri Jun 8 00:42:30 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 7 Jun 2007 23:42:30 -0500 (CDT) Subject: [Scipy-svn] r3077 - trunk/Lib/cluster Message-ID: <20070608044230.9020439C0C6@new.scipy.org> Author: cdavid Date: 2007-06-07 23:42:23 -0500 (Thu, 07 Jun 2007) New Revision: 3077 Modified: trunk/Lib/cluster/vq.py Log: Additional init method for kmeans2, 'matrix', to avoid possible confusion between k number of components and k unique initial cluster. 
Close #443 Modified: trunk/Lib/cluster/vq.py =================================================================== --- trunk/Lib/cluster/vq.py 2007-06-08 04:14:14 UTC (rev 3076) +++ trunk/Lib/cluster/vq.py 2007-06-08 04:42:23 UTC (rev 3077) @@ -479,8 +479,8 @@ dimensional data, rank 2 multidimensional data, in which case one row is one observation. k : int or ndarray - Number of clusters. If a ndarray is given instead, it is - interpreted as initial cluster to use instead. + Number of clusters. If minit arg is 'matrix', or if a ndarray is + given instead, it is interpreted as initial cluster to use instead. niter : int Number of iterations to run. thresh : float @@ -495,8 +495,11 @@ points choses k points at random from the points in data. uniform choses k points from the data such are they form a uniform - grid od the dataset. + grid od the dataset (not supported yet). + matrix means that k has to be interpreted as initial clusters + (format is the same than data). + :Returns: clusters : ndarray the found clusters (one cluster per row). @@ -517,7 +520,7 @@ # If k is not a single value, then it should be compatible with data's # shape - if N.size(k) > 1: + if N.size(k) > 1 or minit == 'matrix': if not nd == N.ndim(k): raise ValueError("k is not an int and has not same rank than data") if d == 1: @@ -529,7 +532,9 @@ data") clusters = k.copy() else: - nc = k + nc = int(k) + if not nc == k: + warnings.warn("k was not an integer, was converted.") try: init = _valid_init_meth[minit] except KeyError: From scipy-svn at scipy.org Fri Jun 8 07:15:47 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 8 Jun 2007 06:15:47 -0500 (CDT) Subject: [Scipy-svn] r3078 - in trunk/Lib/sandbox/pyem: . 
data/oldfaithful Message-ID: <20070608111547.A99DE39C0F3@new.scipy.org> Author: cdavid Date: 2007-06-08 06:15:39 -0500 (Fri, 08 Jun 2007) New Revision: 3078 Modified: trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py trunk/Lib/sandbox/pyem/densities.py trunk/Lib/sandbox/pyem/gauss_mix.py trunk/Lib/sandbox/pyem/gmm_em.py trunk/Lib/sandbox/pyem/online_em.py Log: Add function to plot density contours in GM. Modified: trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py 2007-06-08 04:42:23 UTC (rev 3077) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py 2007-06-08 11:15:39 UTC (rev 3078) @@ -1,6 +1,6 @@ #! /usr/bin/env python -# Last Change: Wed Apr 25 06:00 PM 2007 J -import faith as _faith +# Last Change: Fri Jun 08 12:00 PM 2007 J +import data as _faith __doc__ = _faith.DESCRSHORT copyright = _faith.COPYRIGHT source = _faith.SOURCE Modified: trunk/Lib/sandbox/pyem/densities.py =================================================================== --- trunk/Lib/sandbox/pyem/densities.py 2007-06-08 04:42:23 UTC (rev 3077) +++ trunk/Lib/sandbox/pyem/densities.py 2007-06-08 11:15:39 UTC (rev 3078) @@ -1,7 +1,7 @@ #! /usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Fri Nov 10 10:00 AM 2006 J +# Last Change: Fri Jun 08 07:00 PM 2007 J import numpy as N import numpy.linalg as lin @@ -188,6 +188,10 @@ else: raise DenError("mean and variance are not dim conformant") + # When X is a sample from multivariante N(mu, sigma), (X-mu)Sigma^-1(X-mu) + # follows a Chi2(d) law. Here, we only take 2 dimension, so Chi2 with 2 + # degree of freedom (See Wasserman. 
This is easy to see with characteristic + # functions) chi22d = chi2(2) mahal = N.sqrt(chi22d.ppf(level)) @@ -218,6 +222,26 @@ return elps[0, :], elps[1, :] +def multiple_gauss_den(data, mu, va): + """Helper function to generate several Gaussian + pdf (different parameters) from the same data""" + mu = N.atleast_2d(mu) + va = N.atleast_2d(va) + + K = mu.shape[0] + n = data.shape[0] + d = mu.shape[1] + + y = N.zeros((K, n)) + if mu.size == va.size: + for i in range(K): + y[i] = gauss_den(data, mu[i, :], va[i, :]) + return y.T + else: + for i in range(K): + y[i] = gauss_den(data, mu[i, :], va[d*i:d*i+d, :]) + return y.T + if __name__ == "__main__": import pylab Modified: trunk/Lib/sandbox/pyem/gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-08 04:42:23 UTC (rev 3077) +++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-08 11:15:39 UTC (rev 3078) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Mon Jun 04 07:00 PM 2007 J +# Last Change: Fri Jun 08 07:00 PM 2007 J # Module to implement GaussianMixture class. @@ -344,6 +344,8 @@ def _get_component_pdf(self, x): """Returns a list of pdf, one for each component. Summing them gives the pdf of the mixture.""" + # XXX: have a public function to compute the pdf at given points + # instead... std = N.sqrt(self.va[:,0]) retval = N.empty((x.size, self.k)) for c in range(self.k): @@ -352,6 +354,47 @@ return retval + def density_on_grid(self, nx = 50, ny = 50, maxlevel = 0.95): + """Do all the necessary computation for contour plot of mixture's density. + + Returns X, Y, Z and V as expected by mpl contour function.""" + + # Ok, it is a bit gory. Basically, we want to compute the size of the + # grid. We use conf_ellipse, which will return a couple of points for + # each component, and we can find a grid size which then is just big + # enough to contain all ellipses. 
This won't work well if two + # ellipsoids are crossing each other a lot (because this assumes that + # at a given point, one component is largely dominant for its + # contribution to the pdf). + + # XXX: we need log pdf, not the pdf... this can save some computing + Xe, Ye = self.conf_ellipses(level = maxlevel) + ax = [N.min(Xe), N.max(Xe), N.min(Ye), N.max(Ye)] + + w = ax[1] - ax[0] + h = ax[3] - ax[2] + X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \ + N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny)) + lden = N.log(den) + V = [-5, -3, -1, -0.5, ] + V.extend(N.linspace(0, N.max(lden), 4).tolist()) + return X, Y, lden, N.array(V) + + def _densityctr(self, xrange, yrange): + """Helper function to compute density contours on a grid.""" + gr = N.meshgrid(xrange, yrange) + X = gr[0].flatten() + Y = gr[1].flatten() + xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1) + # XXX refactor computing pdf + d = densities.multiple_gauss_den(xdata, self.mu, self.va) * self.w + d = N.sum(d, 1) + d = d.reshape(len(yrange), len(xrange)) + + X = gr[0] + Y = gr[1] + return X, Y, d + # Syntactic sugar def __repr__(self): repr = "" Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-08 04:42:23 UTC (rev 3077) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-08 11:15:39 UTC (rev 3078) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Fri Jun 01 05:00 PM 2007 J +# Last Change: Fri Jun 08 08:00 PM 2007 J # TODO: # - which methods to avoid va shrinking to 0 ? 
There are several options, @@ -65,7 +65,8 @@ d = self.gm.d init = data[0:k, :] - (code, label) = kmean(data, init, niter) + # XXX: This is bogus: should do better (in kmean or here, do not know yet) + (code, label) = kmean(data, init, niter, minit = 'matrix') w = N.ones(k) / k mu = code.copy() @@ -135,7 +136,7 @@ n = data.shape[0] # compute the gaussian pdf - tgd = multiple_gauss_den(data, self.gm.mu, self.gm.va) + tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va) # multiply by the weight tgd *= self.gm.w # Normalize to get a pdf @@ -202,7 +203,7 @@ the data """ assert(self.isinit) # compute the gaussian pdf - tgd = multiple_gauss_den(data, self.gm.mu, self.gm.va) + tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va) # multiply by the weight tgd *= self.gm.w @@ -367,27 +368,6 @@ else: return False -def multiple_gauss_den(data, mu, va): - """Helper function to generate several Gaussian - pdf (different parameters) from the same data""" - mu = N.atleast_2d(mu) - va = N.atleast_2d(va) - - K = mu.shape[0] - n = data.shape[0] - d = mu.shape[1] - - y = N.zeros((K, n)) - if mu.size == va.size: - for i in range(K): - y[i] = densities.gauss_den(data, mu[i, :], va[i, :]) - return y.T - else: - for i in range(K): - y[i] = densities.gauss_den(data, mu[i, :], - va[d*i:d*i+d, :]) - return y.T - if __name__ == "__main__": import copy #============================= Modified: trunk/Lib/sandbox/pyem/online_em.py =================================================================== --- trunk/Lib/sandbox/pyem/online_em.py 2007-06-08 04:42:23 UTC (rev 3077) +++ trunk/Lib/sandbox/pyem/online_em.py 2007-06-08 11:15:39 UTC (rev 3078) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Fri Jun 01 05:00 PM 2007 J +# Last Change: Fri Jun 08 08:00 PM 2007 J #--------------------------------------------- # This is not meant to be used yet !!!! 
I am @@ -67,7 +67,7 @@ self.cxx = N.outer(w, mean(init_data ** 2, 0)) # w, mu and va init is the same that in the standard case - (code, label) = kmean(init_data, init_data[0:k, :], niter) + (code, label) = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix') mu = code.copy() va = N.zeros((k, d)) for i in range(k): @@ -102,7 +102,7 @@ self.cxx = N.outer(w, mean(init_data ** 2, 0)) # w, mu and va init is the same that in the standard case - (code, label) = kmean(init_data, init_data[0:k, :], niter) + (code, label) = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix') mu = code.copy() va = N.zeros((k, d)) for i in range(k): @@ -176,7 +176,7 @@ # w, mu and va init is the same that in the standard case (code, label) = kmean(init_data[:, N.newaxis], \ - init_data[0:k, N.newaxis], niter) + init_data[0:k, N.newaxis], iter = niter) mu = code.copy() va = N.zeros((k, 1)) for i in range(k): From scipy-svn at scipy.org Fri Jun 8 23:51:51 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 8 Jun 2007 22:51:51 -0500 (CDT) Subject: [Scipy-svn] r3079 - in trunk/Lib/sandbox/pyem/data: . 
oldfaithful Message-ID: <20070609035151.C21E639C12F@new.scipy.org> Author: cdavid Date: 2007-06-08 22:51:44 -0500 (Fri, 08 Jun 2007) New Revision: 3079 Added: trunk/Lib/sandbox/pyem/data/__init__.py trunk/Lib/sandbox/pyem/data/oldfaithful/COPYING Modified: trunk/Lib/sandbox/pyem/data/oldfaithful/data.py Log: Add proper license in data, correct typo (double copyright) Added: trunk/Lib/sandbox/pyem/data/__init__.py =================================================================== Added: trunk/Lib/sandbox/pyem/data/oldfaithful/COPYING =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/COPYING 2007-06-08 11:15:39 UTC (rev 3078) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/COPYING 2007-06-09 03:51:44 UTC (rev 3079) @@ -0,0 +1,34 @@ +# The code and descriptive text is copyrighted and offered under the terms of +# the BSD License from the authors; see below. However, the actual dataset may +# have a different origin and intellectual property status. See the SOURCE and +# COPYRIGHT variables for this information. + +# Copyright (c) 2007 David Cournapeau +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the author nor the names of any contributors may be used +# to endorse or promote products derived from this software without +# specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Modified: trunk/Lib/sandbox/pyem/data/oldfaithful/data.py =================================================================== --- trunk/Lib/sandbox/pyem/data/oldfaithful/data.py 2007-06-08 11:15:39 UTC (rev 3078) +++ trunk/Lib/sandbox/pyem/data/oldfaithful/data.py 2007-06-09 03:51:44 UTC (rev 3079) @@ -1,8 +1,6 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -# Copyright (C) 2007 David Cournapeau - # The code and descriptive text is copyrighted and offered under the terms of # the BSD License from the authors; see below. However, the actual dataset may # have a different origin and intellectual property status. See the SOURCE and From scipy-svn at scipy.org Sat Jun 9 02:16:29 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Jun 2007 01:16:29 -0500 (CDT) Subject: [Scipy-svn] r3080 - in trunk/Lib/sandbox/pyem: . 
tests Message-ID: <20070609061629.9C48539C191@new.scipy.org> Author: cdavid Date: 2007-06-09 01:16:08 -0500 (Sat, 09 Jun 2007) New Revision: 3080 Modified: trunk/Lib/sandbox/pyem/README trunk/Lib/sandbox/pyem/densities.py trunk/Lib/sandbox/pyem/gauss_mix.py trunk/Lib/sandbox/pyem/misc.py trunk/Lib/sandbox/pyem/tests/test_densities.py Log: Polish contour functions, so that choosing the dimension of projection works. Modified: trunk/Lib/sandbox/pyem/README =================================================================== --- trunk/Lib/sandbox/pyem/README 2007-06-09 03:51:44 UTC (rev 3079) +++ trunk/Lib/sandbox/pyem/README 2007-06-09 06:16:08 UTC (rev 3080) @@ -1,7 +1,5 @@ -Last Change: Fri Aug 04 07:00 PM 2006 J +Last Change: Sat Jun 09 12:00 PM 2007 J -Version 0.4.2 - pyem is a python module build upon numpy and scipy (see http://www.scipy.org/) for learning mixtures models using Expectation Maximization. For now, only Gaussian @@ -10,16 +8,6 @@ * computation of Gaussian pdf for multi-variate Gaussian random vectors (spherical, diagonal and full covariance matrices) * Sampling of Gaussian Mixtures Models - * Confidence ellipsoides with probability (fixed level of - 0.39 for now) + * Confidence ellipsoides with probability at arbitrary level * Classic EM for Gaussian Mixture Models * K-mean based and random initialization for EM available - -Has been tested on the following platforms: - - * Ubuntu dapper, bi Xeon 3.2 Ghz, 2 Go RAM - python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2 - * Ubuntu dapper, pentium M 1.2 ghz,. 
512 Mo Ram - python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2 - * Ubuntu dapper, minimac (ppc G4 1.42 Ghz, 1Gb RAM) - python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2 Modified: trunk/Lib/sandbox/pyem/densities.py =================================================================== --- trunk/Lib/sandbox/pyem/densities.py 2007-06-09 03:51:44 UTC (rev 3079) +++ trunk/Lib/sandbox/pyem/densities.py 2007-06-09 06:16:08 UTC (rev 3080) @@ -1,12 +1,13 @@ #! /usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Fri Jun 08 07:00 PM 2007 J +# Last Change: Sat Jun 09 02:00 PM 2007 J import numpy as N import numpy.linalg as lin from numpy.random import randn from scipy.stats import chi2 +import misc # Error classes class DenError(Exception): @@ -164,19 +165,28 @@ return y -# To plot a confidence ellipse from multi-variate gaussian pdf -def gauss_ell(mu, va, dim = [0, 1], npoints = 100, level = 0.39): +# To get coordinatea of a confidence ellipse from multi-variate gaussian pdf +def gauss_ell(mu, va, dim = misc._DEF_VIS_DIM, \ + npoints = misc._DEF_ELL_NP, \ + level = misc._DEF_LEVEL): """ Given a mean and covariance for multi-variate gaussian, returns npoints points for the ellipse of confidence given by level (all points will be inside the ellipsoides with a probability equal to level) Returns the coordinate x and y of the ellipse""" + if level >= 1 or level <= 0: + raise ValueError("level should be a scale strictly between 0 and 1.""") mu = N.atleast_1d(mu) va = N.atleast_1d(va) + d = mu.shape[0] c = N.array(dim) + print c, d + if N.any(c < 0) or N.any(c >= d): + raise ValueError("dim elements should be >= 0 and < %d (dimension"\ + " of the variance)" % d) if mu.size == va.size: mode = 'diag' else: Modified: trunk/Lib/sandbox/pyem/gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 03:51:44 UTC (rev 3079) +++ trunk/Lib/sandbox/pyem/gauss_mix.py 
2007-06-09 06:16:08 UTC (rev 3080) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Fri Jun 08 07:00 PM 2007 J +# Last Change: Sat Jun 09 03:00 PM 2007 J # Module to implement GaussianMixture class. @@ -7,7 +7,7 @@ from numpy.random import randn, rand import numpy.linalg as lin import densities -from misc import _MAX_DBL_DEV +import misc # Right now, two main usages of a Gaussian Model are possible # - init a Gaussian Model with meta-parameters, and trains it @@ -147,7 +147,8 @@ return X - def conf_ellipses(self, *args, **kargs): + def conf_ellipses(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, \ + level = misc._DEF_LEVEL): """Returns a list of confidence ellipsoids describing the Gmm defined by mu and va. Check densities.gauss_ell for details @@ -179,14 +180,14 @@ if self.mode == 'diag': for i in range(self.k): xe, ye = densities.gauss_ell(self.mu[i,:], self.va[i,:], - *args, **kargs) + dim, npoints, level) Xe.append(xe) Ye.append(ye) elif self.mode == 'full': for i in range(self.k): xe, ye = densities.gauss_ell(self.mu[i,:], self.va[i*self.d:i*self.d+self.d,:], - *args, **kargs) + dim, npoints, level) Xe.append(xe) Ye.append(ye) @@ -253,7 +254,8 @@ #================= # Plotting methods #================= - def plot(self, *args, **kargs): + def plot(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, + level = misc._DEF_LEVEL): """Plot the ellipsoides directly for the model Returns a list of lines, so that their style can be modified. 
By default, @@ -266,7 +268,7 @@ assert self.d > 1 k = self.k - Xe, Ye = self.conf_ellipses(*args, **kargs) + Xe, Ye = self.conf_ellipses(dim, npoints, level) try: import pylab as P return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in range(k)] @@ -354,7 +356,8 @@ return retval - def density_on_grid(self, nx = 50, ny = 50, maxlevel = 0.95): + def density_on_grid(self, dim = misc._DEF_VIS_DIM, nx = 50, ny = 50, + maxlevel = 0.95): """Do all the necessary computation for contour plot of mixture's density. Returns X, Y, Z and V as expected by mpl contour function.""" @@ -368,33 +371,49 @@ # contribution to the pdf). # XXX: we need log pdf, not the pdf... this can save some computing - Xe, Ye = self.conf_ellipses(level = maxlevel) + Xe, Ye = self.conf_ellipses(level = maxlevel, dim = dim) ax = [N.min(Xe), N.max(Xe), N.min(Ye), N.max(Ye)] w = ax[1] - ax[0] h = ax[3] - ax[2] X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \ - N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny)) + N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim) lden = N.log(den) V = [-5, -3, -1, -0.5, ] V.extend(N.linspace(0, N.max(lden), 4).tolist()) return X, Y, lden, N.array(V) - def _densityctr(self, xrange, yrange): + def _densityctr(self, xrange, yrange, dim = misc._DEF_VIS_DIM): """Helper function to compute density contours on a grid.""" gr = N.meshgrid(xrange, yrange) X = gr[0].flatten() Y = gr[1].flatten() xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1) # XXX refactor computing pdf - d = densities.multiple_gauss_den(xdata, self.mu, self.va) * self.w - d = N.sum(d, 1) - d = d.reshape(len(yrange), len(xrange)) + dmu = self.mu[:, dim] + dva = self._get_va(dim) + den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w + den = N.sum(den, 1) + den = den.reshape(len(yrange), len(xrange)) X = gr[0] Y = gr[1] - return X, Y, d + return X, Y, den + def _get_va(self, dim): + """Returns variance limited do dimension in dim.""" + dim = N.array(dim) + if 
dim.any() < 0 or dim.any() >= self.d: + raise ValueError("dim elements should be between 0 and dimension"\ + " of the mixture.") + if self.mode == 'diag': + return self.va[:, dim] + elif self.mode == 'full': + tidx = N.array([N.array(dim) + i * self.d for i in range(self.k)]) + tidx.flatten() + return self.va[tidx, dim] + else: + raise ValueError("Unkown mode") # Syntactic sugar def __repr__(self): repr = "" @@ -450,7 +469,7 @@ """ # Check that w is valid - if N.fabs(N.sum(w, 0) - 1) > _MAX_DBL_DEV: + if N.fabs(N.sum(w, 0) - 1) > misc._MAX_DBL_DEV: raise GmParamError('weight does not sum to 1') if not len(w.shape) == 1: Modified: trunk/Lib/sandbox/pyem/misc.py =================================================================== --- trunk/Lib/sandbox/pyem/misc.py 2007-06-09 03:51:44 UTC (rev 3079) +++ trunk/Lib/sandbox/pyem/misc.py 2007-06-09 06:16:08 UTC (rev 3080) @@ -1,5 +1,12 @@ -# Last Change: Fri Nov 10 10:00 AM 2006 J +# Last Change: Sat Jun 09 12:00 PM 2007 J +#======================================================== +# Constants used throughout the module (def args, etc...) +#======================================================== +# This is the default dimension for representing confidence ellipses +_DEF_VIS_DIM = [0, 1] +_DEF_ELL_NP = 100 +_DEF_LEVEL = 0.39 #===================================================================== # "magic number", that is number used to control regularization and co # Change them at your risk ! Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-09 03:51:44 UTC (rev 3079) +++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-09 06:16:08 UTC (rev 3080) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Mon May 28 01:00 PM 2007 J +# Last Change: Sat Jun 09 02:00 PM 2007 J # TODO: # - having "fake tests" to check that all mode (scalar, diag and full) are @@ -13,6 +13,7 @@ set_package_path() from pyem.densities import gauss_den +import pyem.densities restore_path() #Optional: @@ -105,89 +106,15 @@ self._generate_test_data_2d_full() self._check(level) +class test_gauss_ell(NumpyTestCase): + def test_dim(self): + pyem.densities.gauss_ell([0, 1], [1, 2.], [0, 1]) + try: + pyem.densities.gauss_ell([0, 1], [1, 2.], [0, 2]) + raise AssertionError("this call should not succeed, bogus dim.") + except ValueError, e: + print "Call with bogus dim did not succeed, OK" + + if __name__ == "__main__": NumpyTest().run() - -# def generate_test_data(n, d, mode = 'diag', file='test.dat'): -# """Generate a set of data of dimension d, with n frames, -# that is input data, mean, var and output of gden, so that -# other implementations can be tested against""" -# mu = randn(1, d) -# if mode == 'diag': -# va = abs(randn(1, d)) -# elif mode == 'full': -# va = randn(d, d) -# va = dot(va, va.transpose()) -# -# input = randn(n, d) -# output = gauss_den(input, mu, va) -# -# import tables -# h5file = tables.openFile(file, "w") -# -# h5file.createArray(h5file.root, 'input', input) -# h5file.createArray(h5file.root, 'mu', mu) -# h5file.createArray(h5file.root, 'va', va) -# h5file.createArray(h5file.root, 'output', output) -# -# h5file.close() -# -# def test_gauss_den(): -# """""" -# # import tables -# # import numpy as N -# # -# # filename = 'dendata.h5' -# -# # # # Dimension 1 -# # # d = 1 -# # # mu = 1.0 -# # # va = 2.0 -# -# # # X = randn(1e3, 1) -# -# # # Y = gauss_den(X, mu, va) -# -# # # h5file = tables.openFile(filename, "w") -# -# # # h5file.createArray(h5file.root, 'X', X) -# # # h5file.createArray(h5file.root, 'mu', mu) -# # # h5file.createArray(h5file.root, 'va', va) -# # # h5file.createArray(h5file.root, 'Y', Y) -# -# # # h5file.close() -# -# # # # 
Dimension 2, diag -# # # d = 2 -# # # mu = N.array([1.0, -2.0]) -# # # va = N.array([1.0, 2.0]) -# -# # # X = randn(1e3, 2) -# -# # # Y = gauss_den(X, mu, va) -# -# # # h5file = tables.openFile(filename, "w") -# -# # # h5file.createArray(h5file.root, 'X', X) -# # # h5file.createArray(h5file.root, 'mu', mu) -# # # h5file.createArray(h5file.root, 'va', va) -# # # h5file.createArray(h5file.root, 'Y', Y) -# -# # # Dimension 2, full -# # d = 2 -# # mu = N.array([[0.2, -1.0]]) -# # va = N.array([[1.2, 0.1], [0.1, 0.5]]) -# -# # X = randn(1e3, 2) -# -# # Y = gauss_den(X, mu, va) -# -# # h5file = tables.openFile(filename, "w") -# -# # h5file.createArray(h5file.root, 'X', X) -# # h5file.createArray(h5file.root, 'mu', mu) -# # h5file.createArray(h5file.root, 'va', va) -# # h5file.createArray(h5file.root, 'Y', Y) -# -# # h5file.close() -# From scipy-svn at scipy.org Sat Jun 9 02:42:24 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Jun 2007 01:42:24 -0500 (CDT) Subject: [Scipy-svn] r3081 - in trunk/Lib/sandbox/pyem: . tests Message-ID: <20070609064224.263BF39C0AC@new.scipy.org> Author: cdavid Date: 2007-06-09 01:42:03 -0500 (Sat, 09 Jun 2007) New Revision: 3081 Added: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py Modified: trunk/Lib/sandbox/pyem/densities.py trunk/Lib/sandbox/pyem/gauss_mix.py trunk/Lib/sandbox/pyem/tests/test_gmm_em.py Log: Fail nicely when call wrong plot function (plot1d for multinomial, plot for 1d models). Modified: trunk/Lib/sandbox/pyem/densities.py =================================================================== --- trunk/Lib/sandbox/pyem/densities.py 2007-06-09 06:16:08 UTC (rev 3080) +++ trunk/Lib/sandbox/pyem/densities.py 2007-06-09 06:42:03 UTC (rev 3081) @@ -1,7 +1,7 @@ #! 
/usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Sat Jun 09 02:00 PM 2007 J +# Last Change: Sat Jun 09 03:00 PM 2007 J import numpy as N import numpy.linalg as lin @@ -183,7 +183,6 @@ d = mu.shape[0] c = N.array(dim) - print c, d if N.any(c < 0) or N.any(c >= d): raise ValueError("dim elements should be >= 0 and < %d (dimension"\ " of the variance)" % d) Modified: trunk/Lib/sandbox/pyem/gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 06:16:08 UTC (rev 3080) +++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 06:42:03 UTC (rev 3081) @@ -74,6 +74,10 @@ self.va = N.zeros((k * d, d)) self.is_valid = False + if d > 1: + self.is1d = False + else: + self.is1d = True def set_param(self, weights, mu, sigma): """Set parameters of the model. Args should @@ -171,6 +175,10 @@ Will plot samples X draw from the mixture model, and plot the ellipses of equi-probability from the mean with fixed level of confidence 0.39. """ + if self.is1d: + raise ValueError("This function does not make sense for 1d " + "mixtures.") + if not self.is_valid: raise GmParamError("""Parameters of the model has not been set yet, please set them using self.set_param()""") @@ -262,11 +270,14 @@ the style is red color, and nolegend for all of them. Does not work for 1d""" + if self.is1d: + raise ValueError("This function does not make sense for 1d " + "mixtures.") + if not self.is_valid: raise GmParamError("""Parameters of the model has not been set yet, please set them using self.set_param()""") - assert self.d > 1 k = self.k Xe, Ye = self.conf_ellipses(dim, npoints, level) try: @@ -288,6 +299,10 @@ - h['gpdf'] is the line for the global pdf - h['conf'] is a list of filling area """ + if not self.is1d: + raise ValueError("This function does not make sense for " + "mixtures which are not unidimensional") + # This is not optimized at all, may be slow. 
Should not be # difficult to make much faster, but it is late, and I am lazy # XXX separete the computation from the plotting @@ -361,6 +376,9 @@ """Do all the necessary computation for contour plot of mixture's density. Returns X, Y, Z and V as expected by mpl contour function.""" + if self.is1d: + raise ValueError("This function does not make sense for 1d " + "mixtures.") # Ok, it is a bit gory. Basically, we want to compute the size of the # grid. We use conf_ellipse, which will return a couple of points for @@ -414,6 +432,7 @@ return self.va[tidx, dim] else: raise ValueError("Unkown mode") + # Syntactic sugar def __repr__(self): repr = "" Added: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py 2007-06-09 06:16:08 UTC (rev 3080) +++ trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py 2007-06-09 06:42:03 UTC (rev 3081) @@ -0,0 +1,46 @@ +#! /usr/bin/env python +# Last Change: Sat Jun 09 03:00 PM 2007 J + +# For now, just test that all mode/dim execute correctly + +import sys +from numpy.testing import * + +import numpy as N + +set_package_path() +from pyem import GM +restore_path() + +class test_BasicFunc(NumpyTestCase): + """Check that basic functionalities work.""" + def test_conf_ellip(self): + """Only test whether the call succeed. 
To check wether the result is + OK, you have to plot the results.""" + d = 3 + k = 3 + w, mu, va = GM.gen_param(d, k) + gm = GM.fromvalues(w, mu, va) + gm.conf_ellipses() + + def test_1d_bogus(self): + """Check that functions which do not make sense for 1d fail nicely.""" + d = 1 + k = 2 + w, mu, va = GM.gen_param(d, k) + gm = GM.fromvalues(w, mu, va) + try: + gm.conf_ellipses() + raise AssertionError("This should not work !") + except ValueError, e: + print "Ok, conf_ellipses failed as expected (with msg: " + str(e) + ")" + + try: + gm.density_on_grid() + raise AssertionError("This should not work !") + except ValueError, e: + print "Ok, density_grid failed as expected (with msg: " + str(e) + ")" + + +if __name__ == "__main__": + NumpyTest().run() Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-09 06:16:08 UTC (rev 3080) +++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-09 06:42:03 UTC (rev 3081) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Tue Oct 24 06:00 PM 2006 J +# Last Change: Sat Jun 09 03:00 PM 2007 J # For now, just test that all mode/dim execute correctly From scipy-svn at scipy.org Sat Jun 9 04:05:29 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Jun 2007 03:05:29 -0500 (CDT) Subject: [Scipy-svn] r3082 - in trunk/Lib/sandbox/pyem: . 
examples Message-ID: <20070609080529.0893F39C19B@new.scipy.org> Author: cdavid Date: 2007-06-09 03:05:03 -0500 (Sat, 09 Jun 2007) New Revision: 3082 Added: trunk/Lib/sandbox/pyem/examples/pdfestimation.py trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py trunk/Lib/sandbox/pyem/examples/utils.py Modified: trunk/Lib/sandbox/pyem/TODO Log: Add example of pdf estimation with EM Modified: trunk/Lib/sandbox/pyem/TODO =================================================================== --- trunk/Lib/sandbox/pyem/TODO 2007-06-09 06:42:03 UTC (rev 3081) +++ trunk/Lib/sandbox/pyem/TODO 2007-06-09 08:05:03 UTC (rev 3082) @@ -1,12 +1,12 @@ -# Last Change: Mon Jun 04 07:00 PM 2007 J +# Last Change: Sat Jun 09 04:00 PM 2007 J - Things which must be implemented for a 1.0 version (in importante order) - A classifier - handle rank 1 for 1d data - basic regularization - docstrings - - demo for pdf estimtation, discriminant analysis and clustering + - demo for pdf estimation, discriminant analysis and clustering + - scaling of data: maybe something to handle scaling internally ? Things which would be nice (after 1.0 version): - Bayes prior (hard, suppose MCMC) Added: trunk/Lib/sandbox/pyem/examples/pdfestimation.py =================================================================== --- trunk/Lib/sandbox/pyem/examples/pdfestimation.py 2007-06-09 06:42:03 UTC (rev 3081) +++ trunk/Lib/sandbox/pyem/examples/pdfestimation.py 2007-06-09 08:05:03 UTC (rev 3082) @@ -0,0 +1,50 @@ +#! /usr/bin/env python +# Last Change: Sat Jun 09 03:00 PM 2007 J + +# Example of doing pdf estimation with EM algorithm. Requires matplotlib. 
+import numpy as N +from numpy.testing import set_package_path, restore_path + +import pylab as P + +set_package_path() +import pyem +restore_path() +import utils + +oldfaithful = utils.get_faithful() + +# We want the relationship between d(t) and w(t+1), but get_faithful gives +# d(t), w(t), so we have to shift to get the "usual" faithful data +waiting = oldfaithful[1:, 1:] +duration = oldfaithful[:len(waiting), :1] +dt = N.concatenate((duration, waiting), 1) + +# Scale the data so that each component is in [0..1] +dt = utils.scale(dt) + +# This function train a mixture model with k components, returns the trained +# model and the BIC +def cluster(data, k, mode = 'full'): + d = data.shape[1] + gm = pyem.GM(d, k, mode) + gmm = pyem.GMM(gm) + em = pyem.EM() + em.train(data, gmm, maxiter = 20) + return gm, gmm.bic(data) + +# bc will contain a list of BIC values for each model trained +bc = [] +mode = 'full' +for k in range(1, 5): + # Train a model of k component, and plots isodensity curve + P.subplot(2, 2, k) + gm, b = cluster(dt, k = k, mode = mode) + bc.append(b) + + X, Y, Z, V = gm.density_on_grid() + P.contour(X, Y, Z, V) + P.plot(dt[:, 0], dt[:, 1], '.') + +print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1) +P.show() Added: trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py =================================================================== --- trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py 2007-06-09 06:42:03 UTC (rev 3081) +++ trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py 2007-06-09 08:05:03 UTC (rev 3082) @@ -0,0 +1,69 @@ +#! /usr/bin/env python +# Last Change: Sat Jun 09 04:00 PM 2007 J + +# Example of doing pdf estimation with EM algorithm. Requires matplotlib. 
+import numpy as N +from numpy.testing import set_package_path, restore_path + +import pylab as P +import matplotlib as MPL + +set_package_path() +import pyem +restore_path() +import utils + +oldfaithful = utils.get_faithful() + +duration = oldfaithful[:, :1] +waiting = oldfaithful[:, 1:] + +#dt = utils.scale(duration) +#dt = duration / 60. +dt = waiting / 60. + +# This function train a mixture model with k components, returns the trained +# model and the BIC +def cluster(data, k): + d = data.shape[1] + gm = pyem.GM(d, k) + gmm = pyem.GMM(gm) + em = pyem.EM() + em.train(data, gmm, maxiter = 20) + return gm, gmm.bic(data) + +# bc will contain a list of BIC values for each model trained, gml the +# corresponding mixture model +bc = [] +gml = [] + +for k in range(1, 8): + gm, b = cluster(dt, k = k) + bc.append(b) + gml.append(gm) + +mbic = N.argmax(bc) + +# Below is code to display a figure with histogram and best model (in the BIC sense) +# pdf, with the BIC as a function of the number of components on the right. 
+P.figure(figsize = [12, 7]) +P.subplot(1, 2, 1) +h = gml[mbic].plot1d(gpdf=True) +h['gpdf'][0].set_linestyle('-') +h['gpdf'][0].set_label('pdf of the mixture') +h['pdf'][0].set_label('pdf of individual component') +[l.set_linestyle('-') for l in h['pdf']] +[l.set_color('g') for l in h['pdf']] + +prop = MPL.font_manager.FontProperties(size='smaller') +P.legend(loc = 'best', prop = prop) + +P.hist(dt, 25, normed = 1, fill = False) +P.xlabel('waiting time between consecutive eruption (in min)') + +P.subplot(1, 2, 2) +P.plot(N.arange(1, 8), bc, 'o:') +P.xlabel("number of components") +P.ylabel("BIC") +print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1) +P.show() Added: trunk/Lib/sandbox/pyem/examples/utils.py =================================================================== --- trunk/Lib/sandbox/pyem/examples/utils.py 2007-06-09 06:42:03 UTC (rev 3081) +++ trunk/Lib/sandbox/pyem/examples/utils.py 2007-06-09 08:05:03 UTC (rev 3082) @@ -0,0 +1,44 @@ +#! /usr/bin/env python +# Last Change: Fri Jun 08 04:00 PM 2007 J + +# Various utilities for examples + +import numpy as N +from numpy.testing import set_package_path, restore_path + +# XXX: Bouah, hackish... 
Will go away once scipydata found its way +set_package_path() +from pyem.data import oldfaithful +restore_path() + +def get_faithful(): + """Return faithful data as a nx2 array, first column being duration, second + being waiting time.""" + # Load faithful data, convert waiting into integer, remove L, M and S data + data = oldfaithful.load() + tmp1 = [] + tmp2 = [] + for i in data: + if not (i[0] == 'L' or i[0] == 'M' or i[0] == 'S'): + tmp1.append(i[0]) + tmp2.append(i[1]) + + waiting = N.array([int(i) for i in tmp1], dtype = N.float) + duration = N.array([i for i in tmp2], dtype = N.float) + + waiting = waiting[:, N.newaxis] + duration = duration[:, N.newaxis] + + return N.concatenate((waiting, duration), 1) + +def scale(data): + """ Scale data such as each col is in the range [0..1]. + + Note: inplace.""" + n = N.min(data, 0) + m = N.max(data, 0) + + data -= n + data /= (m-n) + return data + From scipy-svn at scipy.org Sat Jun 9 04:12:02 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Jun 2007 03:12:02 -0500 (CDT) Subject: [Scipy-svn] r3083 - in trunk/Lib/sandbox/pyem: . 
data data/oldfaithful data/oldfaithful/src doc doc/examples examples profile_data src tests Message-ID: <20070609081202.3551F39C0E6@new.scipy.org> Author: cdavid Date: 2007-06-09 03:11:46 -0500 (Sat, 09 Jun 2007) New Revision: 3083 Modified: trunk/Lib/sandbox/pyem/ trunk/Lib/sandbox/pyem/data/ trunk/Lib/sandbox/pyem/data/oldfaithful/ trunk/Lib/sandbox/pyem/data/oldfaithful/src/ trunk/Lib/sandbox/pyem/doc/ trunk/Lib/sandbox/pyem/doc/examples/ trunk/Lib/sandbox/pyem/examples/ trunk/Lib/sandbox/pyem/profile_data/ trunk/Lib/sandbox/pyem/src/ trunk/Lib/sandbox/pyem/tests/ Log: Set svn:ignore to sane values everywhere in pyem Property changes on: trunk/Lib/sandbox/pyem ___________________________________________________________________ Name: svn:ignore - *.pyc *.swp *.pyd *.so *.prof + *.pyc *.swp *.pyd *.so *.prof Property changes on: trunk/Lib/sandbox/pyem/data ___________________________________________________________________ Name: svn:ignore + *.pyc *.swp *.pyd *.so *.prof Property changes on: trunk/Lib/sandbox/pyem/data/oldfaithful ___________________________________________________________________ Name: svn:ignore + *.pyc *.swp *.pyd *.so *.prof Property changes on: trunk/Lib/sandbox/pyem/data/oldfaithful/src ___________________________________________________________________ Name: svn:ignore + *.pyc *.swp *.pyd *.so *.prof Property changes on: trunk/Lib/sandbox/pyem/doc ___________________________________________________________________ Name: svn:ignore - *.aux *.log *.out *.tex + *.pyc *.swp *.pyd *.so *.prof Property changes on: trunk/Lib/sandbox/pyem/doc/examples ___________________________________________________________________ Name: svn:ignore + *.pyc *.swp *.pyd *.so *.prof Property changes on: trunk/Lib/sandbox/pyem/examples ___________________________________________________________________ Name: svn:ignore + *.pyc *.swp *.pyd *.so *.prof Property changes on: trunk/Lib/sandbox/pyem/profile_data 
___________________________________________________________________ Name: svn:ignore - *.pyc *.swp *.pyd *.so + *.pyc *.swp *.pyd *.so *.prof Property changes on: trunk/Lib/sandbox/pyem/src ___________________________________________________________________ Name: svn:ignore - *.pyc *.swp *.pyd *.so + *.pyc *.swp *.pyd *.so *.prof Property changes on: trunk/Lib/sandbox/pyem/tests ___________________________________________________________________ Name: svn:ignore - *.pyc *.swp *.pyd *.so + *.pyc *.swp *.pyd *.so *.prof From scipy-svn at scipy.org Sat Jun 9 04:38:13 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Jun 2007 03:38:13 -0500 (CDT) Subject: [Scipy-svn] r3084 - in trunk/Lib/sandbox/pyem: . data Message-ID: <20070609083813.320FD39C19B@new.scipy.org> Author: cdavid Date: 2007-06-09 03:37:59 -0500 (Sat, 09 Jun 2007) New Revision: 3084 Added: trunk/Lib/sandbox/pyem/data/setup.py Modified: trunk/Lib/sandbox/pyem/setup.py Log: Add data as a proper submodule Added: trunk/Lib/sandbox/pyem/data/setup.py =================================================================== --- trunk/Lib/sandbox/pyem/data/setup.py 2007-06-09 08:11:46 UTC (rev 3083) +++ trunk/Lib/sandbox/pyem/data/setup.py 2007-06-09 08:37:59 UTC (rev 3084) @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('data',parent_package,top_path) + config.add_subpackage('oldfaithful') + config.make_config_py() # installs __config__.py + return config + +if __name__ == '__main__': + print 'This is the wrong setup.py file to run' Modified: trunk/Lib/sandbox/pyem/setup.py =================================================================== --- trunk/Lib/sandbox/pyem/setup.py 2007-06-09 08:11:46 UTC (rev 3083) +++ trunk/Lib/sandbox/pyem/setup.py 2007-06-09 08:37:59 UTC (rev 3084) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Fri Jun 01 05:00 PM 2007 J +# Last Change: Sat Jun 09 05:00 PM 2007 J # TODO: # - check how to handle cmd line build options with distutils and use # it in the building process @@ -28,6 +28,7 @@ from numpy.distutils.misc_util import Configuration config = Configuration(package_name,parent_package,top_path, version = VERSION) + config.add_subpackage('data') config.add_data_dir('tests') config.add_data_dir('profile_data') config.add_extension('c_gden', From scipy-svn at scipy.org Sat Jun 9 07:39:04 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Jun 2007 06:39:04 -0500 (CDT) Subject: [Scipy-svn] r3085 - trunk/Lib/sandbox/pyem Message-ID: <20070609113904.D9C6939C0E7@new.scipy.org> Author: cdavid Date: 2007-06-09 06:38:41 -0500 (Sat, 09 Jun 2007) New Revision: 3085 Modified: trunk/Lib/sandbox/pyem/densities.py trunk/Lib/sandbox/pyem/gauss_mix.py trunk/Lib/sandbox/pyem/misc.py Log: Clean up densities.py code, set docstrings to rest Modified: trunk/Lib/sandbox/pyem/densities.py =================================================================== --- trunk/Lib/sandbox/pyem/densities.py 2007-06-09 08:37:59 UTC (rev 3084) +++ trunk/Lib/sandbox/pyem/densities.py 2007-06-09 11:38:41 UTC (rev 3085) @@ -1,11 +1,15 @@ #! 
/usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Sat Jun 09 03:00 PM 2007 J +# Last Change: Sat Jun 09 08:00 PM 2007 J +"""This module implements various bsic functions related to multivariate +gaussian, such as pdf estimation, confidence interval/ellipsoids, etc...""" +__docformat__ = 'restructuredtext' + import numpy as N import numpy.linalg as lin -from numpy.random import randn +#from numpy.random import randn from scipy.stats import chi2 import misc @@ -18,6 +22,7 @@ message -- explanation of the error""" def __init__(self, message): self.message = message + Exception.__init__(self) def __str__(self): return self.message @@ -25,33 +30,50 @@ # The following function do all the fancy stuff to check that parameters # are Ok, and call the right implementation if args are OK. def gauss_den(x, mu, va, log = False): - """ Compute multivariate Gaussian density at points x for + """Compute multivariate Gaussian density at points x for mean mu and variance va. + :Parameters: + x : ndarray + points where to estimate the pdf. each row of the array is one + point of d dimension + mu : ndarray + mean of the pdf. Should have same dimension d than points in x. + va : ndarray + variance of the pdf. If va has d elements, va is interpreted as the + diagonal elements of the actual covariance matrix. Otherwise, + should be a dxd matrix (and positive definite). + log : boolean + if True, returns the log-pdf instead of the pdf. + + :Returns: + pdf : ndarray + Returns a rank 1 array of the pdf at points x. 
+ + Notes + ----- Vector are row vectors, except va which can be a matrix - (row vector variance for diagonal variance) + (row vector variance for diagonal variance).""" - If log is True, than the log density is returned - (useful for underflow ?)""" - mu = N.atleast_2d(mu) - va = N.atleast_2d(va) - x = N.atleast_2d(x) + lmu = N.atleast_2d(mu) + lva = N.atleast_2d(va) + lx = N.atleast_2d(x) #=======================# # Checking parameters # #=======================# - if len(N.shape(mu)) != 2: + if len(N.shape(lmu)) != 2: raise DenError("mu is not rank 2") - if len(N.shape(va)) != 2: + if len(N.shape(lva)) != 2: raise DenError("va is not rank 2") - if len(N.shape(x)) != 2: + if len(N.shape(lx)) != 2: raise DenError("x is not rank 2") - (n, d) = x.shape - (dm0, dm1) = mu.shape - (dv0, dv1) = va.shape + d = N.shape(lx)[1] + (dm0, dm1) = N.shape(lmu) + (dv0, dv1) = N.shape(lva) # Check x and mu same dimension if dm0 != 1: @@ -73,13 +95,13 @@ #===============# if d == 1: # scalar case - return _scalar_gauss_den(x[:, 0], mu[0, 0], va[0, 0], log) + return _scalar_gauss_den(lx[:, 0], lmu[0, 0], lva[0, 0], log) elif dv0 == 1: # Diagonal matrix case - return _diag_gauss_den(x, mu, va, log) + return _diag_gauss_den(lx, lmu, lva, log) elif dv1 == dv0: # full case - return _full_gauss_den(x, mu, va, log) + return _full_gauss_den(lx, lmu, lva, log) else: raise DenError("variance mode not recognized, this is a bug") @@ -115,20 +137,20 @@ Call gauss_den instead""" # Diagonal matrix case d = mu.size - n = x.shape[0] + #n = x.shape[0] if not log: - inva = 1/va[0,0] - fac = (2*N.pi) ** (-d/2.0) * N.sqrt(inva) - y = (x[:,0] - mu[0,0]) ** 2 * inva * -0.5 + inva = 1/va[0, 0] + fac = (2*N.pi) ** (-d/2.0) * N.sqrt(inva) + y = (x[:, 0] - mu[0, 0]) ** 2 * inva * -0.5 for i in range(1, d): - inva = 1/va[0,i] - fac *= N.sqrt(inva) - y += (x[:,i] - mu[0,i]) ** 2 * inva * -0.5 - y = fac * N.exp(y) + inva = 1/va[0, i] + fac *= N.sqrt(inva) + y += (x[:, i] - mu[0, i]) ** 2 * inva * -0.5 + y = fac 
* N.exp(y) else: - y = _scalar_gauss_den(x[:,0], mu[0,0], va[0,0], log) + y = _scalar_gauss_den(x[:, 0], mu[0, 0], va[0, 0], log) for i in range(1, d): - y += _scalar_gauss_den(x[:,i], mu[0,i], va[0,i], log) + y += _scalar_gauss_den(x[:, i], mu[0, i], va[0, i], log) return y def _full_gauss_den(x, mu, va, log): @@ -166,31 +188,46 @@ return y # To get coordinatea of a confidence ellipse from multi-variate gaussian pdf -def gauss_ell(mu, va, dim = misc._DEF_VIS_DIM, \ - npoints = misc._DEF_ELL_NP, \ - level = misc._DEF_LEVEL): - """ Given a mean and covariance for multi-variate - gaussian, returns npoints points for the ellipse - of confidence given by level (all points will be inside - the ellipsoides with a probability equal to level) +def gauss_ell(mu, va, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, \ + level = misc.DEF_LEVEL): + """Given a mean and covariance for multi-variate + gaussian, returns the coordinates of the confidense ellipsoid. - Returns the coordinate x and y of the ellipse""" + Compute npoints coordinates for the ellipse of confidence of given level + (all points will be inside the ellipsoides with a probability equal to + level). + + :Parameters: + mu : ndarray + mean of the pdf + va : ndarray + variance of the pdf + dim : sequence + sequences of two integers which represent the dimensions where to + project the ellipsoid. + npoints: int + number of points to generate for the ellipse. + level : float + level of confidence (between 0 and 1). 
+ + :Returns: + Returns the coordinate x and y of the ellipse.""" if level >= 1 or level <= 0: raise ValueError("level should be a scale strictly between 0 and 1.""") - mu = N.atleast_1d(mu) - va = N.atleast_1d(va) - d = mu.shape[0] - c = N.array(dim) + mu = N.atleast_1d(mu) + va = N.atleast_1d(va) + d = N.shape(mu)[0] + c = N.array(dim) if N.any(c < 0) or N.any(c >= d): raise ValueError("dim elements should be >= 0 and < %d (dimension"\ " of the variance)" % d) - if mu.size == va.size: + if N.size(mu) == N.size(va): mode = 'diag' else: - if va.ndim == 2: - if va.shape[0] == va.shape[1]: + if N.ndim(va) == 2: + if N.shape(va)[0] == N.shape(va)[1]: mode = 'full' else: raise DenError("variance not square") @@ -215,7 +252,7 @@ elps = N.outer(mu, N.ones(npoints)) elps += N.dot(N.diag(N.sqrt(va)), circle) elif mode == 'full': - va = va[c,:][:,c] + va = va[c, :][:, c] # Method: compute the cholesky decomp of each cov matrix, that is # compute cova such as va = cova * cova' # WARN: scipy is different than matlab here, as scipy computes a lower @@ -227,22 +264,38 @@ elps = N.outer(mu, N.ones(npoints)) elps += N.dot(cova, circle) else: - raise DenParam("var mode not recognized") + raise ValueError("var mode not recognized") return elps[0, :], elps[1, :] def multiple_gauss_den(data, mu, va): """Helper function to generate several Gaussian - pdf (different parameters) from the same data""" - mu = N.atleast_2d(mu) - va = N.atleast_2d(va) + pdf (different parameters) at the same points - K = mu.shape[0] - n = data.shape[0] - d = mu.shape[1] + :Parameters: + data : ndarray + points where to estimate the pdfs (n,d). + mu : ndarray + mean of the pdf, of shape (k,d). One row of dimension d per + different component, the number of rows k being the number of + component + va : ndarray + variance of the pdf. One row per different component for diagonal + covariance (k, d), or d rows per component for full matrix pdf + (k*d,d). 
+ + :Returns: + Returns a (n, k) array, each column i being the pdf of the ith mean and + ith variance.""" + mu = N.atleast_2d(mu) + va = N.atleast_2d(va) + + K = N.shape(mu)[0] + n = N.shape(data)[0] + d = N.shape(mu)[1] - y = N.zeros((K, n)) - if mu.size == va.size: + y = N.zeros((K, n)) + if N.size(mu) == N.size(va): for i in range(K): y[i] = gauss_den(data, mu[i, :], va[i, :]) return y.T @@ -252,39 +305,40 @@ return y.T if __name__ == "__main__": - import pylab + pass + ## import pylab - #========================================= - # Test plotting a simple diag 2d variance: - #========================================= - va = N.array([5, 3]) - mu = N.array([2, 3]) + ## #========================================= + ## # Test plotting a simple diag 2d variance: + ## #========================================= + ## va = N.array([5, 3]) + ## mu = N.array([2, 3]) - # Generate a multivariate gaussian of mean mu and covariance va - X = randn(1e3, 2) - Yc = N.dot(N.diag(N.sqrt(va)), X.transpose()) - Yc = Yc.transpose() + mu + ## # Generate a multivariate gaussian of mean mu and covariance va + ## X = randn(1e3, 2) + ## Yc = N.dot(N.diag(N.sqrt(va)), X.transpose()) + ## Yc = Yc.transpose() + mu - # Plotting - Xe, Ye = gauss_ell(mu, va, npoints = 100) - pylab.figure() - pylab.plot(Yc[:, 0], Yc[:, 1], '.') - pylab.plot(Xe, Ye, 'r') + ## # Plotting + ## Xe, Ye = gauss_ell(mu, va, npoints = 100) + ## pylab.figure() + ## pylab.plot(Yc[:, 0], Yc[:, 1], '.') + ## pylab.plot(Xe, Ye, 'r') - #========================================= - # Test plotting a simple full 2d variance: - #========================================= - va = N.array([[0.2, 0.1],[0.1, 0.5]]) - mu = N.array([0, 3]) + ## #========================================= + ## # Test plotting a simple full 2d variance: + ## #========================================= + ## va = N.array([[0.2, 0.1],[0.1, 0.5]]) + ## mu = N.array([0, 3]) - # Generate a multivariate gaussian of mean mu and covariance va - X = randn(1e3, 2) - Yc 
= N.dot(lin.cholesky(va), X.transpose()) - Yc = Yc.transpose() + mu + ## # Generate a multivariate gaussian of mean mu and covariance va + ## X = randn(1e3, 2) + ## Yc = N.dot(lin.cholesky(va), X.transpose()) + ## Yc = Yc.transpose() + mu - # Plotting - Xe, Ye = gauss_ell(mu, va, npoints = 100, level=0.95) - pylab.figure() - pylab.plot(Yc[:, 0], Yc[:, 1], '.') - pylab.plot(Xe, Ye, 'r') - pylab.show() + ## # Plotting + ## Xe, Ye = gauss_ell(mu, va, npoints = 100, level=0.95) + ## pylab.figure() + ## pylab.plot(Yc[:, 0], Yc[:, 1], '.') + ## pylab.plot(Xe, Ye, 'r') + ## pylab.show() Modified: trunk/Lib/sandbox/pyem/gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 08:37:59 UTC (rev 3084) +++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 11:38:41 UTC (rev 3085) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Sat Jun 09 03:00 PM 2007 J +# Last Change: Sat Jun 09 08:00 PM 2007 J # Module to implement GaussianMixture class. @@ -151,8 +151,8 @@ return X - def conf_ellipses(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, \ - level = misc._DEF_LEVEL): + def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, + level = misc.DEF_LEVEL): """Returns a list of confidence ellipsoids describing the Gmm defined by mu and va. Check densities.gauss_ell for details @@ -262,8 +262,8 @@ #================= # Plotting methods #================= - def plot(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, - level = misc._DEF_LEVEL): + def plot(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, + level = misc.DEF_LEVEL): """Plot the ellipsoides directly for the model Returns a list of lines, so that their style can be modified. 
By default, @@ -371,7 +371,7 @@ return retval - def density_on_grid(self, dim = misc._DEF_VIS_DIM, nx = 50, ny = 50, + def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50, maxlevel = 0.95): """Do all the necessary computation for contour plot of mixture's density. @@ -401,7 +401,7 @@ V.extend(N.linspace(0, N.max(lden), 4).tolist()) return X, Y, lden, N.array(V) - def _densityctr(self, xrange, yrange, dim = misc._DEF_VIS_DIM): + def _densityctr(self, xrange, yrange, dim = misc.DEF_VIS_DIM): """Helper function to compute density contours on a grid.""" gr = N.meshgrid(xrange, yrange) X = gr[0].flatten() Modified: trunk/Lib/sandbox/pyem/misc.py =================================================================== --- trunk/Lib/sandbox/pyem/misc.py 2007-06-09 08:37:59 UTC (rev 3084) +++ trunk/Lib/sandbox/pyem/misc.py 2007-06-09 11:38:41 UTC (rev 3085) @@ -1,12 +1,12 @@ -# Last Change: Sat Jun 09 12:00 PM 2007 J +# Last Change: Sat Jun 09 07:00 PM 2007 J #======================================================== # Constants used throughout the module (def args, etc...) #======================================================== # This is the default dimension for representing confidence ellipses -_DEF_VIS_DIM = [0, 1] -_DEF_ELL_NP = 100 -_DEF_LEVEL = 0.39 +DEF_VIS_DIM = [0, 1] +DEF_ELL_NP = 100 +DEF_LEVEL = 0.39 #===================================================================== # "magic number", that is number used to control regularization and co # Change them at your risk ! From scipy-svn at scipy.org Sat Jun 9 07:43:55 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Jun 2007 06:43:55 -0500 (CDT) Subject: [Scipy-svn] r3086 - trunk/Lib/sandbox/pyem Message-ID: <20070609114355.2B62E39C0E7@new.scipy.org> Author: cdavid Date: 2007-06-09 06:43:51 -0500 (Sat, 09 Jun 2007) New Revision: 3086 Modified: trunk/Lib/sandbox/pyem/misc.py Log: Set def arguments to immutable to avoid nasty side effect. 
Modified: trunk/Lib/sandbox/pyem/misc.py =================================================================== --- trunk/Lib/sandbox/pyem/misc.py 2007-06-09 11:38:41 UTC (rev 3085) +++ trunk/Lib/sandbox/pyem/misc.py 2007-06-09 11:43:51 UTC (rev 3086) @@ -1,10 +1,10 @@ -# Last Change: Sat Jun 09 07:00 PM 2007 J +# Last Change: Sat Jun 09 08:00 PM 2007 J #======================================================== # Constants used throughout the module (def args, etc...) #======================================================== # This is the default dimension for representing confidence ellipses -DEF_VIS_DIM = [0, 1] +DEF_VIS_DIM = (0, 1) DEF_ELL_NP = 100 DEF_LEVEL = 0.39 #===================================================================== From scipy-svn at scipy.org Sat Jun 9 10:03:18 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sat, 9 Jun 2007 09:03:18 -0500 (CDT) Subject: [Scipy-svn] r3087 - in trunk/Lib/sandbox/pyem: . doc examples Message-ID: <20070609140318.EFA1D39C08C@new.scipy.org> Author: cdavid Date: 2007-06-09 09:03:01 -0500 (Sat, 09 Jun 2007) New Revision: 3087 Added: trunk/Lib/sandbox/pyem/doc/pdfestimation.png Modified: trunk/Lib/sandbox/pyem/__init__.py trunk/Lib/sandbox/pyem/_c_densities.py trunk/Lib/sandbox/pyem/densities.py trunk/Lib/sandbox/pyem/doc/ trunk/Lib/sandbox/pyem/doc/Makefile trunk/Lib/sandbox/pyem/doc/index.txt trunk/Lib/sandbox/pyem/doc/tutorial.pdf trunk/Lib/sandbox/pyem/examples/pdfestimation.py trunk/Lib/sandbox/pyem/gauss_mix.py trunk/Lib/sandbox/pyem/gmm_em.py trunk/Lib/sandbox/pyem/info.py trunk/Lib/sandbox/pyem/online_em.py Log: Heavy liftup of the code + docstrings. Modified: trunk/Lib/sandbox/pyem/__init__.py =================================================================== --- trunk/Lib/sandbox/pyem/__init__.py 2007-06-09 11:43:51 UTC (rev 3086) +++ trunk/Lib/sandbox/pyem/__init__.py 2007-06-09 14:03:01 UTC (rev 3087) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Mon May 28 01:00 PM 2007 J +# Last Change: Sat Jun 09 10:00 PM 2007 J from info import __doc__ @@ -8,7 +8,7 @@ #from online_em import OnGMM as _OnGMM #import examples as _examples -__all__ = filter(lambda s:not s.startswith('_'),dir()) +__all__ = filter(lambda s:not s.startswith('_'), dir()) from numpy.testing import NumpyTest test = NumpyTest().test Modified: trunk/Lib/sandbox/pyem/_c_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/_c_densities.py 2007-06-09 11:43:51 UTC (rev 3086) +++ trunk/Lib/sandbox/pyem/_c_densities.py 2007-06-09 14:03:01 UTC (rev 3087) @@ -1,28 +1,34 @@ #! /usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Thu Nov 09 05:00 PM 2006 J +# Last Change: Sat Jun 09 10:00 PM 2007 J +"""This module implements some function of densities module in C for efficiency +reasons. gaussian, such as pdf estimation, confidence interval/ellipsoids, +etc...""" + +__docformat__ = 'restructuredtext' + # This module uses a C implementation through ctypes, for diagonal cases # TODO: # - portable way to find/open the shared library # - full cov matrice +# - test before inclusion import numpy as N import numpy.linalg as lin -from numpy.random import randn -from scipy.stats import chi2 -import densities as D +#from numpy.random import randn +#from scipy.stats import chi2 +#import densities as D import ctypes -from ctypes import cdll, c_uint, c_int, c_double, POINTER +from ctypes import c_uint, c_int from numpy.ctypeslib import ndpointer, load_library ctypes_major = int(ctypes.__version__.split('.')[0]) if ctypes_major < 1: - msg = "version of ctypes is %s, expected at least %s" \ - % (ctypes.__version__, '1.0.0') - raise ImportError(msg) + raise ImportError(msg = "version of ctypes is %s, expected at least %s"\ + % (ctypes.__version__, '1.0.1')) # Requirements for diag gden _gden = load_library('c_gden.so', __file__) @@ -75,9 +81,9 @@ if len(N.shape(x)) != 2: 
raise DenError("x is not rank 2") - (n, d) = x.shape - (dm0, dm1) = mu.shape - (dv0, dv1) = va.shape + (n, d) = N.shape(x) + (dm0, dm1) = N.shape(mu) + (dv0, dv1) = N.shape(va) # Check x and mu same dimension if dm0 != 1: @@ -165,9 +171,9 @@ # inva.ctypes.data_as(POINTER(c_double)), # y.ctypes.data_as(POINTER(c_double))) else: - y = _scalar_gauss_den(x[:,0], mu[0,0], va[0,0], log) + y = _scalar_gauss_den(x[:, 0], mu[0, 0], va[0, 0], log) for i in range(1, d): - y += _scalar_gauss_den(x[:,i], mu[0,i], va[0,i], log) + y += _scalar_gauss_den(x[:, i], mu[0, i], va[0, i], log) return y def _full_gauss_den(x, mu, va, log): @@ -199,19 +205,20 @@ return y if __name__ == "__main__": - #========================================= - # Test accuracy between pure and C python - #========================================= - mu = N.array([2.0, 3]) - va = N.array([5.0, 3]) + pass + ##========================================= + ## Test accuracy between pure and C python + ##========================================= + #mu = N.array([2.0, 3]) + #va = N.array([5.0, 3]) - # Generate a multivariate gaussian of mean mu and covariance va - nframes = 1e4 - X = randn(nframes, 2) - Yc = N.dot(N.diag(N.sqrt(va)), X.transpose()) - Yc = Yc.transpose() + mu + ## Generate a multivariate gaussian of mean mu and covariance va + #nframes = 1e4 + #X = randn(nframes, 2) + #Yc = N.dot(N.diag(N.sqrt(va)), X.transpose()) + #Yc = Yc.transpose() + mu - Y = D.gauss_den(Yc, mu, va) - Yt = gauss_den(Yc, mu, va) + #Y = D.gauss_den(Yc, mu, va) + #Yt = gauss_den(Yc, mu, va) - print "Diff is " + str(N.sqrt(N.sum((Y-Yt) ** 2))/nframes/2) + #print "Diff is " + str(N.sqrt(N.sum((Y-Yt) ** 2))/nframes/2) Modified: trunk/Lib/sandbox/pyem/densities.py =================================================================== --- trunk/Lib/sandbox/pyem/densities.py 2007-06-09 11:43:51 UTC (rev 3086) +++ trunk/Lib/sandbox/pyem/densities.py 2007-06-09 14:03:01 UTC (rev 3087) @@ -1,8 +1,8 @@ #! 
/usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Sat Jun 09 08:00 PM 2007 J -"""This module implements various bsic functions related to multivariate +# Last Change: Sat Jun 09 10:00 PM 2007 J +"""This module implements various basic functions related to multivariate gaussian, such as pdf estimation, confidence interval/ellipsoids, etc...""" __docformat__ = 'restructuredtext' @@ -50,10 +50,10 @@ pdf : ndarray Returns a rank 1 array of the pdf at points x. - Notes - ----- - Vector are row vectors, except va which can be a matrix - (row vector variance for diagonal variance).""" + Note + ---- + Vector are row vectors, except va which can be a matrix + (row vector variance for diagonal variance).""" lmu = N.atleast_2d(mu) lva = N.atleast_2d(va) Property changes on: trunk/Lib/sandbox/pyem/doc ___________________________________________________________________ Name: svn:ignore - *.pyc *.swp *.pyd *.so *.prof + *.pyc *.swp *.pyd *.so *.prof *.out *.tex Modified: trunk/Lib/sandbox/pyem/doc/Makefile =================================================================== --- trunk/Lib/sandbox/pyem/doc/Makefile 2007-06-09 11:43:51 UTC (rev 3086) +++ trunk/Lib/sandbox/pyem/doc/Makefile 2007-06-09 14:03:01 UTC (rev 3087) @@ -1,4 +1,4 @@ -# Last Change: Mon May 28 10:00 AM 2007 J +# Last Change: Sat Jun 09 05:00 PM 2007 J # This makefile is used to build the pdf from the rest file and inlined code # from python examples @@ -7,7 +7,7 @@ rst2tex = PYTHONPATH=/home/david/local/lib/python2.4/site-packages rst2newlatex.py \ --stylesheet-path base.tex --user-stylesheet user.tex -pytexfiles = pyem.tex basic_example1.tex basic_example2.tex basic_example3.tex +pytexfiles = pyem.tex basic_example1.tex basic_example2.tex basic_example3.tex pdfestimation.tex SOURCEPATH = $(PWD) @@ -24,15 +24,18 @@ pyem.tex: index.txt $(rst2tex) $< > $@ -basic_example1.tex: examples/basic_example1.py +basic_example1.tex: ../examples/basic_example1.py $(py2tex) $< > $@ -basic_example2.tex: 
examples/basic_example2.py +basic_example2.tex: ../examples/basic_example2.py $(py2tex) $< > $@ -basic_example3.tex: examples/basic_example3.py +basic_example3.tex: ../examples/basic_example3.py $(py2tex) $< > $@ +pdfestimation.tex: ../examples/pdfestimation.py + $(py2tex) $< > $@ + clean: for i in $(pytexfiles); do \ rm -f `echo $$i`; \ Modified: trunk/Lib/sandbox/pyem/doc/index.txt =================================================================== --- trunk/Lib/sandbox/pyem/doc/index.txt 2007-06-09 11:43:51 UTC (rev 3086) +++ trunk/Lib/sandbox/pyem/doc/index.txt 2007-06-09 14:03:01 UTC (rev 3087) @@ -13,7 +13,7 @@ file: Bic_example.png /restindex -.. Last Change: Mon May 28 10:00 AM 2007 J +.. Last Change: Sat Jun 09 07:00 PM 2007 J =================================================== PyEM, a python package for Gaussian mixture models @@ -176,14 +176,36 @@ Examples ========= -TODO. +Using EM for pdf estimation +--------------------------- +The following example uses the old faithful dataset and is available in the +example directory. It models the joint distribution (d(t), w(t+1)), where d(t) +is the duration time, and w(t+1) the waiting time for the next eruption. It +selects the best model using the BIC. + +.. raw:: latex + + \input{pdfestimation.tex} + +.. figure:: pdfestimation.png + :width: 500 + :height: 400 + + isodensity curves for the old faithful data modeled by a 1, 2, 3 and 4 + componenits model (up to bottom, left to right). 
+ + Using EM for clustering ----------------------- +TODO (this is fundamentally the same than pdf estimation, though) + Using PyEM for supervised learning ---------------------------------- +TODO + Note on performances ==================== Added: trunk/Lib/sandbox/pyem/doc/pdfestimation.png =================================================================== (Binary files differ) Property changes on: trunk/Lib/sandbox/pyem/doc/pdfestimation.png ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Modified: trunk/Lib/sandbox/pyem/doc/tutorial.pdf =================================================================== (Binary files differ) Modified: trunk/Lib/sandbox/pyem/examples/pdfestimation.py =================================================================== --- trunk/Lib/sandbox/pyem/examples/pdfestimation.py 2007-06-09 11:43:51 UTC (rev 3086) +++ trunk/Lib/sandbox/pyem/examples/pdfestimation.py 2007-06-09 14:03:01 UTC (rev 3087) @@ -1,15 +1,11 @@ #! /usr/bin/env python -# Last Change: Sat Jun 09 03:00 PM 2007 J +# Last Change: Sat Jun 09 07:00 PM 2007 J # Example of doing pdf estimation with EM algorithm. Requires matplotlib. 
import numpy as N -from numpy.testing import set_package_path, restore_path - import pylab as P -set_package_path() -import pyem -restore_path() +from scipy.sandbox import pyem import utils oldfaithful = utils.get_faithful() @@ -45,6 +41,8 @@ X, Y, Z, V = gm.density_on_grid() P.contour(X, Y, Z, V) P.plot(dt[:, 0], dt[:, 1], '.') + P.xlabel('duration time (scaled)') + P.ylabel('waiting time (scaled)') print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1) P.show() Modified: trunk/Lib/sandbox/pyem/gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 11:43:51 UTC (rev 3086) +++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 14:03:01 UTC (rev 3087) @@ -1,8 +1,14 @@ # /usr/bin/python -# Last Change: Sat Jun 09 08:00 PM 2007 J +# Last Change: Sat Jun 09 10:00 PM 2007 J -# Module to implement GaussianMixture class. +"""Module implementing GM, a class which represents Gaussian mixtures. +GM instances can be used to create, sample mixtures. They also provide +different plotting facilities, such as isodensity contour for multi dimensional +models, ellipses of confidence.""" + +__docformat__ = 'restructuredtext' + import numpy as N from numpy.random import randn, rand import numpy.linalg as lin @@ -21,12 +27,12 @@ # be used as long as w, mu and va are not set # - We have to use scipy now for chisquare pdf, so there may be other # methods to be used, ie for implementing random index. -# - there is no check on internal state of the GM, that is does w, mu and va values -# make sense (eg singular values) -# - plot1d is still very rhough. There should be a sensible way to -# modify the result plot (maybe returns a dic with global pdf, component pdf and -# fill matplotlib handles). 
Should be coherent with plot -class GmParamError: +# - there is no check on internal state of the GM, that is does w, mu and va +# values make sense (eg singular values) - plot1d is still very rhough. There +# should be a sensible way to modify the result plot (maybe returns a dic +# with global pdf, component pdf and fill matplotlib handles). Should be +# coherent with plot +class GmParamError(Exception): """Exception raised for errors in gmm params Attributes: @@ -34,6 +40,7 @@ message -- explanation of the error """ def __init__(self, message): + Exception.__init__(self) self.message = message def __str__(self): @@ -52,11 +59,27 @@ # Methods to construct a mixture #=============================== def __init__(self, d, k, mode = 'diag'): - """Init a Gaussian Mixture of k components, each component being a - d multi-variate Gaussian, with covariance matrix of style mode. - - If you want to build a Gaussian Mixture with knowns weights, means - and variances, you can use GM.fromvalues method directly""" + """Init a Gaussian Mixture. + + :Parameters: + d : int + dimension of the mixture. + k : int + number of component in the mixture. + mode : string + mode of covariance + + :Returns: + an instance of GM. + + Note + ---- + + Only full and diag mode are supported for now. + + :SeeAlso: + If you want to build a Gaussian Mixture with knowns weights, means + and variances, you can use GM.fromvalues method directly""" if mode not in self._cov_mod: raise GmParamError("mode %s not recognized" + str(mode)) @@ -80,16 +103,42 @@ self.is1d = True def set_param(self, weights, mu, sigma): - """Set parameters of the model. Args should - be conformant with metparameters d and k given during - initialisation""" + """Set parameters of the model. + + Args should be conformant with metparameters d and k given during + initialisation. + + :Parameters: + weights : ndarray + weights of the mixture (k elements) + mu : ndarray + means of the mixture. 
One component's mean per row, k row for k + components. + sigma : ndarray + variances of the mixture. For diagonal models, one row contains + the diagonal elements of the covariance matrix. For full + covariance, d rows for one variance. + + Examples + -------- + Create a 3 component, 2 dimension mixture with full covariance matrices + + >>> w = numpy.array([0.2, 0.5, 0.3]) + >>> mu = numpy.array([[0., 0.], [1., 1.]]) + >>> va = numpy.array([[1., 0.], [0., 1.], [2., 0.5], [0.5, 1]]) + >>> gm = GM(2, 3, 'full') + >>> gm.set_param(w, mu, va) + + :SeeAlso: + If you know already the parameters when creating the model, you can + simply use the method class GM.fromvalues.""" k, d, mode = check_gmm_param(weights, mu, sigma) if not k == self.k: raise GmParamError("Number of given components is %d, expected %d" % (k, self.k)) if not d == self.d: - raise GmParamError("Dimension of the given model is %d, expected %d" - % (d, self.d)) + raise GmParamError("Dimension of the given model is %d, "\ + "expected %d" % (d, self.d)) if not mode == self.mode and not d == 1: raise GmParamError("Given covariance mode is %s, expected %s" % (mode, self.mode)) @@ -104,16 +153,34 @@ """This class method can be used to create a GM model directly from its parameters weights, mean and variance - w, mu, va = GM.gen_param(d, k) - gm = GM(d, k) - gm.set_param(w, mu, va) + :Parameters: + weights : ndarray + weights of the mixture (k elements) + mu : ndarray + means of the mixture. One component's mean per row, k row for k + components. + sigma : ndarray + variances of the mixture. For diagonal models, one row contains + the diagonal elements of the covariance matrix. For full + covariance, d rows for one variance. + :Returns: + gm : GM + an instance of GM. 
+ + Examples + -------- + + >>> w, mu, va = GM.gen_param(d, k) + >>> gm = GM(d, k) + >>> gm.set_param(w, mu, va) + and - w, mu, va = GM.gen_param(d, k) - gm = GM.fromvalue(w, mu, va) + >>> w, mu, va = GM.gen_param(d, k) + >>> gm = GM.fromvalue(w, mu, va) - Are equivalent """ + are strictly equivalent.""" k, d, mode = check_gmm_param(weights, mu, sigma) res = cls(d, k, mode) res.set_param(weights, mu, sigma) @@ -123,7 +190,15 @@ # Fundamental facilities (sampling, confidence, etc..) #===================================================== def sample(self, nframes): - """ Sample nframes frames from the model """ + """ Sample nframes frames from the model. + + :Parameters: + nframes : int + number of samples to draw. + + :Returns: + samples : ndarray + samples in the format one sample per row (nframes, d).""" if not self.is_valid: raise GmParamError("""Parameters of the model has not been set yet, please set them using self.set_param()""") @@ -134,47 +209,60 @@ X = randn(nframes, self.d) if self.mode == 'diag': - X = self.mu[S, :] + X * N.sqrt(self.va[S,:]) + X = self.mu[S, :] + X * N.sqrt(self.va[S, :]) elif self.mode == 'full': # Faster: cho = N.zeros((self.k, self.va.shape[1], self.va.shape[1])) for i in range(self.k): # Using cholesky looks more stable than sqrtm; sqrtm is not # available in numpy anyway, only in scipy... 
- cho[i] = lin.cholesky(self.va[i*self.d:i*self.d+self.d,:]) + cho[i] = lin.cholesky(self.va[i*self.d:i*self.d+self.d, :]) for s in range(self.k): tmpind = N.where(S == s)[0] X[tmpind] = N.dot(X[tmpind], cho[s].transpose()) + self.mu[s] else: - raise GmParamError('cov matrix mode not recognized, this is a bug !') + raise GmParamError("cov matrix mode not recognized, "\ + "this is a bug !") return X - def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, + def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, level = misc.DEF_LEVEL): """Returns a list of confidence ellipsoids describing the Gmm defined by mu and va. Check densities.gauss_ell for details - Returns: - -Xe: a list of x coordinates for the ellipses (Xe[i] is - the array containing x coordinates of the ith Gaussian) - -Ye: a list of y coordinates for the ellipses + :Parameters: + dim : sequence + sequences of two integers which represent the dimensions where to + project the ellipsoid. + npoints : int + number of points to generate for the ellipse. + level : float + level of confidence (between 0 and 1). - Example: + :Returns: + Xe : sequence + a list of x coordinates for the ellipses (Xe[i] is the array + containing x coordinates of the ith Gaussian) + Ye : sequence + a list of y coordinates for the ellipses. + + Examples + -------- Suppose we have w, mu and va as parameters for a mixture, then: - gm = GM(d, k) - gm.set_param(w, mu, va) - X = gm.sample(1000) - Xe, Ye = gm.conf_ellipsoids() - pylab.plot(X[:,0], X[:, 1], '.') - for k in len(w): - pylab.plot(Xe[k], Ye[k], 'r') + >>> gm = GM(d, k) + >>> gm.set_param(w, mu, va) + >>> X = gm.sample(1000) + >>> Xe, Ye = gm.conf_ellipsoids() + >>> pylab.plot(X[:,0], X[:, 1], '.') + >>> for k in len(w): + ... pylab.plot(Xe[k], Ye[k], 'r') Will plot samples X draw from the mixture model, and plot the ellipses of equi-probability from the mean with - fixed level of confidence 0.39. 
""" + default level of confidence.""" if self.is1d: raise ValueError("This function does not make sense for 1d " "mixtures.") @@ -187,14 +275,14 @@ Ye = [] if self.mode == 'diag': for i in range(self.k): - xe, ye = densities.gauss_ell(self.mu[i,:], self.va[i,:], + xe, ye = densities.gauss_ell(self.mu[i, :], self.va[i, :], dim, npoints, level) Xe.append(xe) Ye.append(ye) elif self.mode == 'full': for i in range(self.k): - xe, ye = densities.gauss_ell(self.mu[i,:], - self.va[i*self.d:i*self.d+self.d,:], + xe, ye = densities.gauss_ell(self.mu[i, :], + self.va[i*self.d:i*self.d+self.d, :], dim, npoints, level) Xe.append(xe) Ye.append(ye) @@ -202,8 +290,11 @@ return Xe, Ye def check_state(self): + """Returns true if the parameters of the model are valid. + + For Gaussian mixtures, this means weights summing to 1, and variances + to be positive definite. """ - """ if not self.is_valid: raise GmParamError("""Parameters of the model has not been set yet, please set them using self.set_param()""") @@ -222,18 +313,33 @@ cond = N.zeros(self.k) ava = N.absolute(self.va) for c in range(self.k): - cond[c] = N.amax(ava[c,:]) / N.amin(ava[c,:]) + cond[c] = N.amax(ava[c, :]) / N.amin(ava[c, :]) print cond - def gen_param(self, d, nc, varmode = 'diag', spread = 1): - """Generate valid parameters for a gaussian mixture model. - d is the dimension, nc the number of components, and varmode - the mode for cov matrices. + @classmethod + def gen_param(cls, d, nc, varmode = 'diag', spread = 1): + """Generate random, valid parameters for a gaussian mixture model. + :Parameters: + d : int + the dimension + nc : int + the number of components + varmode : string + covariance matrix mode ('full' or 'diag'). + + :Returns: + w : ndarray + weights of the mixture + mu : ndarray + means of the mixture + w : ndarray + variances of the mixture + + Notes + ----- This is a class method. 
- - Returns: w, mu, va """ w = abs(randn(nc)) w = w / sum(w, 0) @@ -251,13 +357,13 @@ return w, mu, va - gen_param = classmethod(gen_param) + #gen_param = classmethod(gen_param) - #======================= - # Regularization methods - #======================= - def _regularize(self): - raise NotImplemented("No regularization") + # #======================= + # # Regularization methods + # #======================= + # def _regularize(self): + # raise NotImplemented("No regularization") #================= # Plotting methods @@ -266,10 +372,29 @@ level = misc.DEF_LEVEL): """Plot the ellipsoides directly for the model - Returns a list of lines, so that their style can be modified. By default, - the style is red color, and nolegend for all of them. + Returns a list of lines handle, so that their style can be modified. By + default, the style is red color, and nolegend for all of them. - Does not work for 1d""" + :Parameters: + dim : sequence + sequence of two integers, the dimensions of interest. + npoints : int + Number of points to use for the ellipsoids. + level : int + level of confidence (to use with fill argument) + + :Returns: + h : sequence + Returns a list of lines handle so that their properties + can be modified (eg color, label, etc...): + + Note + ---- + Does not work for 1d. Requires matplotlib + + :SeeAlso: + conf_ellipses is used to compute the ellipses. 
Use this if you want + to plot with something else than matplotlib.""" if self.is1d: raise ValueError("This function does not make sense for 1d " "mixtures.") @@ -282,22 +407,32 @@ Xe, Ye = self.conf_ellipses(dim, npoints, level) try: import pylab as P - return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in range(k)] + return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in + range(k)] #for i in range(k): # P.plot(Xe[i], Ye[i], 'r') except ImportError: raise GmParamError("matplotlib not found, cannot plot...") - def plot1d(self, level = 0.5, fill = 0, gpdf = 0): - """This function plots the pdfs of each component of the model. - If gpdf is 1, also plots the global pdf. If fill is 1, fill confidence - areas using level argument as a level value + def plot1d(self, level = misc.DEF_LEVEL, fill = False, gpdf = False): + """Plots the pdf of each component of the 1d mixture. - Returns a dictionary h of plot handles so that their properties can - be modified (eg color, label, etc...): - - h['pdf'] is a list of lines, one line per component pdf - - h['gpdf'] is the line for the global pdf - - h['conf'] is a list of filling area + :Parameters: + level : int + level of confidence (to use with fill argument) + fill : bool + if True, the area of the pdf corresponding to the given + confidence intervales is filled. + gpdf : bool + if True, the global pdf is plot. 
+ + :Returns: + h : dict + Returns a dictionary h of plot handles so that their properties + can be modified (eg color, label, etc...): + - h['pdf'] is a list of lines, one line per component pdf + - h['gpdf'] is the line for the global pdf + - h['conf'] is a list of filling area """ if not self.is1d: raise ValueError("This function does not make sense for " @@ -310,12 +445,12 @@ raise GmParamError("the model is not one dimensional model") from scipy.stats import norm nrm = norm(0, 1) - pval = N.sqrt(self.va[:,0]) * nrm.ppf((1+level)/2) + pval = N.sqrt(self.va[:, 0]) * nrm.ppf((1+level)/2) # Compute reasonable min/max for the normal pdf: [-mc * std, mc * std] # gives the range we are taking in account for each gaussian mc = 3 - std = N.sqrt(self.va[:,0]) + std = N.sqrt(self.va[:, 0]) m = N.amin(self.mu[:, 0] - mc * std) M = N.amax(self.mu[:, 0] + mc * std) @@ -326,7 +461,7 @@ # Prepare the dic of plot handles to return ks = ['pdf', 'conf', 'gpdf'] - hp = dict((i,[]) for i in ks) + hp = dict((i, []) for i in ks) try: import pylab as P for c in range(self.k): @@ -336,7 +471,8 @@ h = P.plot(x, y, 'r', label ='_nolegend_') hp['pdf'].extend(h) if fill: - #P.axvspan(-pval[c] + self.mu[c][0], pval[c] + self.mu[c][0], + #P.axvspan(-pval[c] + self.mu[c][0], pval[c] + + #self.mu[c][0], # facecolor = 'b', alpha = 0.2) id1 = -pval[c] + self.mu[c] id2 = pval[c] + self.mu[c] @@ -350,7 +486,8 @@ facecolor = 'b', alpha = 0.1, label='_nolegend_') hp['conf'].extend(h) #P.fill([xc[0], xc[0], xc[-1], xc[-1]], - # [0, Yf[0], Yf[-1], 0], facecolor = 'b', alpha = 0.2) + # [0, Yf[0], Yf[-1], 0], facecolor = 'b', alpha = + # 0.2) if gpdf: h = P.plot(x, Yt, 'r:', label='_nolegend_') hp['gpdf'] = h @@ -363,7 +500,7 @@ the pdf of the mixture.""" # XXX: have a public function to compute the pdf at given points # instead... 
- std = N.sqrt(self.va[:,0]) + std = N.sqrt(self.va[:, 0]) retval = N.empty((x.size, self.k)) for c in range(self.k): retval[:, c] = self.w[c]/(N.sqrt(2*N.pi) * std[c]) * \ @@ -373,9 +510,30 @@ def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50, maxlevel = 0.95): - """Do all the necessary computation for contour plot of mixture's density. + """Do all the necessary computation for contour plot of mixture's + density. - Returns X, Y, Z and V as expected by mpl contour function.""" + :Parameters: + dim : sequence + sequence of two integers, the dimensions of interest. + nx : int + Number of points to use for the x axis of the grid + ny : int + Number of points to use for the y axis of the grid + + :Returns: + X : ndarray + points of the x axis of the grid + Y : ndarray + points of the y axis of the grid + Z : ndarray + values of the density on X and Y + V : ndarray + Contour values to display. + + Note + ---- + X, Y, Z and V are as expected by matplotlib contour function.""" if self.is1d: raise ValueError("This function does not make sense for 1d " "mixtures.") @@ -397,13 +555,14 @@ X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \ N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim) lden = N.log(den) + # XXX: how to find "good" values for level ? 
V = [-5, -3, -1, -0.5, ] V.extend(N.linspace(0, N.max(lden), 4).tolist()) return X, Y, lden, N.array(V) - def _densityctr(self, xrange, yrange, dim = misc.DEF_VIS_DIM): + def _densityctr(self, rangex, rangey, dim = misc.DEF_VIS_DIM): """Helper function to compute density contours on a grid.""" - gr = N.meshgrid(xrange, yrange) + gr = N.meshgrid(rangex, rangey) X = gr[0].flatten() Y = gr[1].flatten() xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1) @@ -412,7 +571,7 @@ dva = self._get_va(dim) den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w den = N.sum(den, 1) - den = den.reshape(len(yrange), len(xrange)) + den = den.reshape(len(rangey), len(rangex)) X = gr[0] Y = gr[1] @@ -435,16 +594,16 @@ # Syntactic sugar def __repr__(self): - repr = "" - repr += "Gaussian Mixture:\n" - repr += " -> %d dimensions\n" % self.d - repr += " -> %d components\n" % self.k - repr += " -> %s covariance \n" % self.mode + msg = "" + msg += "Gaussian Mixture:\n" + msg += " -> %d dimensions\n" % self.d + msg += " -> %d components\n" % self.k + msg += " -> %s covariance \n" % self.mode if self.is_valid: - repr += "Has initial values""" + msg += "Has initial values""" else: - repr += "Has no initial values yet""" - return repr + msg += "Has no initial values yet""" + return msg def __str__(self): return self.__repr__() @@ -472,19 +631,26 @@ def check_gmm_param(w, mu, va): """Check that w, mu and va are valid parameters for - a mixture of gaussian: w should sum to 1, there should - be the same number of component in each param, the variances - should be positive definite, etc... + a mixture of gaussian. - Params: - w = vector or list of weigths of the mixture (K elements) - mu = matrix: K * d - va = list of variances (vector K * d or square matrices Kd * d) + w should sum to 1, there should be the same number of component in each + param, the variances should be positive definite, etc... 
+ + :Parameters: + w : ndarray + vector or list of weigths of the mixture (K elements) + mu : ndarray + matrix: K * d + va : ndarray + list of variances (vector K * d or square matrices Kd * d) - returns: - K = number of components - d = dimension - mode = 'diag' if diagonal covariance, 'full' of full matrices + :Returns: + k : int + number of components + d : int + dimension + mode : string + 'diag' if diagonal covariance, 'full' of full matrices """ # Check that w is valid @@ -527,34 +693,35 @@ return K, d, mode if __name__ == '__main__': - # Meta parameters: - # - k = number of components - # - d = dimension - # - mode : mode of covariance matrices - d = 5 - k = 4 + pass + ## # Meta parameters: + ## # - k = number of components + ## # - d = dimension + ## # - mode : mode of covariance matrices + ## d = 5 + ## k = 4 - # Now, drawing a model - mode = 'full' - nframes = 1e3 + ## # Now, drawing a model + ## mode = 'full' + ## nframes = 1e3 - # Build a model with random parameters - w, mu, va = GM.gen_param(d, k, mode, spread = 3) - gm = GM.fromvalues(w, mu, va) + ## # Build a model with random parameters + ## w, mu, va = GM.gen_param(d, k, mode, spread = 3) + ## gm = GM.fromvalues(w, mu, va) - # Sample nframes frames from the model - X = gm.sample(nframes) + ## # Sample nframes frames from the model + ## X = gm.sample(nframes) - # Plot the data - import pylab as P - P.plot(X[:, 0], X[:, 1], '.', label = '_nolegend_') + ## # Plot the data + ## import pylab as P + ## P.plot(X[:, 0], X[:, 1], '.', label = '_nolegend_') - # Real confidence ellipses with confidence level - level = 0.50 - h = gm.plot(level=level) + ## # Real confidence ellipses with confidence level + ## level = 0.50 + ## h = gm.plot(level=level) - # set the first ellipse label, which will appear in the legend - h[0].set_label('confidence ell at level ' + str(level)) + ## # set the first ellipse label, which will appear in the legend + ## h[0].set_label('confidence ell at level ' + str(level)) - 
P.legend(loc = 0) - P.show() + ## P.legend(loc = 0) + ## P.show() Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-09 11:43:51 UTC (rev 3086) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-09 14:03:01 UTC (rev 3087) @@ -1,6 +1,12 @@ # /usr/bin/python -# Last Change: Fri Jun 08 08:00 PM 2007 J +# Last Change: Sat Jun 09 10:00 PM 2007 J +"""Module implementing GMM, a class to estimate Gaussian mixture models using +EM, and EM, a class which use GMM instances to estimate models parameters using +the ExpectationMaximization algorithm.""" + +__docformat__ = 'restructuredtext' + # TODO: # - which methods to avoid va shrinking to 0 ? There are several options, # not sure which ones are appropriates @@ -8,22 +14,23 @@ # - online EM import numpy as N -import numpy.linalg as lin +#import numpy.linalg as lin from numpy.random import randn #import _c_densities as densities import densities #from kmean import kmean from scipy.cluster.vq import kmeans2 as kmean -from gauss_mix import GM +#from gauss_mix import GM -from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND +#from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND # Error classes class GmmError(Exception): """Base class for exceptions in this module.""" - pass + def __init__(self): + Exception.__init__(self) -class GmmParamError: +class GmmParamError(GmmError): """Exception raised for errors in gmm params Attributes: @@ -31,41 +38,33 @@ message -- explanation of the error """ def __init__(self, message): + GmmError.__init__(self) self.message = message def __str__(self): return self.message -# Not sure yet about how to design different mixture models. Most of the code -# is different # (pdf, update part of EM, etc...) and I am not sure it makes -# sense to use inheritance for # interface specification in python, since its -# dynamic type systeme. 
- -# Anyway, a mixture model class should encapsulates all details -# concerning getting sufficient statistics (SS), likelihood and bic. class MixtureModel(object): pass class ExpMixtureModel(MixtureModel): - """Class to model mixture of exponential pdf (eg Gaussian, exponential, Laplace, - etc..). This is a special case because some parts of EM are common for those - models...""" + """Class to model mixture of exponential pdf (eg Gaussian, exponential, + Laplace, etc..). This is a special case because some parts of EM are common + for those models...""" pass class GMM(ExpMixtureModel): - """ A class to model a Gaussian Mixture Model (GMM). An instance of - this class is created by giving weights, mean and variances in the ctor. - An instanciated object can be sampled, trained by EM. - - The class method gen_model can be used without instanciation.""" - + """ A class to model a Gaussian Mixture Model (GMM). An instance of this + class is created by giving weights, mean and variances in the ctor. An + instanciated object can be sampled, trained by EM. 
""" def init_kmean(self, data, niter = 5): """ Init the model with kmean.""" k = self.gm.k d = self.gm.d init = data[0:k, :] - # XXX: This is bogus: should do better (in kmean or here, do not know yet) + # XXX: This is bogus initialization should do better (in kmean or here, + # do not know yet): should (code, label) = kmean(data, init, niter, minit = 'matrix') w = N.ones(k) / k @@ -74,14 +73,15 @@ va = N.zeros((k, d)) for i in range(k): for j in range(d): - va[i,j] = N.cov(data[N.where(label==i), j], rowvar = 0) + va[i, j] = N.cov(data[N.where(label==i), j], rowvar = 0) elif self.gm.mode == 'full': va = N.zeros((k*d, d)) for i in range(k): - va[i*d:i*d+d,:] = \ + va[i*d:i*d+d, :] = \ N.cov(data[N.where(label==i)], rowvar = 0) else: - raise GmmParamError("mode " + str(mode) + " not recognized") + raise GmmParamError("mode " + str(self.gm.mode) + \ + " not recognized") self.gm.set_param(w, mu, va) @@ -96,8 +96,8 @@ mu = randn(k, d) va = N.fabs(randn(k, d)) else: - raise GmmParamError("""init_random not implemented for - mode %s yet""", mode) + raise GmmParamError("init_random not implemented for " + "mode %s yet", self.gm.mode) self.gm.set_param(w, mu, va) @@ -109,8 +109,18 @@ # - To handle the different modes, we could do something "fancy" such as # replacing methods, to avoid checking cases everywhere and unconsistency. def __init__(self, gm, init = 'kmean'): - """ Initialize a GMM with weight w, mean mu and variances va, and initialization - method for training init (kmean by default)""" + """Initialize a mixture model. + + Initialize the model from a GM instance. This class implements all the + necessary functionalities for EM. + + :Parameters: + gm : GM + the mixture model to train. + init : string + initialization method to use. + + """ self.gm = gm # Possible init methods @@ -124,17 +134,18 @@ self.initst = init def sufficient_statistics(self, data): - """ Return normalized and non-normalized sufficient statistics - from the model. 
+ """Compute responsabilities. - Computes the latent variable distribution (a - posteriori probability) knowing the explicit data - for the Gaussian model (w, mu, var): gamma(t, i) = - P[state = i | observation = data(t); w, mu, va] + Return normalized and non-normalized sufficient statistics from the + model. + + Note + ---- + Computes the latent variable distribution (a posteriori probability) + knowing the explicit data for the Gaussian model (w, mu, var): gamma(t, + i) = P[state = i | observation = data(t); w, mu, va] This is basically the E step of EM for GMM.""" - n = data.shape[0] - # compute the gaussian pdf tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va) # multiply by the weight @@ -149,22 +160,22 @@ from the a posteriori pdf, computed by gmm_posterior (E step). """ - k = self.gm.k - d = self.gm.d - n = data.shape[0] - invn = 1.0/n - mGamma = N.sum(gamma, axis = 0) + k = self.gm.k + d = self.gm.d + n = data.shape[0] + invn = 1.0/n + mGamma = N.sum(gamma, axis = 0) if self.gm.mode == 'diag': - mu = N.zeros((k, d)) - va = N.zeros((k, d)) - gamma = gamma.T + mu = N.zeros((k, d)) + va = N.zeros((k, d)) + gamma = gamma.T for c in range(k): - x = N.dot(gamma[c:c+1,:], data)[0,:] - xx = N.dot(gamma[c:c+1,:], data ** 2)[0,:] + x = N.dot(gamma[c:c+1, :], data)[0, :] + xx = N.dot(gamma[c:c+1, :], data ** 2)[0, :] - mu[c,:] = x / mGamma[c] - va[c,:] = xx / mGamma[c] - mu[c,:] ** 2 + mu[c, :] = x / mGamma[c] + va[c, :] = xx / mGamma[c] - mu[c, :] ** 2 w = invn * mGamma elif self.gm.mode == 'full': @@ -177,21 +188,22 @@ mu = N.zeros((k, d)) va = N.zeros((k*d, d)) - gamma = gamma.transpose() + gamma = gamma.transpose() for c in range(k): #x = N.sum(N.outer(gamma[:, c], # N.ones((1, d))) * data, axis = 0) - x = N.dot(gamma[c:c+1,:], data)[0,:] - xx = N.zeros((d, d)) + x = N.dot(gamma[c:c+1, :], data)[0, :] + xx = N.zeros((d, d)) # This should be much faster than recursing on n... 
for i in range(d): for j in range(d): - xx[i,j] = N.sum(data[:,i] * data[:,j] * gamma[c,:], axis = 0) + xx[i, j] = N.sum(data[:, i] * data[:, j] * gamma[c, :], + axis = 0) - mu[c,:] = x / mGamma[c] - va[c*d:c*d+d,:] = xx / mGamma[c] - \ - N.outer(mu[c,:], mu[c,:]) + mu[c, :] = x / mGamma[c] + va[c*d:c*d+d, :] = xx / mGamma[c] \ + - N.outer(mu[c, :], mu[c, :]) w = invn * mGamma else: raise GmmParamError("varmode not recognized") @@ -226,19 +238,17 @@ of the definition given here. """ if self.gm.mode == 'diag': - """ for a diagonal model, we have - k - 1 (k weigths, but one constraint of normality) - + k * d (means) + k * d (variances) """ + # for a diagonal model, we have k - 1 (k weigths, but one + # constraint of normality) + k * d (means) + k * d (variances) free_deg = self.gm.k * (self.gm.d * 2 + 1) - 1 elif self.gm.mode == 'full': - """ for a full model, we have - k - 1 (k weigths, but one constraint of normality) - + k * d (means) + k * d * d / 2 (each covariance matrice - has d **2 params, but with positivity constraint) """ + # for a full model, we have k - 1 (k weigths, but one constraint of + # normality) + k * d (means) + k * d * d / 2 (each covariance + # matrice has d **2 params, but with positivity constraint) if self.gm.d == 1: - free_deg = self.gm.k * 3 - 1 + free_deg = self.gm.k * 3 - 1 else: - free_deg = self.gm.k * (self.gm.d + 1 + self.gm.d ** 2 / 2) - 1 + free_deg = self.gm.k * (self.gm.d + 1 + self.gm.d ** 2 / 2) - 1 lk = self.likelihood(data) n = N.shape(data)[0] @@ -261,21 +271,32 @@ pass def train(self, data, model, maxiter = 10, thresh = 1e-5): - """ - Train a model using data, and stops when the likelihood fails - behind a threshold, or when the number of iterations > niter, - whichever comes first + """Train a model using EM. 
- Args: - - data: contains the observed features, one row is one frame, ie one - observation of dimension d - - model: object of class Mixture - - maxiter: maximum number of iterations + Train a model using data, and stops when the likelihood increase + between two consecutive iteration fails behind a threshold, or when the + number of iterations > niter, whichever comes first - The model is trained, and its parameters updated accordingly. + :Parameters: + data : ndarray + contains the observed features, one row is one frame, ie one + observation of dimension d + model : GMM + GMM instance. + maxiter : int + maximum number of iterations + thresh : threshold + if the slope of the likelihood falls below this value, the + algorithm stops. - Returns: - likelihood (one value per iteration). + :Returns: + likelihood : ndarray + one value per iteration. + + Note + ---- + The model is trained, and its parameters updated accordingly, eg the + results are put in the GMM instance. """ if not isinstance(model, MixtureModel): raise TypeError("expect a MixtureModel as a model") @@ -296,62 +317,24 @@ model.update_em(data, g) if has_em_converged(like[i], like[i-1], thresh): return like[0:i] - # # Em computation, with computation of the likelihood - # g, tgd = model.sufficient_statistics(data) - # like[0] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) - # model.update_em(data, g) - # for i in range(1, maxiter): - # print "=== Iteration %d ===" % i - # isreg = False - # for j in range(model.gm.k): - # va = model.gm.va[j] - # if va.any() < _MIN_INV_COND: - # isreg = True - # print "\tregularization detected" - # print "\t" + str(va) - # model.gm.va[j] = regularize_diag(va) - # print "\t" + str(va) + ", " + str(model.gm.va[j]) - # print "\t" + str(gauss_den(data, model.gm.mu[j], model.gm.va[j])) - # print "\tend regularization detected" - # var = va - # - # g, tgd = model.sufficient_statistics(data) - # try: - # assert not( (N.isnan(tgd)).any() ) - # if isreg: - # print var - # except 
AssertionError: - # print "tgd is nan..." - # print model.gm.va[13,:] - # print 1/model.gm.va[13,:] - # print densities.gauss_den(data, model.gm.mu[13], model.gm.va[13]) - # print N.isnan((multiple_gauss_den(data, model.gm.mu, model.gm.va))).any() - # print "Exciting" - # import sys - # sys.exit(-1) - # like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) - # model.update_em(data, g) - # assert not( model.gm.va.any() < 1e-6) - # if has_em_converged(like[i], like[i-1], thresh): - # return like[0:i] return like -def regularize_diag(variance, alpha = _DEF_ALPHA): - delta = N.sum(variance) / variance.size - if delta > _MIN_DBL_DELTA: - return variance + alpha * delta - else: - return variance + alpha * _MIN_DBL_DELTA +#def regularize_diag(variance, alpha = _DEF_ALPHA): +# delta = N.sum(variance) / variance.size +# if delta > _MIN_DBL_DELTA: +# return variance + alpha * delta +# else: +# return variance + alpha * _MIN_DBL_DELTA +# +#def regularize_full(variance): +# # Trace of a positive definite matrix is always > 0 +# delta = N.trace(variance) / variance.shape[0] +# if delta > _MIN_DBL_DELTA: +# return variance + alpha * delta +# else: +# return variance + alpha * _MIN_DBL_DELTA -def regularize_full(variance): - # Trace of a positive definite matrix is always > 0 - delta = N.trace(variance) / variance.shape[0] - if delta > _MIN_DBL_DELTA: - return variance + alpha * delta - else: - return variance + alpha * _MIN_DBL_DELTA - # Misc functions def bic(lk, deg, n): """ Expects lk to be log likelihood """ @@ -369,127 +352,129 @@ return False if __name__ == "__main__": - import copy - #============================= - # Simple GMM with 5 components - #============================= + pass + ## import copy + ## #============================= + ## # Simple GMM with 5 components + ## #============================= - #+++++++++++++++++++++++++++++ - # Meta parameters of the model - # - k: Number of components - # - d: dimension of each Gaussian - # - mode: Mode of covariance matrix: 
full or diag - # - nframes: number of frames (frame = one data point = one - # row of d elements - k = 2 - d = 1 - mode = 'full' - nframes = 1e3 + ## #+++++++++++++++++++++++++++++ + ## # Meta parameters of the model + ## # - k: Number of components + ## # - d: dimension of each Gaussian + ## # - mode: Mode of covariance matrix: full or diag + ## # - nframes: number of frames (frame = one data point = one + ## # row of d elements + ## k = 2 + ## d = 1 + ## mode = 'full' + ## nframes = 1e3 - #+++++++++++++++++++++++++++++++++++++++++++ - # Create an artificial GMM model, samples it - #+++++++++++++++++++++++++++++++++++++++++++ - print "Generating the mixture" - # Generate a model with k components, d dimensions - w, mu, va = GM.gen_param(d, k, mode, spread = 3) - gm = GM(d, k, mode) - gm.set_param(w, mu, va) + ## #+++++++++++++++++++++++++++++++++++++++++++ + ## # Create an artificial GMM model, samples it + ## #+++++++++++++++++++++++++++++++++++++++++++ + ## print "Generating the mixture" + ## # Generate a model with k components, d dimensions + ## w, mu, va = GM.gen_param(d, k, mode, spread = 3) + ## gm = GM(d, k, mode) + ## gm.set_param(w, mu, va) - # Sample nframes frames from the model - data = gm.sample(nframes) + ## # Sample nframes frames from the model + ## data = gm.sample(nframes) - #++++++++++++++++++++++++ - # Learn the model with EM - #++++++++++++++++++++++++ + ## #++++++++++++++++++++++++ + ## # Learn the model with EM + ## #++++++++++++++++++++++++ - # Init the model - print "Init a model for learning, with kmean for initialization" - lgm = GM(d, k, mode) - gmm = GMM(lgm, 'kmean') - gmm.init(data) + ## # Init the model + ## print "Init a model for learning, with kmean for initialization" + ## lgm = GM(d, k, mode) + ## gmm = GMM(lgm, 'kmean') + ## gmm.init(data) - # Keep the initialized model for drawing - gm0 = copy.copy(lgm) + ## # Keep the initialized model for drawing + ## gm0 = copy.copy(lgm) - # The actual EM, with likelihood computation - 
niter = 10 - like = N.zeros(niter) + ## # The actual EM, with likelihood computation + ## niter = 10 + ## like = N.zeros(niter) - print "computing..." - for i in range(niter): - g, tgd = gmm.sufficient_statistics(data) - like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) - gmm.update_em(data, g) - # # Alternative form, by using EM class: as the EM class - # # is quite rudimentary now, it is not very useful, just save - # # a few lines - # em = EM() - # like = em.train(data, gmm, niter) + ## print "computing..." + ## for i in range(niter): + ## g, tgd = gmm.sufficient_statistics(data) + ## like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) + ## gmm.update_em(data, g) + ## # # Alternative form, by using EM class: as the EM class + ## # # is quite rudimentary now, it is not very useful, just save + ## # # a few lines + ## # em = EM() + ## # like = em.train(data, gmm, niter) - #+++++++++++++++ - # Draw the model - #+++++++++++++++ - print "drawing..." - import pylab as P - P.subplot(2, 1, 1) + ## #+++++++++++++++ + ## # Draw the model + ## #+++++++++++++++ + ## print "drawing..." 
+ ## import pylab as P + ## P.subplot(2, 1, 1) - if not d == 1: - # Draw what is happening - P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_') + ## if not d == 1: + ## # Draw what is happening + ## P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_') - # Real confidence ellipses - Xre, Yre = gm.conf_ellipses() - P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides') - for i in range(1,k): - P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_') + ## # Real confidence ellipses + ## Xre, Yre = gm.conf_ellipses() + ## P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides') + ## for i in range(1,k): + ## P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_') - # Initial confidence ellipses as found by kmean - X0e, Y0e = gm0.conf_ellipses() - P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoides') - for i in range(1,k): - P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_') + ## # Initial confidence ellipses as found by kmean + ## X0e, Y0e = gm0.conf_ellipses() + ## P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoides') + ## for i in range(1,k): + ## P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_') - # Values found by EM - Xe, Ye = lgm.conf_ellipses() - P.plot(Xe[0], Ye[0], 'r', label = 'confidence ellipsoides found by EM') - for i in range(1,k): - P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_') - P.legend(loc = 0) - else: - # Real confidence ellipses - h = gm.plot1d() - [i.set_color('g') for i in h['pdf']] - h['pdf'][0].set_label('true pdf') + ## # Values found by EM + ## Xe, Ye = lgm.conf_ellipses() + ## P.plot(Xe[0], Ye[0], 'r', label = "confidence ellipsoides found by" + ## "EM") + ## for i in range(1,k): + ## P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_') + ## P.legend(loc = 0) + ## else: + ## # Real confidence ellipses + ## h = gm.plot1d() + ## [i.set_color('g') for i in h['pdf']] + ## h['pdf'][0].set_label('true pdf') - # Initial confidence ellipses as found by kmean - h0 = gm0.plot1d() - [i.set_color('k') for 
i in h0['pdf']] - h0['pdf'][0].set_label('initial pdf') + ## # Initial confidence ellipses as found by kmean + ## h0 = gm0.plot1d() + ## [i.set_color('k') for i in h0['pdf']] + ## h0['pdf'][0].set_label('initial pdf') - # Values found by EM - hl = lgm.plot1d(fill = 1, level = 0.66) - [i.set_color('r') for i in hl['pdf']] - hl['pdf'][0].set_label('pdf found by EM') + ## # Values found by EM + ## hl = lgm.plot1d(fill = 1, level = 0.66) + ## [i.set_color('r') for i in hl['pdf']] + ## hl['pdf'][0].set_label('pdf found by EM') - P.legend(loc = 0) + ## P.legend(loc = 0) - P.subplot(2, 1, 2) - P.plot(like) - P.title('log likelihood') + ## P.subplot(2, 1, 2) + ## P.plot(like) + ## P.title('log likelihood') - # #++++++++++++++++++ - # # Export the figure - # #++++++++++++++++++ - # F = P.gcf() - # DPI = F.get_dpi() - # DefaultSize = F.get_size_inches() - # # the default is 100dpi for savefig: - # F.savefig("example1.png") + ## # #++++++++++++++++++ + ## # # Export the figure + ## # #++++++++++++++++++ + ## # F = P.gcf() + ## # DPI = F.get_dpi() + ## # DefaultSize = F.get_size_inches() + ## # # the default is 100dpi for savefig: + ## # F.savefig("example1.png") - # # Now make the image twice as big, while keeping the fonts and all the - # # same size - # F.set_figsize_inches( (DefaultSize[0]*2, DefaultSize[1]*2) ) - # Size = F.get_size_inches() - # print "Size in Inches", Size - # F.savefig("example2.png") - P.show() + ## # # Now make the image twice as big, while keeping the fonts and all the + ## # # same size + ## # F.set_figsize_inches( (DefaultSize[0]*2, DefaultSize[1]*2) ) + ## # Size = F.get_size_inches() + ## # print "Size in Inches", Size + ## # F.savefig("example2.png") + ## P.show() Modified: trunk/Lib/sandbox/pyem/info.py =================================================================== --- trunk/Lib/sandbox/pyem/info.py 2007-06-09 11:43:51 UTC (rev 3086) +++ trunk/Lib/sandbox/pyem/info.py 2007-06-09 14:03:01 UTC (rev 3087) @@ -1,61 +1,63 @@ """ -Routines for 
Gaussian Mixture Models -and learning with Expectation Maximization -========================================== +Routines for Gaussian Mixture Models and learning with Expectation Maximization +=============================================================================== -This module contains classes and function to compute multivariate Gaussian densities -(diagonal and full covariance matrices), Gaussian mixtures, Gaussian mixtures models -and an Em trainer. +This module contains classes and function to compute multivariate Gaussian +densities (diagonal and full covariance matrices), Gaussian mixtures, Gaussian +mixtures models and an Em trainer. More specifically, the module defines the following classes, functions: - densities.gauss_den: function to compute multivariate Gaussian pdf -- gauss_mix.GM: defines the GM (Gaussian Mixture) class. A Gaussian Mixture can be -created from its parameters weights, mean and variances, or from its meta parameters -d (dimension of the Gaussian) and k (number of components in the mixture). A Gaussian -Model can then be sampled or plot (if d>1, plot confidence ellipsoids projected on -2 chosen dimensions, if d == 1, plot the pdf of each component and fill the zone -of confidence for a given level) -- gmm_em.GMM: defines a class GMM (Gaussian Mixture Model). This class is constructed -from a GM model gm, and can be used to train gm. The GMM can be initiated by -kmean or at random, and can compute sufficient statistics, and update its parameters -from the sufficient statistics. -- kmean.kmean: implements a kmean algorithm. We cannot use scipy.cluster.vq kmeans, since -its does not give membership of observations. +- gauss_mix.GM: defines the GM (Gaussian Mixture) class. A Gaussian Mixture can + be created from its parameters weights, mean and variances, or from its meta + parameters d (dimension of the Gaussian) and k (number of components in the + mixture). 
A Gaussian Model can then be sampled or plot (if d>1, plot + confidence ellipsoids projected on 2 chosen dimensions, if d == 1, plot the + pdf of each component and fill the zone of confidence for a given level) +- gmm_em.GMM: defines a class GMM (Gaussian Mixture Model). This class is + constructed from a GM model gm, and can be used to train gm. The GMM can be + initiated by kmean or at random, and can compute sufficient statistics, and + update its parameters from the sufficient statistics. +- kmean.kmean: implements a kmean algorithm. We cannot use scipy.cluster.vq + kmeans, since its does not give membership of observations. Example of use: - #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - # Create an artificial 2 dimension, 3 clusters GM model, samples it - #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - w, mu, va = GM.gen_param(2, 3, 'diag', spread = 1.5) - gm = GM.fromvalues(w, mu, va) +--------------- - # Sample 1000 frames from the model - data = gm.sample(1000) +>>> #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +>>> # Create an artificial 2 dimension, 3 clusters GM model, samples it +>>> #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +>>> w, mu, va = GM.gen_param(2, 3, 'diag', spread = 1.5) +>>> gm = GM.fromvalues(w, mu, va) +>>> +>>> # Sample 1000 frames from the model +>>> data = gm.sample(1000) +>>> +>>> #++++++++++++++++++++++++ +>>> # Learn the model with EM +>>> #++++++++++++++++++++++++ +>>> # Init the model +>>> lgm = GM(d, k, mode) +>>> gmm = GMM(lgm, 'kmean') +>>> +>>> # The actual EM, with likelihood computation. 
The threshold +>>> # is compared to the (linearly appromixated) derivative of the likelihood +>>> em = EM() +>>> like = em.train(data, gmm, maxiter = 30, thresh = 1e-8) - #++++++++++++++++++++++++ - # Learn the model with EM - #++++++++++++++++++++++++ - # Init the model - lgm = GM(d, k, mode) - gmm = GMM(lgm, 'kmean') - - # The actual EM, with likelihood computation. The threshold - # is compared to the (linearly appromixated) derivative of the likelihood - em = EM() - like = em.train(data, gmm, maxiter = 30, thresh = 1e-8) - Files example.py and example2.py show more capabilities of the toolbox, including plotting capabilities (using matplotlib) and model selection using Bayesian Information Criterion (BIC). Bibliography: - * Maximum likelihood from incomplete data via the EM algorithm in Journal of - the Royal Statistical Society, Series B, 39(1):1--38, 1977, by A. P. Dempster, - N. M. Laird, and D. B. Rubin - * Bayesian Approaches to Gaussian Mixture Modelling (1998) by - Stephen J. Roberts, Dirk Husmeier, Iead Rezek, William Penny in - IEEE Transactions on Pattern Analysis and Machine Intelligence + +- Maximum likelihood from incomplete data via the EM algorithm in Journal of + the Royal Statistical Society, Series B, 39(1):1--38, 1977, by A. P. + Dempster, N. M. Laird, and D. B. Rubin +- Bayesian Approaches to Gaussian Mixture Modelling (1998) by Stephen J. 
+ Roberts, Dirk Husmeier, Iead Rezek, William Penny in IEEE Transactions on + Pattern Analysis and Machine Intelligence Copyright: David Cournapeau 2006 License: BSD-style (see LICENSE.txt in main source directory) Modified: trunk/Lib/sandbox/pyem/online_em.py =================================================================== --- trunk/Lib/sandbox/pyem/online_em.py 2007-06-09 11:43:51 UTC (rev 3086) +++ trunk/Lib/sandbox/pyem/online_em.py 2007-06-09 14:03:01 UTC (rev 3087) @@ -1,28 +1,27 @@ # /usr/bin/python -# Last Change: Fri Jun 08 08:00 PM 2007 J +# Last Change: Sat Jun 09 10:00 PM 2007 J -#--------------------------------------------- -# This is not meant to be used yet !!!! I am -# not sure how to integrate this stuff inside -# the package yet. The cases are: -# - we have a set of data, and we want to test online EM -# compared to normal EM -# - we do not have all the data before putting them in online EM: -# eg current frame depends on previous frame in some way. +# This is not meant to be used yet !!!! I am not sure how to integrate this +# stuff inside the package yet. The cases are: +# - we have a set of data, and we want to test online EM compared to normal +# EM +# - we do not have all the data before putting them in online EM: eg current +# frame depends on previous frame in some way. # TODO: # - Add biblio -# - Look back at articles for discussion for init, regularization and +# - Look back at articles for discussion for init, regularization and # convergence rates -# - the function sufficient_statistics does not really return SS. This is not a -# big problem, but it would be better to really return them as the name implied. +# - the function sufficient_statistics does not really return SS. This is not +# a big problem, but it would be better to really return them as the name +# implied. 
import numpy as N from numpy import mean from numpy.testing import assert_array_almost_equal, assert_array_equal -from gmm_em import ExpMixtureModel, GMM, EM -from gauss_mix import GM +from gmm_em import ExpMixtureModel#, GMM, EM +#from gauss_mix import GM from scipy.cluster.vq import kmeans2 as kmean import densities2 as D @@ -60,22 +59,24 @@ k = self.gm.k d = self.gm.d if self.gm.mode == 'diag': - w = N.ones(k) / k + w = N.ones(k) / k # Init the internal state of EM - self.cx = N.outer(w, mean(init_data, 0)) - self.cxx = N.outer(w, mean(init_data ** 2, 0)) + self.cx = N.outer(w, mean(init_data, 0)) + self.cxx = N.outer(w, mean(init_data ** 2, 0)) # w, mu and va init is the same that in the standard case - (code, label) = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix') - mu = code.copy() - va = N.zeros((k, d)) + (code, label) = kmean(init_data, init_data[0:k, :], iter = 10, + minit = 'matrix') + mu = code.copy() + va = N.zeros((k, d)) for i in range(k): for j in range(d): - va [i,j] = N.cov(init_data[N.where(label==i), j], rowvar = 0) + va [i, j] = N.cov(init_data[N.where(label==i), j], + rowvar = 0) else: raise OnGmmParamError("""init_online not implemented for - mode %s yet""", mode) + mode %s yet""", self.gm.mode) self.gm.set_param(w, mu, va) # c* are the parameters which are computed at every step (ie @@ -95,22 +96,24 @@ k = self.gm.k d = self.gm.d if self.gm.mode == 'diag': - w = N.ones(k) / k + w = N.ones(k) / k # Init the internal state of EM - self.cx = N.outer(w, mean(init_data, 0)) - self.cxx = N.outer(w, mean(init_data ** 2, 0)) + self.cx = N.outer(w, mean(init_data, 0)) + self.cxx = N.outer(w, mean(init_data ** 2, 0)) # w, mu and va init is the same that in the standard case - (code, label) = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix') - mu = code.copy() - va = N.zeros((k, d)) + (code, label) = kmean(init_data, init_data[0:k, :], + iter = niter, minit = 'matrix') + mu = code.copy() + va = N.zeros((k, d)) for 
i in range(k): for j in range(d): - va [i,j] = N.cov(init_data[N.where(label==i), j], rowvar = 0) + va[i, j] = N.cov(init_data[N.where(label==i), j], + rowvar = 0) else: raise OnGmmParamError("""init_online not implemented for - mode %s yet""", mode) + mode %s yet""", self.gm.mode) self.gm.set_param(w, mu, va) # c* are the parameters which are computed at every step (ie @@ -278,132 +281,133 @@ if __name__ == '__main__': - d = 1 - k = 2 - mode = 'diag' - nframes = int(5e3) - emiter = 4 - seed(5) + pass + #d = 1 + #k = 2 + #mode = 'diag' + #nframes = int(5e3) + #emiter = 4 + #seed(5) - #+++++++++++++++++++++++++++++++++++++++++++++++++ - # Generate a model with k components, d dimensions - #+++++++++++++++++++++++++++++++++++++++++++++++++ - w, mu, va = GM.gen_param(d, k, mode, spread = 1.5) - gm = GM.fromvalues(w, mu, va) - # Sample nframes frames from the model - data = gm.sample(nframes) + ##+++++++++++++++++++++++++++++++++++++++++++++++++ + ## Generate a model with k components, d dimensions + ##+++++++++++++++++++++++++++++++++++++++++++++++++ + #w, mu, va = GM.gen_param(d, k, mode, spread = 1.5) + #gm = GM.fromvalues(w, mu, va) + ## Sample nframes frames from the model + #data = gm.sample(nframes) - #++++++++++++++++++++++++++++++++++++++++++ - # Approximate the models with classical EM - #++++++++++++++++++++++++++++++++++++++++++ - # Init the model - lgm = GM(d, k, mode) - gmm = GMM(lgm, 'kmean') - gmm.init(data) + ##++++++++++++++++++++++++++++++++++++++++++ + ## Approximate the models with classical EM + ##++++++++++++++++++++++++++++++++++++++++++ + ## Init the model + #lgm = GM(d, k, mode) + #gmm = GMM(lgm, 'kmean') + #gmm.init(data) - gm0 = copy.copy(gmm.gm) - # The actual EM, with likelihood computation - like = N.zeros(emiter) - for i in range(emiter): - g, tgd = gmm.sufficient_statistics(data) - like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) - gmm.update_em(data, g) + #gm0 = copy.copy(gmm.gm) + ## The actual EM, with likelihood computation + #like = 
N.zeros(emiter) + #for i in range(emiter): + # g, tgd = gmm.sufficient_statistics(data) + # like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) + # gmm.update_em(data, g) - #++++++++++++++++++++++++++++++++++++++++ - # Approximate the models with online EM - #++++++++++++++++++++++++++++++++++++++++ - ogm = GM(d, k, mode) - ogmm = OnGMM(ogm, 'kmean') - init_data = data[0:nframes / 20, :] - ogmm.init(init_data) + ##++++++++++++++++++++++++++++++++++++++++ + ## Approximate the models with online EM + ##++++++++++++++++++++++++++++++++++++++++ + #ogm = GM(d, k, mode) + #ogmm = OnGMM(ogm, 'kmean') + #init_data = data[0:nframes / 20, :] + #ogmm.init(init_data) - # Forgetting param - ku = 0.005 - t0 = 200 - lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0) - nu0 = 0.2 - nu = N.zeros((len(lamb), 1)) - nu[0] = nu0 - for i in range(1, len(lamb)): - nu[i] = 1./(1 + lamb[i] / nu[i-1]) + ## Forgetting param + #ku = 0.005 + #t0 = 200 + #lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0) + #nu0 = 0.2 + #nu = N.zeros((len(lamb), 1)) + #nu[0] = nu0 + #for i in range(1, len(lamb)): + # nu[i] = 1./(1 + lamb[i] / nu[i-1]) - print "meth1" - # object version of online EM - for t in range(nframes): - ogmm.compute_sufficient_statistics_frame(data[t], nu[t]) - ogmm.update_em_frame() + #print "meth1" + ## object version of online EM + #for t in range(nframes): + # ogmm.compute_sufficient_statistics_frame(data[t], nu[t]) + # ogmm.update_em_frame() - ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva) + #ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva) - # 1d optimized version - ogm2 = GM(d, k, mode) - ogmm2 = OnGMM1d(ogm2, 'kmean') - ogmm2.init(init_data[:, 0]) + ## 1d optimized version + #ogm2 = GM(d, k, mode) + #ogmm2 = OnGMM1d(ogm2, 'kmean') + #ogmm2.init(init_data[:, 0]) - print "meth2" - # object version of online EM - for t in range(nframes): - ogmm2.compute_sufficient_statistics_frame(data[t, 0], nu[t]) - ogmm2.update_em_frame() + #print "meth2" + ## object version of online EM + #for t in 
range(nframes): + # ogmm2.compute_sufficient_statistics_frame(data[t, 0], nu[t]) + # ogmm2.update_em_frame() - #ogmm2.gm.set_param(ogmm2.cw, ogmm2.cmu, ogmm2.cva) + ##ogmm2.gm.set_param(ogmm2.cw, ogmm2.cmu, ogmm2.cva) - print ogmm.cw - print ogmm2.cw - #+++++++++++++++ - # Draw the model - #+++++++++++++++ - print "drawing..." - import pylab as P - P.subplot(2, 1, 1) + #print ogmm.cw + #print ogmm2.cw + ##+++++++++++++++ + ## Draw the model + ##+++++++++++++++ + #print "drawing..." + #import pylab as P + #P.subplot(2, 1, 1) - if not d == 1: - # Draw what is happening - P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_') + #if not d == 1: + # # Draw what is happening + # P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_') - h = gm.plot() - [i.set_color('g') for i in h] - h[0].set_label('true confidence ellipsoides') + # h = gm.plot() + # [i.set_color('g') for i in h] + # h[0].set_label('true confidence ellipsoides') - h = gm0.plot() - [i.set_color('k') for i in h] - h[0].set_label('initial confidence ellipsoides') + # h = gm0.plot() + # [i.set_color('k') for i in h] + # h[0].set_label('initial confidence ellipsoides') - h = lgm.plot() - [i.set_color('r') for i in h] - h[0].set_label('confidence ellipsoides found by EM') + # h = lgm.plot() + # [i.set_color('r') for i in h] + # h[0].set_label('confidence ellipsoides found by EM') - h = ogmm.gm.plot() - [i.set_color('m') for i in h] - h[0].set_label('confidence ellipsoides found by Online EM') + # h = ogmm.gm.plot() + # [i.set_color('m') for i in h] + # h[0].set_label('confidence ellipsoides found by Online EM') - # P.legend(loc = 0) - else: - # Real confidence ellipses - h = gm.plot1d() - [i.set_color('g') for i in h['pdf']] - h['pdf'][0].set_label('true pdf') + # # P.legend(loc = 0) + #else: + # # Real confidence ellipses + # h = gm.plot1d() + # [i.set_color('g') for i in h['pdf']] + # h['pdf'][0].set_label('true pdf') - # Initial confidence ellipses as found by kmean - h0 = gm0.plot1d() - 
[i.set_color('k') for i in h0['pdf']] - h0['pdf'][0].set_label('initial pdf') + # # Initial confidence ellipses as found by kmean + # h0 = gm0.plot1d() + # [i.set_color('k') for i in h0['pdf']] + # h0['pdf'][0].set_label('initial pdf') - # Values found by EM - hl = lgm.plot1d(fill = 1, level = 0.66) - [i.set_color('r') for i in hl['pdf']] - hl['pdf'][0].set_label('pdf found by EM') + # # Values found by EM + # hl = lgm.plot1d(fill = 1, level = 0.66) + # [i.set_color('r') for i in hl['pdf']] + # hl['pdf'][0].set_label('pdf found by EM') - P.legend(loc = 0) + # P.legend(loc = 0) - # Values found by Online EM - hl = ogmm.gm.plot1d(fill = 1, level = 0.66) - [i.set_color('m') for i in hl['pdf']] - hl['pdf'][0].set_label('pdf found by Online EM') + # # Values found by Online EM + # hl = ogmm.gm.plot1d(fill = 1, level = 0.66) + # [i.set_color('m') for i in hl['pdf']] + # hl['pdf'][0].set_label('pdf found by Online EM') - P.legend(loc = 0) + # P.legend(loc = 0) - P.subplot(2, 1, 2) - P.plot(nu) - P.title('Learning rate') - P.show() + #P.subplot(2, 1, 2) + #P.plot(nu) + #P.title('Learning rate') + #P.show() From scipy-svn at scipy.org Sun Jun 10 05:37:03 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 10 Jun 2007 04:37:03 -0500 (CDT) Subject: [Scipy-svn] r3088 - trunk/Lib/sandbox/pyem Message-ID: <20070610093703.F3CE939C018@new.scipy.org> Author: cdavid Date: 2007-06-10 04:36:59 -0500 (Sun, 10 Jun 2007) New Revision: 3088 Modified: trunk/Lib/sandbox/pyem/gmm_em.py Log: Add special initialization method for mixture models for testing purpose. 
Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-09 14:03:01 UTC (rev 3087) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-10 09:36:59 UTC (rev 3088) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Sat Jun 09 10:00 PM 2007 J +# Last Change: Sun Jun 10 06:00 PM 2007 J """Module implementing GMM, a class to estimate Gaussian mixture models using EM, and EM, a class which use GMM instances to estimate models parameters using @@ -103,6 +103,21 @@ self.isinit = True + def init_test(self, data): + """Use values already in the model as initialization. + + Useful for testing purpose when reproducability is necessary.""" + try: + if self.gm.check_state(): + self.isinit = True + else: + raise GmParamError("the mixture is initialized, but the"\ + "parameters are not valid") + + except GmParamError, e: + print "Model is not properly initalized, cannot init EM." + raise "Message was %s" % str(e) + # TODO: # - format of parameters ? For variances, list of variances matrix, # keep the current format, have 3d matrices ? @@ -118,13 +133,12 @@ gm : GM the mixture model to train. init : string - initialization method to use. - - """ + initialization method to use.""" self.gm = gm # Possible init methods - init_methods = {'kmean': self.init_kmean, 'random' : self.init_random} + init_methods = {'kmean': self.init_kmean, 'random' : self.init_random, + 'test': self.init_test} if init not in init_methods: raise GmmParamError('init method %s not recognized' + str(init)) From scipy-svn at scipy.org Sun Jun 10 12:28:29 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Sun, 10 Jun 2007 11:28:29 -0500 (CDT) Subject: [Scipy-svn] r3089 - in trunk/Lib/sandbox/rbf: . 
tests Message-ID: <20070610162829.4072C39C1A0@new.scipy.org> Author: jtravs Date: 2007-06-10 11:28:20 -0500 (Sun, 10 Jun 2007) New Revision: 3089 Modified: trunk/Lib/sandbox/rbf/rbf.py trunk/Lib/sandbox/rbf/tests/example1.py Log: Updates to sandbox.rbf module. Modified: trunk/Lib/sandbox/rbf/rbf.py =================================================================== --- trunk/Lib/sandbox/rbf/rbf.py 2007-06-10 09:36:59 UTC (rev 3088) +++ trunk/Lib/sandbox/rbf/rbf.py 2007-06-10 16:28:20 UTC (rev 3089) @@ -56,9 +56,9 @@ def _function(self, r): if self.function.lower() == 'multiquadric': - return sqrt((self.epsilon*r)**2 + 1) + return sqrt((1.0/self.epsilon*r)**2 + 1) elif self.function.lower() == 'inverse multiquadric': - return 1.0/sqrt((self.epsilon*r)**2 + 1) + return 1.0/sqrt((1.0/self.epsilon*r)**2 + 1) elif self.function.lower() == 'gausian': return exp(-(self.epsilon*r)**2) elif self.function.lower() == 'cubic': Modified: trunk/Lib/sandbox/rbf/tests/example1.py =================================================================== --- trunk/Lib/sandbox/rbf/tests/example1.py 2007-06-10 09:36:59 UTC (rev 3088) +++ trunk/Lib/sandbox/rbf/tests/example1.py 2007-06-10 16:28:20 UTC (rev 3089) @@ -25,26 +25,29 @@ p.subplot(2,1,2) p.plot(x,y,'bo',xi,fi,'g',xi, s.sin(xi),'r') p.title('RBF interpolation - multiquadrics') -p.show() +p.savefig('rbf1d.png') +p.close() # 2-d tests - setup scattered data -x = s.rand(50,1)*4-2 -y = s.rand(50,1)*4-2 +x = s.rand(100)*4.0-2.0 +y = s.rand(100)*4.0-2.0 z = x*s.exp(-x**2-y**2) -ti = s.linspace(-2.0,2.0,81) +ti = s.linspace(-2.0,2.0,100) (XI,YI) = s.meshgrid(ti,ti) # use RBF -rbf = Rbf(x.flatten(),y.flatten(),z.flatten(),eps=2) -ZI = rbf(XI.flatten(), YI.flatten()) -ZI.shape = XI.shape +rbf = Rbf(x,y,z,epsilon=2) +ZI = rbf(XI, YI) # plot the result -from enthought.tvtk.tools import mlab -f=mlab.figure(browser=False) -su=mlab.Surf(XI,YI,ZI,ZI,scalar_visibility=True) -f.add(su) -su.lut_type='blue-red' -f.objects[0].axis.z_label='value' -pp 
= mlab.Spheres(s.c_[x.flatten(), y.flatten(), z.flatten()],radius=0.03) -f.add(pp) +n = p.normalize(-2., 2.) +p.subplot(1,1,1) +p.pcolor(XI,YI,ZI,cmap=p.cm.jet) +p.scatter(x,y,100,z,cmap=p.cm.jet) +p.title('RBF interpolation - multiquadrics') +p.xlim(-2,2) +p.ylim(-2,2) +p.colorbar() +p.savefig('rbf2d.png') +p.close() + From scipy-svn at scipy.org Mon Jun 11 03:01:32 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Jun 2007 02:01:32 -0500 (CDT) Subject: [Scipy-svn] r3090 - in trunk/Lib/sandbox/pyem: . doc tests Message-ID: <20070611070132.3185939C050@new.scipy.org> Author: cdavid Date: 2007-06-11 02:01:12 -0500 (Mon, 11 Jun 2007) New Revision: 3090 Modified: trunk/Lib/sandbox/pyem/densities.py trunk/Lib/sandbox/pyem/doc/tutorial.pdf trunk/Lib/sandbox/pyem/gauss_mix.py trunk/Lib/sandbox/pyem/gmm_em.py trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py Log: * Correct bogus GM._get_va which caused bogus isodensity plot + test * Support for plain matrix in GM.check_state Modified: trunk/Lib/sandbox/pyem/densities.py =================================================================== --- trunk/Lib/sandbox/pyem/densities.py 2007-06-10 16:28:20 UTC (rev 3089) +++ trunk/Lib/sandbox/pyem/densities.py 2007-06-11 07:01:12 UTC (rev 3090) @@ -1,7 +1,7 @@ #! 
/usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Sat Jun 09 10:00 PM 2007 J +# Last Change: Mon Jun 11 03:00 PM 2007 J """This module implements various basic functions related to multivariate gaussian, such as pdf estimation, confidence interval/ellipsoids, etc...""" @@ -246,9 +246,9 @@ circle = mahal * N.array([N.cos(theta), N.sin(theta)]) # Get the dimension which we are interested in: - mu = mu[dim] + mu = mu[c] if mode == 'diag': - va = va[dim] + va = va[c] elps = N.outer(mu, N.ones(npoints)) elps += N.dot(N.diag(N.sqrt(va)), circle) elif mode == 'full': Modified: trunk/Lib/sandbox/pyem/doc/tutorial.pdf =================================================================== (Binary files differ) Modified: trunk/Lib/sandbox/pyem/gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-10 16:28:20 UTC (rev 3089) +++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-11 07:01:12 UTC (rev 3090) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Sat Jun 09 10:00 PM 2007 J +# Last Change: Mon Jun 11 03:00 PM 2007 J """Module implementing GM, a class which represents Gaussian mixtures. @@ -296,27 +296,26 @@ to be positive definite. """ if not self.is_valid: - raise GmParamError("""Parameters of the model has not been - set yet, please set them using self.set_param()""") + raise GmParamError("Parameters of the model has not been"\ + "set yet, please set them using self.set_param()") - if self.mode == 'full': - raise NotImplementedError, "not implemented for full mode yet" - - # # How to check w: if one component is negligeable, what shall - # # we do ? 
- # M = N.max(self.w) - # m = N.min(self.w) - - # maxc = m / M - # Check condition number for cov matrix - cond = N.zeros(self.k) - ava = N.absolute(self.va) - for c in range(self.k): - cond[c] = N.amax(ava[c, :]) / N.amin(ava[c, :]) + if self.mode == 'diag': + tinfo = N.finfo(self.va.dtype) + if N.any(self.va < tinfo.eps): + raise GmParamError("variances are singular") + elif self.mode == 'full': + try: + d = self.d + for i in range(self.k): + N.linalg.cholesky(self.va[i*d:i*d+d, :]) + except N.linalg.LinAlgError: + raise GmParamError("matrix %d is singular " % i) - print cond + else: + raise GmParamError("Unknown mode") + return True @classmethod def gen_param(cls, d, nc, varmode = 'diag', spread = 1): """Generate random, valid parameters for a gaussian mixture model. @@ -341,13 +340,14 @@ ----- This is a class method. """ - w = abs(randn(nc)) + w = N.abs(randn(nc)) w = w / sum(w, 0) - mu = spread * randn(nc, d) + mu = spread * N.sqrt(d) * randn(nc, d) if varmode == 'diag': - va = abs(randn(nc, d)) + va = N.abs(randn(nc, d)) elif varmode == 'full': + # If A is invertible, A'A is positive definite va = randn(nc * d, d) for k in range(nc): va[k*d:k*d+d] = N.dot( va[k*d:k*d+d], @@ -509,7 +509,7 @@ return retval def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50, - maxlevel = 0.95): + maxlevel = 0.95, V = None): """Do all the necessary computation for contour plot of mixture's density. @@ -556,8 +556,10 @@ N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim) lden = N.log(den) # XXX: how to find "good" values for level ? 
- V = [-5, -3, -1, -0.5, ] - V.extend(N.linspace(0, N.max(lden), 4).tolist()) + if V is None: + #V = [-5, -3, -1, -0.5, ] + #V.extend(list(N.linspace(0, N.max(lden), 20))) + V = N.linspace(-5, N.max(lden), 20) return X, Y, lden, N.array(V) def _densityctr(self, rangex, rangey, dim = misc.DEF_VIS_DIM): @@ -578,7 +580,8 @@ return X, Y, den def _get_va(self, dim): - """Returns variance limited do dimension in dim.""" + """Returns variance limited do 2 dimension in tuple dim.""" + assert len(dim) == 2 dim = N.array(dim) if dim.any() < 0 or dim.any() >= self.d: raise ValueError("dim elements should be between 0 and dimension"\ @@ -586,9 +589,16 @@ if self.mode == 'diag': return self.va[:, dim] elif self.mode == 'full': - tidx = N.array([N.array(dim) + i * self.d for i in range(self.k)]) - tidx.flatten() - return self.va[tidx, dim] + ld = dim.size + vaselid = N.empty((ld * self.k, ld), N.int) + for i in range(self.k): + vaselid[ld*i] = dim[0] + i * self.d + vaselid[ld*i+1] = dim[1] + i * self.d + vadid = N.empty((ld * self.k, ld), N.int) + for i in range(self.k): + vadid[ld*i] = dim + vadid[ld*i+1] = dim + return self.va[vaselid, vadid] else: raise ValueError("Unkown mode") Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-10 16:28:20 UTC (rev 3089) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-11 07:01:12 UTC (rev 3090) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Sun Jun 10 06:00 PM 2007 J +# Last Change: Mon Jun 11 01:00 PM 2007 J """Module implementing GMM, a class to estimate Gaussian mixture models using EM, and EM, a class which use GMM instances to estimate models parameters using @@ -20,7 +20,7 @@ import densities #from kmean import kmean from scipy.cluster.vq import kmeans2 as kmean -#from gauss_mix import GM +from gauss_mix import GmParamError #from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND @@ -91,13 +91,18 @@ """ Init the model at 
random.""" k = self.gm.k d = self.gm.d + w = N.ones(k) / k + mu = randn(k, d) if self.gm.mode == 'diag': - w = N.ones(k) / k - mu = randn(k, d) va = N.fabs(randn(k, d)) else: - raise GmmParamError("init_random not implemented for " - "mode %s yet", self.gm.mode) + # If A is invertible, A'A is positive definite + va = randn(k * d, d) + for i in range(k): + va[i*d:i*d+d] = N.dot( va[i*d:i*d+d], + va[i*d:i*d+d].T) + #raise GmmParamError("init_random not implemented for "\ + # "mode %s yet" % self.gm.mode) self.gm.set_param(w, mu, va) @@ -106,14 +111,12 @@ def init_test(self, data): """Use values already in the model as initialization. - Useful for testing purpose when reproducability is necessary.""" + Useful for testing purpose when reproducability is necessary. This does + nothing but checking that the mixture model has valid initial + values.""" + # We have try: - if self.gm.check_state(): - self.isinit = True - else: - raise GmParamError("the mixture is initialized, but the"\ - "parameters are not valid") - + self.gm.check_state() except GmParamError, e: print "Model is not properly initalized, cannot init EM." raise "Message was %s" % str(e) Modified: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py 2007-06-10 16:28:20 UTC (rev 3089) +++ trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py 2007-06-11 07:01:12 UTC (rev 3090) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Sat Jun 09 03:00 PM 2007 J +# Last Change: Mon Jun 11 03:00 PM 2007 J # For now, just test that all mode/dim execute correctly @@ -41,6 +41,23 @@ except ValueError, e: print "Ok, density_grid failed as expected (with msg: " + str(e) + ")" + def test_get_va(self): + """Test _get_va for diag and full mode.""" + d = 3 + k = 2 + ld = 2 + dim = [0, 2] + w, mu, va = GM.gen_param(d, k, 'full') + va = N.arange(d*d*k).reshape(d*k, d) + gm = GM.fromvalues(w, mu, va) + tva = N.empty(ld * ld * k) + for i in range(k * ld * ld): + tva[i] = dim[i%ld] + (i % 4)/ ld * dim[1] * d + d*d * (i / (ld*ld)) + tva = tva.reshape(ld * k, ld) + sva = gm._get_va(dim) + assert N.all(sva == tva) + + if __name__ == "__main__": NumpyTest().run() From scipy-svn at scipy.org Mon Jun 11 03:07:48 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Jun 2007 02:07:48 -0500 (CDT) Subject: [Scipy-svn] r3091 - trunk/Lib/sandbox/pyem/examples Message-ID: <20070611070748.42C3639C050@new.scipy.org> Author: cdavid Date: 2007-06-11 02:07:43 -0500 (Mon, 11 Jun 2007) New Revision: 3091 Added: trunk/Lib/sandbox/pyem/examples/plotexamples.py Log: Add a plotting example. Added: trunk/Lib/sandbox/pyem/examples/plotexamples.py =================================================================== --- trunk/Lib/sandbox/pyem/examples/plotexamples.py 2007-06-11 07:01:12 UTC (rev 3090) +++ trunk/Lib/sandbox/pyem/examples/plotexamples.py 2007-06-11 07:07:43 UTC (rev 3091) @@ -0,0 +1,42 @@ +#! 
/usr/bin/env python +# Last Change: Mon Jun 11 03:00 PM 2007 J + +# This is a simple test to check whether plotting ellipsoides of confidence and +# isodensity contours match +import numpy as N +from numpy.testing import set_package_path, restore_path + +import pylab as P + +set_package_path() +import pyem +restore_path() + +# Generate a simple mixture model, plot its confidence ellipses + isodensity +# curves for both diagonal and full covariance matrices +d = 3 +k = 3 +dim = [0, 2] +# diag model +w, mu, va = pyem.GM.gen_param(d, k) +dgm = pyem.GM.fromvalues(w, mu, va) +# full model +w, mu, va = pyem.GM.gen_param(d, k, 'full', spread = 1) +fgm = pyem.GM.fromvalues(w, mu, va) + +def plot_model(gm, dim): + X, Y, Z, V = gm.density_on_grid(dim = dim) + h = gm.plot(dim = dim) + [i.set_linestyle('-.') for i in h] + P.contour(X, Y, Z, V) + data = gm.sample(200) + P.plot(data[:, dim[0]], data[:,dim[1]], '.') + +# Plot the contours and the ellipsoids of confidence +P.subplot(2, 1, 1) +plot_model(dgm, dim) + +P.subplot(2, 1, 2) +plot_model(fgm, dim) + +P.show() From scipy-svn at scipy.org Mon Jun 11 03:10:11 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Jun 2007 02:10:11 -0500 (CDT) Subject: [Scipy-svn] r3092 - trunk/Lib/sandbox/pyem/tests Message-ID: <20070611071011.4332D39C050@new.scipy.org> Author: cdavid Date: 2007-06-11 02:10:03 -0500 (Mon, 11 Jun 2007) New Revision: 3092 Removed: trunk/Lib/sandbox/pyem/tests/generate_test_data.py Log: Remote outdated test script. 
Deleted: trunk/Lib/sandbox/pyem/tests/generate_test_data.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/generate_test_data.py 2007-06-11 07:07:43 UTC (rev 3091) +++ trunk/Lib/sandbox/pyem/tests/generate_test_data.py 2007-06-11 07:10:03 UTC (rev 3092) @@ -1,53 +0,0 @@ -# Last Change: Wed Oct 18 06:00 PM 2006 J - -import numpy as N -import tables as T - -from numpy.random import seed - -from gmm_em import multiple_gauss_den -from gauss_mix import GM -from _c_densities import gauss_den - -filename = 'test_mgden.h5'; -h5file = T.openFile(filename, 'w') -h5file.createGroup(h5file.root, 'hyperparams') -h5file.createGroup(h5file.root, 'params') -h5file.createGroup(h5file.root, 'data') - -d = 1 -k = 2 -type = 'diag' -nframes = int(1e3) - -h5file.createArray(h5file.root.hyperparams, 'dimension', d) -h5file.createArray(h5file.root.hyperparams, 'type', type) -h5file.createArray(h5file.root.hyperparams, 'nclusters', k) - -w, mu, va = GM.gen_param(d, k, type) - -h5file.createArray(h5file.root.params, 'weights', w) -h5file.createArray(h5file.root.params, 'means', mu) -h5file.createArray(h5file.root.params, 'variances', va) - -gm = GM.fromvalues(w, mu, va) -# Sample nframes frames from the model -data = gm.sample(nframes) - -h5file.createArray(h5file.root.data, 'data', data) - -w1, mu1, va1 = GM.gen_param(d, k, type) - -out = multiple_gauss_den(data, mu1, va1) -out1 = gauss_den(data, mu1[0, :], va1[0, :]) - -h5file.createArray(h5file.root.params, 'w', w1) -h5file.createArray(h5file.root.params, 'mu', mu1) -h5file.createArray(h5file.root.params, 'va', va1) -h5file.createArray(h5file.root.data, 'out', out) - -h5file.createArray(h5file.root.params, 'mu1', mu1[0,:]) -h5file.createArray(h5file.root.params, 'va1', va1[0,:]) -h5file.createArray(h5file.root.data, 'out1', out1) - -h5file.close() From scipy-svn at scipy.org Mon Jun 11 05:18:31 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Jun 2007 04:18:31 
-0500 (CDT) Subject: [Scipy-svn] r3093 - trunk/Lib/sandbox/pyem/doc Message-ID: <20070611091831.C426239C110@new.scipy.org> Author: cdavid Date: 2007-06-11 04:18:25 -0500 (Mon, 11 Jun 2007) New Revision: 3093 Modified: trunk/Lib/sandbox/pyem/doc/ Log: Add tex output files in ignore list for svn Property changes on: trunk/Lib/sandbox/pyem/doc ___________________________________________________________________ Name: svn:ignore - *.pyc *.swp *.pyd *.so *.prof *.out *.tex + *.pyc *.swp *.pyd *.so *.prof *.out *.tex *.aux From scipy-svn at scipy.org Mon Jun 11 05:19:13 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Jun 2007 04:19:13 -0500 (CDT) Subject: [Scipy-svn] r3094 - in trunk/Lib/sandbox/pyem: . tests Message-ID: <20070611091913.12C6F39C110@new.scipy.org> Author: cdavid Date: 2007-06-11 04:18:57 -0500 (Mon, 11 Jun 2007) New Revision: 3094 Added: trunk/Lib/sandbox/pyem/tests/diag_1d_3k.mat trunk/Lib/sandbox/pyem/tests/diag_1d_4k.mat trunk/Lib/sandbox/pyem/tests/diag_2d_3k.mat trunk/Lib/sandbox/pyem/tests/full_2d_3k.mat trunk/Lib/sandbox/pyem/tests/generate_tests_data.py Modified: trunk/Lib/sandbox/pyem/gauss_mix.py trunk/Lib/sandbox/pyem/gmm_em.py trunk/Lib/sandbox/pyem/tests/test_gmm_em.py Log: Add basic tests for EM, 1d, 2d, full and diag mode Modified: trunk/Lib/sandbox/pyem/gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-11 09:18:25 UTC (rev 3093) +++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-11 09:18:57 UTC (rev 3094) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Mon Jun 11 03:00 PM 2007 J +# Last Change: Mon Jun 11 06:00 PM 2007 J """Module implementing GM, a class which represents Gaussian mixtures. @@ -132,6 +132,7 @@ :SeeAlso: If you know already the parameters when creating the model, you can simply use the method class GM.fromvalues.""" + #XXX: when fromvalues is called, parameters are called twice... 
k, d, mode = check_gmm_param(weights, mu, sigma) if not k == self.k: raise GmParamError("Number of given components is %d, expected %d" @@ -664,14 +665,14 @@ """ # Check that w is valid - if N.fabs(N.sum(w, 0) - 1) > misc._MAX_DBL_DEV: + if not len(w.shape) == 1: + raise GmParamError('weight should be a rank 1 array') + + if N.fabs(N.sum(w) - 1) > misc._MAX_DBL_DEV: raise GmParamError('weight does not sum to 1') - if not len(w.shape) == 1: - raise GmParamError('weight is not a vector') - # Check that mean and va have the same number of components - K = len(w) + K = len(w) if N.ndim(mu) < 2: msg = "mu should be a K,d matrix, and a row vector if only 1 comp" Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-11 09:18:25 UTC (rev 3093) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-11 09:18:57 UTC (rev 3094) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Mon Jun 11 01:00 PM 2007 J +# Last Change: Mon Jun 11 04:00 PM 2007 J """Module implementing GMM, a class to estimate Gaussian mixture models using EM, and EM, a class which use GMM instances to estimate models parameters using Added: trunk/Lib/sandbox/pyem/tests/diag_1d_3k.mat =================================================================== (Binary files differ) Property changes on: trunk/Lib/sandbox/pyem/tests/diag_1d_3k.mat ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: trunk/Lib/sandbox/pyem/tests/diag_1d_4k.mat =================================================================== (Binary files differ) Property changes on: trunk/Lib/sandbox/pyem/tests/diag_1d_4k.mat ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: trunk/Lib/sandbox/pyem/tests/diag_2d_3k.mat =================================================================== (Binary files differ) Property changes 
on: trunk/Lib/sandbox/pyem/tests/diag_2d_3k.mat ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: trunk/Lib/sandbox/pyem/tests/full_2d_3k.mat =================================================================== (Binary files differ) Property changes on: trunk/Lib/sandbox/pyem/tests/full_2d_3k.mat ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: trunk/Lib/sandbox/pyem/tests/generate_tests_data.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/generate_tests_data.py 2007-06-11 09:18:25 UTC (rev 3093) +++ trunk/Lib/sandbox/pyem/tests/generate_tests_data.py 2007-06-11 09:18:57 UTC (rev 3094) @@ -0,0 +1,103 @@ +#! /usr/bin/env python +# Last Change: Mon Jun 11 05:00 PM 2007 J + +# This script generates some random data used for testing EM implementations. +import copy +import numpy as N +from numpy.testing import set_package_path, restore_path +from scipy.io import savemat, loadmat + +set_package_path() +import pyem +restore_path() + +from pyem import GM, GMM, EM + +def generate_dataset(d, k, mode, nframes): + """Generate a dataset useful for EM anf GMM testing. + + returns: + data : ndarray + data from the true model. + tgm : GM + the true model (randomly generated) + gm0 : GM + the initial model + gm : GM + the trained model + """ + # Generate a model + w, mu, va = GM.gen_param(d, k, mode, spread = 2.0) + tgm = GM.fromvalues(w, mu, va) + + # Generate data from the model + data = tgm.sample(nframes) + + # Run EM on the model, by running the initialization separetely. 
+ gmm = GMM(GM(d, k, mode), 'test') + gmm.init_random(data) + gm0 = copy.copy(gmm.gm) + + gmm = GMM(copy.copy(gmm.gm), 'test') + em = EM() + em.train(data, gmm) + + return data, tgm, gm0, gmm.gm + +def save_dataset(filename, data, tgm, gm0, gm): + dic = {'tw': tgm.w, 'tmu': tgm.mu, 'tva': tgm.va, + 'w0': gm0.w, 'mu0' : gm0.mu, 'va0': gm0.va, + 'w': gm.w, 'mu': gm.mu, 'va': gm.va, + 'data': data} + savemat(filename, dic) + +def doall(d, k, mode): + import pylab as P + + data, tgm, gm0, gm = generate_dataset(d, k, mode, 500) + filename = mode + '_%dd' % d + '_%dk.mat' % k + save_dataset(filename, data, tgm, gm0, gm) + + if d == 1: + P.subplot(2, 1, 1) + gm0.plot1d() + h = tgm.plot1d(gpdf = True) + P.hist(data[:, 0], 20, normed = 1, fill = False) + + P.subplot(2, 1, 2) + gm.plot1d() + tgm.plot1d(gpdf = True) + P.hist(data[:, 0], 20, normed = 1, fill = False) + else: + P.subplot(2, 1, 1) + gm0.plot() + h = tgm.plot() + [i.set_color('g') for i in h] + P.plot(data[:, 0], data[:, 1], '.') + + P.subplot(2, 1, 2) + gm.plot() + h = tgm.plot() + [i.set_color('g') for i in h] + P.plot(data[:, 0], data[:, 1], '.') + + P.show() + +if __name__ == '__main__': + N.random.seed(0) + d = 2 + k = 3 + mode = 'full' + doall(d, k, mode) + + N.random.seed(0) + d = 2 + k = 3 + mode = 'diag' + doall(d, k, mode) + + N.random.seed(0) + d = 1 + k = 4 + mode = 'diag' + doall(d, k, mode) Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-11 09:18:25 UTC (rev 3093) +++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-11 09:18:57 UTC (rev 3094) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Sat Jun 09 03:00 PM 2007 J +# Last Change: Mon Jun 11 06:00 PM 2007 J # For now, just test that all mode/dim execute correctly @@ -12,6 +12,14 @@ from pyem import GMM, GM, EM restore_path() +def load_dataset(filename): + from scipy.io import loadmat + dic = loadmat(filename, squeeze_me = False) + dic['w0'] = dic['w0'].squeeze() + dic['w'] = dic['w'].squeeze() + dic['tw'] = dic['tw'].squeeze() + return dic + class EmTest(NumpyTestCase): def _create_model_and_run_em(self, d, k, mode, nframes): #+++++++++++++++++++++++++++++++++++++++++++++++++ @@ -32,61 +40,127 @@ em = EM() lk = em.train(data, gmm) -class test_full(EmTest): - def check_1d(self, level = 1): - d = 1 - k = 2 - mode = 'full' - nframes = int(1e2) +#class test_full_run(EmTest): +# """This class only tests whether the algorithms runs. Do not check the +# results.""" +# def check_1d(self, level = 1): +# d = 1 +# k = 2 +# mode = 'full' +# nframes = int(1e2) +# +# #seed(1) +# self._create_model_and_run_em(d, k, mode, nframes) +# +# def check_2d(self, level = 1): +# d = 2 +# k = 2 +# mode = 'full' +# nframes = int(1e2) +# +# #seed(1) +# self._create_model_and_run_em(d, k, mode, nframes) +# +# def check_5d(self, level = 1): +# d = 5 +# k = 3 +# mode = 'full' +# nframes = int(1e2) +# +# #seed(1) +# self._create_model_and_run_em(d, k, mode, nframes) +# +#class test_diag_run(EmTest): +# """This class only tests whether the algorithms runs. 
Do not check the +# results.""" +# def check_1d(self, level = 1): +# d = 1 +# k = 2 +# mode = 'diag' +# nframes = int(1e2) +# +# #seed(1) +# self._create_model_and_run_em(d, k, mode, nframes) +# +# def check_2d(self, level = 1): +# d = 2 +# k = 2 +# mode = 'diag' +# nframes = int(1e2) +# +# #seed(1) +# self._create_model_and_run_em(d, k, mode, nframes) +# +# def check_5d(self, level = 1): +# d = 5 +# k = 3 +# mode = 'diag' +# nframes = int(1e2) +# +# #seed(1) +# self._create_model_and_run_em(d, k, mode, nframes) - #seed(1) - self._create_model_and_run_em(d, k, mode, nframes) +class test_datasets(EmTest): + """This class tests whether the EM algorithms works using pre-computed + datasets.""" + def check_1d_full(self, level = 1): + d = 1 + k = 4 + mode = 'full' + # Data are exactly the same than in diagonal mode, just check that + # calling full mode works even in 1d, even if it is kind of stupid to + # do so + dic = load_dataset('diag_1d_4k.mat') - def check_2d(self, level = 1): - d = 2 - k = 2 - mode = 'full' - nframes = int(1e2) + gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0']) + gmm = GMM(gm, 'test') + EM().train(dic['data'], gmm) - #seed(1) - self._create_model_and_run_em(d, k, mode, nframes) + assert_array_equal(gmm.gm.w, dic['w']) + assert_array_equal(gmm.gm.mu, dic['mu']) + assert_array_equal(gmm.gm.va, dic['va']) - def check_5d(self, level = 1): - d = 5 - k = 3 - mode = 'full' - nframes = int(1e2) + def check_1d_diag(self, level = 1): + d = 1 + k = 4 + mode = 'diag' + dic = load_dataset('diag_1d_4k.mat') - #seed(1) - self._create_model_and_run_em(d, k, mode, nframes) + gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0']) + gmm = GMM(gm, 'test') + EM().train(dic['data'], gmm) -class test_diag(EmTest): - def check_1d(self, level = 1): - d = 1 - k = 2 - mode = 'diag' - nframes = int(1e2) + assert_array_equal(gmm.gm.w, dic['w']) + assert_array_equal(gmm.gm.mu, dic['mu']) + assert_array_equal(gmm.gm.va, dic['va']) - #seed(1) - 
self._create_model_and_run_em(d, k, mode, nframes) + def check_2d_full(self, level = 1): + d = 2 + k = 3 + mode = 'full' + dic = load_dataset('full_2d_3k.mat') - def check_2d(self, level = 1): - d = 2 - k = 2 - mode = 'diag' - nframes = int(1e2) + gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0']) + gmm = GMM(gm, 'test') + EM().train(dic['data'], gmm) - #seed(1) - self._create_model_and_run_em(d, k, mode, nframes) + assert_array_equal(gmm.gm.w, dic['w']) + assert_array_equal(gmm.gm.mu, dic['mu']) + assert_array_equal(gmm.gm.va, dic['va']) - def check_5d(self, level = 1): - d = 5 - k = 3 - mode = 'diag' - nframes = int(1e2) + def check_2d_diag(self, level = 1): + d = 2 + k = 3 + mode = 'diag' + dic = load_dataset('diag_2d_3k.mat') - #seed(1) - self._create_model_and_run_em(d, k, mode, nframes) + gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0']) + gmm = GMM(gm, 'test') + EM().train(dic['data'], gmm) + assert_array_equal(gmm.gm.w, dic['w']) + assert_array_equal(gmm.gm.mu, dic['mu']) + assert_array_equal(gmm.gm.va, dic['va']) + if __name__ == "__main__": NumpyTest().run() From scipy-svn at scipy.org Mon Jun 11 05:32:23 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Jun 2007 04:32:23 -0500 (CDT) Subject: [Scipy-svn] r3095 - trunk/Lib/sandbox/pyem/tests Message-ID: <20070611093223.5DD3339C05A@new.scipy.org> Author: cdavid Date: 2007-06-11 04:32:17 -0500 (Mon, 11 Jun 2007) New Revision: 3095 Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py Log: Reenable tests I forgot to uncomment in gmm_em tests Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-11 09:18:57 UTC (rev 3094) +++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-11 09:32:17 UTC (rev 3095) @@ -40,66 +40,66 @@ em = EM() lk = em.train(data, gmm) -#class test_full_run(EmTest): -# """This class only tests whether the algorithms runs. 
Do not check the -# results.""" -# def check_1d(self, level = 1): -# d = 1 -# k = 2 -# mode = 'full' -# nframes = int(1e2) -# -# #seed(1) -# self._create_model_and_run_em(d, k, mode, nframes) -# -# def check_2d(self, level = 1): -# d = 2 -# k = 2 -# mode = 'full' -# nframes = int(1e2) -# -# #seed(1) -# self._create_model_and_run_em(d, k, mode, nframes) -# -# def check_5d(self, level = 1): -# d = 5 -# k = 3 -# mode = 'full' -# nframes = int(1e2) -# -# #seed(1) -# self._create_model_and_run_em(d, k, mode, nframes) -# -#class test_diag_run(EmTest): -# """This class only tests whether the algorithms runs. Do not check the -# results.""" -# def check_1d(self, level = 1): -# d = 1 -# k = 2 -# mode = 'diag' -# nframes = int(1e2) -# -# #seed(1) -# self._create_model_and_run_em(d, k, mode, nframes) -# -# def check_2d(self, level = 1): -# d = 2 -# k = 2 -# mode = 'diag' -# nframes = int(1e2) -# -# #seed(1) -# self._create_model_and_run_em(d, k, mode, nframes) -# -# def check_5d(self, level = 1): -# d = 5 -# k = 3 -# mode = 'diag' -# nframes = int(1e2) -# -# #seed(1) -# self._create_model_and_run_em(d, k, mode, nframes) +class test_full_run(EmTest): + """This class only tests whether the algorithms runs. Do not check the + results.""" + def check_1d(self, level = 1): + d = 1 + k = 2 + mode = 'full' + nframes = int(1e2) + #seed(1) + self._create_model_and_run_em(d, k, mode, nframes) + + def check_2d(self, level = 1): + d = 2 + k = 2 + mode = 'full' + nframes = int(1e2) + + #seed(1) + self._create_model_and_run_em(d, k, mode, nframes) + + def check_5d(self, level = 1): + d = 5 + k = 3 + mode = 'full' + nframes = int(1e2) + + #seed(1) + self._create_model_and_run_em(d, k, mode, nframes) + +class test_diag_run(EmTest): + """This class only tests whether the algorithms runs. 
Do not check the + results.""" + def check_1d(self, level = 1): + d = 1 + k = 2 + mode = 'diag' + nframes = int(1e2) + + #seed(1) + self._create_model_and_run_em(d, k, mode, nframes) + + def check_2d(self, level = 1): + d = 2 + k = 2 + mode = 'diag' + nframes = int(1e2) + + #seed(1) + self._create_model_and_run_em(d, k, mode, nframes) + + def check_5d(self, level = 1): + d = 5 + k = 3 + mode = 'diag' + nframes = int(1e2) + + #seed(1) + self._create_model_and_run_em(d, k, mode, nframes) + class test_datasets(EmTest): """This class tests whether the EM algorithms works using pre-computed datasets.""" From scipy-svn at scipy.org Mon Jun 11 06:12:22 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Jun 2007 05:12:22 -0500 (CDT) Subject: [Scipy-svn] r3096 - trunk/Lib/sandbox/pyem/tests Message-ID: <20070611101222.46FBD39C0B1@new.scipy.org> Author: cdavid Date: 2007-06-11 05:12:10 -0500 (Mon, 11 Jun 2007) New Revision: 3096 Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py trunk/Lib/sandbox/pyem/tests/test_gmm_em.py Log: Convert check calls to test calls in tests, for future convertion to setuptools Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-11 09:32:17 UTC (rev 3095) +++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-11 10:12:10 UTC (rev 3096) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Sat Jun 09 02:00 PM 2007 J +# Last Change: Mon Jun 11 06:00 PM 2007 J # TODO: # - having "fake tests" to check that all mode (scalar, diag and full) are @@ -68,24 +68,24 @@ 0.00000253261067, 0.00000001526368]) class test_py_implementation(TestDensities): - def _check(self, level, decimal = DEF_DEC): + def _test(self, level, decimal = DEF_DEC): Y = gauss_den(self.X, self.mu, self.va) assert_array_almost_equal(Y, self.Yt, decimal) - def check_2d_diag(self, level = 0): + def test_2d_diag(self, level = 0): self._generate_test_data_2d_diag() - self._check(level) + self._test(level) - def check_2d_full(self, level = 0): + def test_2d_full(self, level = 0): self._generate_test_data_2d_full() - self._check(level) + self._test(level) - def check_py_1d(self, level = 0): + def test_py_1d(self, level = 0): self._generate_test_data_1d() - self._check(level) + self._test(level) class test_c_implementation(TestDensities): - def _check(self, level, decimal = DEF_DEC): + def _test(self, level, decimal = DEF_DEC): try: from pyem._c_densities import gauss_den as c_gauss_den Y = c_gauss_den(self.X, self.mu, self.va) @@ -94,17 +94,17 @@ print "Error while importing C implementation, not tested" print " -> (Import error was %s)" % inst - def check_1d(self, level = 0): + def test_1d(self, level = 0): self._generate_test_data_1d() - self._check(level) + self._test(level) - def check_2d_diag(self, level = 0): + def test_2d_diag(self, level = 0): self._generate_test_data_2d_diag() - self._check(level) + self._test(level) - def check_2d_full(self, level = 0): + def test_2d_full(self, level = 0): self._generate_test_data_2d_full() - self._check(level) + self._test(level) class test_gauss_ell(NumpyTestCase): def test_dim(self): Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-11 09:32:17 UTC (rev 3095) +++ 
trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-11 10:12:10 UTC (rev 3096) @@ -43,7 +43,7 @@ class test_full_run(EmTest): """This class only tests whether the algorithms runs. Do not check the results.""" - def check_1d(self, level = 1): + def test_1d(self, level = 1): d = 1 k = 2 mode = 'full' @@ -52,7 +52,7 @@ #seed(1) self._create_model_and_run_em(d, k, mode, nframes) - def check_2d(self, level = 1): + def test_2d(self, level = 1): d = 2 k = 2 mode = 'full' @@ -61,7 +61,7 @@ #seed(1) self._create_model_and_run_em(d, k, mode, nframes) - def check_5d(self, level = 1): + def test_5d(self, level = 1): d = 5 k = 3 mode = 'full' @@ -71,9 +71,9 @@ self._create_model_and_run_em(d, k, mode, nframes) class test_diag_run(EmTest): - """This class only tests whether the algorithms runs. Do not check the + """This class only tests whether the algorithms runs. Do not test the results.""" - def check_1d(self, level = 1): + def test_1d(self, level = 1): d = 1 k = 2 mode = 'diag' @@ -82,7 +82,7 @@ #seed(1) self._create_model_and_run_em(d, k, mode, nframes) - def check_2d(self, level = 1): + def test_2d(self, level = 1): d = 2 k = 2 mode = 'diag' @@ -91,7 +91,7 @@ #seed(1) self._create_model_and_run_em(d, k, mode, nframes) - def check_5d(self, level = 1): + def test_5d(self, level = 1): d = 5 k = 3 mode = 'diag' @@ -103,11 +103,11 @@ class test_datasets(EmTest): """This class tests whether the EM algorithms works using pre-computed datasets.""" - def check_1d_full(self, level = 1): + def test_1d_full(self, level = 1): d = 1 k = 4 mode = 'full' - # Data are exactly the same than in diagonal mode, just check that + # Data are exactly the same than in diagonal mode, just test that # calling full mode works even in 1d, even if it is kind of stupid to # do so dic = load_dataset('diag_1d_4k.mat') @@ -120,7 +120,7 @@ assert_array_equal(gmm.gm.mu, dic['mu']) assert_array_equal(gmm.gm.va, dic['va']) - def check_1d_diag(self, level = 1): + def test_1d_diag(self, level = 1): d = 1 k = 4 
mode = 'diag' @@ -134,7 +134,7 @@ assert_array_equal(gmm.gm.mu, dic['mu']) assert_array_equal(gmm.gm.va, dic['va']) - def check_2d_full(self, level = 1): + def test_2d_full(self, level = 1): d = 2 k = 3 mode = 'full' @@ -148,7 +148,7 @@ assert_array_equal(gmm.gm.mu, dic['mu']) assert_array_equal(gmm.gm.va, dic['va']) - def check_2d_diag(self, level = 1): + def test_2d_diag(self, level = 1): d = 2 k = 3 mode = 'diag' From scipy-svn at scipy.org Mon Jun 11 06:34:27 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Jun 2007 05:34:27 -0500 (CDT) Subject: [Scipy-svn] r3097 - in trunk/Lib/sandbox/pyem: . tests Message-ID: <20070611103427.1172E39C1B1@new.scipy.org> Author: cdavid Date: 2007-06-11 05:34:20 -0500 (Mon, 11 Jun 2007) New Revision: 3097 Modified: trunk/Lib/sandbox/pyem/densities.py trunk/Lib/sandbox/pyem/tests/test_densities.py Log: Add tests for pdf computation in log domain (1st step for logsumexp trick support) Modified: trunk/Lib/sandbox/pyem/densities.py =================================================================== --- trunk/Lib/sandbox/pyem/densities.py 2007-06-11 10:12:10 UTC (rev 3096) +++ trunk/Lib/sandbox/pyem/densities.py 2007-06-11 10:34:20 UTC (rev 3097) @@ -1,7 +1,7 @@ #! /usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Mon Jun 11 03:00 PM 2007 J +# Last Change: Mon Jun 11 07:00 PM 2007 J """This module implements various basic functions related to multivariate gaussian, such as pdf estimation, confidence interval/ellipsoids, etc...""" @@ -119,7 +119,7 @@ if not log: y = fac * N.exp(y) else: - y = y + log(fac) + y += N.log(fac) return y Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-11 10:12:10 UTC (rev 3096) +++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-11 10:34:20 UTC (rev 3097) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Mon Jun 11 06:00 PM 2007 J +# Last Change: Mon Jun 11 07:00 PM 2007 J # TODO: # - having "fake tests" to check that all mode (scalar, diag and full) are @@ -72,6 +72,10 @@ Y = gauss_den(self.X, self.mu, self.va) assert_array_almost_equal(Y, self.Yt, decimal) + def _test_log(self, level, decimal = DEF_DEC): + Y = gauss_den(self.X, self.mu, self.va, log = True) + assert_array_almost_equal(N.exp(Y), self.Yt, decimal) + def test_2d_diag(self, level = 0): self._generate_test_data_2d_diag() self._test(level) @@ -80,10 +84,22 @@ self._generate_test_data_2d_full() self._test(level) - def test_py_1d(self, level = 0): + def test_1d(self, level = 0): self._generate_test_data_1d() self._test(level) + def test_2d_diag_log(self, level = 0): + self._generate_test_data_2d_diag() + self._test_log(level) + + def test_2d_full_log(self, level = 0): + self._generate_test_data_2d_full() + self._test_log(level) + + def test_1d_log(self, level = 0): + self._generate_test_data_1d() + self._test_log(level) + class test_c_implementation(TestDensities): def _test(self, level, decimal = DEF_DEC): try: From scipy-svn at scipy.org Tue Jun 12 00:04:27 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 11 Jun 2007 23:04:27 -0500 (CDT) Subject: [Scipy-svn] r3098 - in trunk/Lib/sandbox/pyem: . tests Message-ID: <20070612040427.3111439C092@new.scipy.org> Author: cdavid Date: 2007-06-11 23:04:14 -0500 (Mon, 11 Jun 2007) New Revision: 3098 Modified: trunk/Lib/sandbox/pyem/densities.py trunk/Lib/sandbox/pyem/gmm_em.py trunk/Lib/sandbox/pyem/tests/test_densities.py trunk/Lib/sandbox/pyem/tests/test_gmm_em.py Log: Add logsumexp function + tests. 
Not used in the code yet, though Modified: trunk/Lib/sandbox/pyem/densities.py =================================================================== --- trunk/Lib/sandbox/pyem/densities.py 2007-06-11 10:34:20 UTC (rev 3097) +++ trunk/Lib/sandbox/pyem/densities.py 2007-06-12 04:04:14 UTC (rev 3098) @@ -1,7 +1,7 @@ #! /usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Mon Jun 11 07:00 PM 2007 J +# Last Change: Tue Jun 12 12:00 PM 2007 J """This module implements various basic functions related to multivariate gaussian, such as pdf estimation, confidence interval/ellipsoids, etc...""" @@ -268,7 +268,13 @@ return elps[0, :], elps[1, :] -def multiple_gauss_den(data, mu, va): +def logsumexp(x): + """Compute log(sum(exp(a), 1)) while avoiding underflow.""" + axis = 1 + mc = N.max(x, axis) + return mc + N.log(N.sum(N.exp(x-mc[:, N.newaxis]), axis)) + +def multiple_gauss_den(data, mu, va, log = False): """Helper function to generate several Gaussian pdf (different parameters) at the same points @@ -283,6 +289,8 @@ variance of the pdf. One row per different component for diagonal covariance (k, d), or d rows per component for full matrix pdf (k*d,d). + log : boolean + if True, returns the log-pdf instead of the pdf. 
:Returns: Returns a (n, k) array, each column i being the pdf of the ith mean and @@ -297,11 +305,11 @@ y = N.zeros((K, n)) if N.size(mu) == N.size(va): for i in range(K): - y[i] = gauss_den(data, mu[i, :], va[i, :]) + y[i] = gauss_den(data, mu[i, :], va[i, :], log) return y.T else: for i in range(K): - y[i] = gauss_den(data, mu[i, :], va[d*i:d*i+d, :]) + y[i] = gauss_den(data, mu[i, :], va[d*i:d*i+d, :], log) return y.T if __name__ == "__main__": Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-11 10:34:20 UTC (rev 3097) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-12 04:04:14 UTC (rev 3098) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Mon Jun 11 04:00 PM 2007 J +# Last Change: Tue Jun 12 11:00 AM 2007 J """Module implementing GMM, a class to estimate Gaussian mixture models using EM, and EM, a class which use GMM instances to estimate models parameters using @@ -101,8 +101,6 @@ for i in range(k): va[i*d:i*d+d] = N.dot( va[i*d:i*d+d], va[i*d:i*d+d].T) - #raise GmmParamError("init_random not implemented for "\ - # "mode %s yet" % self.gm.mode) self.gm.set_param(w, mu, va) @@ -150,11 +148,10 @@ self.isinit = False self.initst = init - def sufficient_statistics(self, data): + def compute_responsabilities(self, data): """Compute responsabilities. - Return normalized and non-normalized sufficient statistics from the - model. + Return normalized and non-normalized respondabilities for the model. 
Note ---- @@ -325,11 +322,11 @@ like = N.zeros(maxiter) # Em computation, with computation of the likelihood - g, tgd = model.sufficient_statistics(data) + g, tgd = model.compute_responsabilities(data) like[0] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) model.update_em(data, g) for i in range(1, maxiter): - g, tgd = model.sufficient_statistics(data) + g, tgd = model.compute_responsabilities(data) like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) model.update_em(data, g) if has_em_converged(like[i], like[i-1], thresh): @@ -337,21 +334,6 @@ return like -#def regularize_diag(variance, alpha = _DEF_ALPHA): -# delta = N.sum(variance) / variance.size -# if delta > _MIN_DBL_DELTA: -# return variance + alpha * delta -# else: -# return variance + alpha * _MIN_DBL_DELTA -# -#def regularize_full(variance): -# # Trace of a positive definite matrix is always > 0 -# delta = N.trace(variance) / variance.shape[0] -# if delta > _MIN_DBL_DELTA: -# return variance + alpha * delta -# else: -# return variance + alpha * _MIN_DBL_DELTA - # Misc functions def bic(lk, deg, n): """ Expects lk to be log likelihood """ @@ -370,115 +352,6 @@ if __name__ == "__main__": pass - ## import copy - ## #============================= - ## # Simple GMM with 5 components - ## #============================= - - ## #+++++++++++++++++++++++++++++ - ## # Meta parameters of the model - ## # - k: Number of components - ## # - d: dimension of each Gaussian - ## # - mode: Mode of covariance matrix: full or diag - ## # - nframes: number of frames (frame = one data point = one - ## # row of d elements - ## k = 2 - ## d = 1 - ## mode = 'full' - ## nframes = 1e3 - - ## #+++++++++++++++++++++++++++++++++++++++++++ - ## # Create an artificial GMM model, samples it - ## #+++++++++++++++++++++++++++++++++++++++++++ - ## print "Generating the mixture" - ## # Generate a model with k components, d dimensions - ## w, mu, va = GM.gen_param(d, k, mode, spread = 3) - ## gm = GM(d, k, mode) - ## gm.set_param(w, mu, va) - - ## # 
Sample nframes frames from the model - ## data = gm.sample(nframes) - - ## #++++++++++++++++++++++++ - ## # Learn the model with EM - ## #++++++++++++++++++++++++ - - ## # Init the model - ## print "Init a model for learning, with kmean for initialization" - ## lgm = GM(d, k, mode) - ## gmm = GMM(lgm, 'kmean') - ## gmm.init(data) - - ## # Keep the initialized model for drawing - ## gm0 = copy.copy(lgm) - - ## # The actual EM, with likelihood computation - ## niter = 10 - ## like = N.zeros(niter) - - ## print "computing..." - ## for i in range(niter): - ## g, tgd = gmm.sufficient_statistics(data) - ## like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) - ## gmm.update_em(data, g) - ## # # Alternative form, by using EM class: as the EM class - ## # # is quite rudimentary now, it is not very useful, just save - ## # # a few lines - ## # em = EM() - ## # like = em.train(data, gmm, niter) - - ## #+++++++++++++++ - ## # Draw the model - ## #+++++++++++++++ - ## print "drawing..." - ## import pylab as P - ## P.subplot(2, 1, 1) - - ## if not d == 1: - ## # Draw what is happening - ## P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_') - - ## # Real confidence ellipses - ## Xre, Yre = gm.conf_ellipses() - ## P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides') - ## for i in range(1,k): - ## P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_') - - ## # Initial confidence ellipses as found by kmean - ## X0e, Y0e = gm0.conf_ellipses() - ## P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoides') - ## for i in range(1,k): - ## P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_') - - ## # Values found by EM - ## Xe, Ye = lgm.conf_ellipses() - ## P.plot(Xe[0], Ye[0], 'r', label = "confidence ellipsoides found by" - ## "EM") - ## for i in range(1,k): - ## P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_') - ## P.legend(loc = 0) - ## else: - ## # Real confidence ellipses - ## h = gm.plot1d() - ## [i.set_color('g') for i in h['pdf']] - ## 
h['pdf'][0].set_label('true pdf') - - ## # Initial confidence ellipses as found by kmean - ## h0 = gm0.plot1d() - ## [i.set_color('k') for i in h0['pdf']] - ## h0['pdf'][0].set_label('initial pdf') - - ## # Values found by EM - ## hl = lgm.plot1d(fill = 1, level = 0.66) - ## [i.set_color('r') for i in hl['pdf']] - ## hl['pdf'][0].set_label('pdf found by EM') - - ## P.legend(loc = 0) - - ## P.subplot(2, 1, 2) - ## P.plot(like) - ## P.title('log likelihood') - ## # #++++++++++++++++++ ## # # Export the figure ## # #++++++++++++++++++ Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-11 10:34:20 UTC (rev 3097) +++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-12 04:04:14 UTC (rev 3098) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Mon Jun 11 07:00 PM 2007 J +# Last Change: Tue Jun 12 12:00 PM 2007 J # TODO: # - having "fake tests" to check that all mode (scalar, diag and full) are @@ -100,6 +100,58 @@ self._generate_test_data_1d() self._test_log(level) +class test_py_logsumexp(TestDensities): + """Class to compare logsumexp vs naive implementation.""" + def test_underlow(self): + """This function checks that logsumexp works as expected.""" + # We check wether naive implementation would underflow, to be sure we + # are actually testing something here. + N.seterr(under='raise') + try: + a = N.array([[-1000]]) + self.naive_logsumexp(a) + raise AssertionError("expected to catch underflow, we should not be here") + except FloatingPointError, e: + print "Catching underflow, as expected" + assert pyem.densities.logsumexp(a) == -1000. 
+ try: + a = N.array([[-1000, -1000, -1000]]) + self.naive_logsumexp(a) + raise AssertionError("expected to catch underflow, we should not be here") + except FloatingPointError, e: + print "Catching underflow, as expected" + assert_array_almost_equal(pyem.densities.logsumexp(a), -998.90138771) + + def naive_logsumexp(self, data): + return N.log(N.sum(N.exp(data), 1)) + + def test_1d(self): + data = N.random.randn(1e1)[:, N.newaxis] + mu = N.array([[-5], [-6]]) + va = N.array([[0.1], [0.1]]) + y = pyem.densities.multiple_gauss_den(data, mu, va, log = True) + a1 = pyem.densities.logsumexp(y) + a2 = self.naive_logsumexp(y) + assert_array_equal(a1, a2) + + def test_2d_full(self): + data = N.random.randn(1e1, 2) + mu = N.array([[-3, -1], [3, 3]]) + va = N.array([[1.1, 0.4], [0.6, 0.8], [0.4, 0.2], [0.3, 0.9]]) + y = pyem.densities.multiple_gauss_den(data, mu, va, log = True) + a1 = pyem.densities.logsumexp(y) + a2 = self.naive_logsumexp(y) + assert_array_almost_equal(a1, a2, DEF_DEC) + + def test_2d_diag(self): + data = N.random.randn(1e1, 2) + mu = N.array([[-3, -1], [3, 3]]) + va = N.array([[1.1, 0.4], [0.6, 0.8]]) + y = pyem.densities.multiple_gauss_den(data, mu, va, log = True) + a1 = pyem.densities.logsumexp(y) + a2 = self.naive_logsumexp(y) + assert_array_almost_equal(a1, a2, DEF_DEC) + class test_c_implementation(TestDensities): def _test(self, level, decimal = DEF_DEC): try: Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-11 10:34:20 UTC (rev 3097) +++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-12 04:04:14 UTC (rev 3098) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Mon Jun 11 06:00 PM 2007 J +# Last Change: Tue Jun 12 11:00 AM 2007 J # For now, just test that all mode/dim execute correctly From scipy-svn at scipy.org Tue Jun 12 08:21:14 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 12 Jun 2007 07:21:14 -0500 (CDT) Subject: [Scipy-svn] r3099 - in trunk/Lib/sandbox/pyem: . tests Message-ID: <20070612122114.BC3F739C19F@new.scipy.org> Author: cdavid Date: 2007-06-12 07:21:04 -0500 (Tue, 12 Jun 2007) New Revision: 3099 Modified: trunk/Lib/sandbox/pyem/densities.py trunk/Lib/sandbox/pyem/gauss_mix.py trunk/Lib/sandbox/pyem/gmm_em.py trunk/Lib/sandbox/pyem/tests/test_densities.py trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py trunk/Lib/sandbox/pyem/tests/test_gmm_em.py Log: Add function to compute log responsabilities with logsumexp. Modified: trunk/Lib/sandbox/pyem/densities.py =================================================================== --- trunk/Lib/sandbox/pyem/densities.py 2007-06-12 04:04:14 UTC (rev 3098) +++ trunk/Lib/sandbox/pyem/densities.py 2007-06-12 12:21:04 UTC (rev 3099) @@ -1,7 +1,7 @@ #! 
/usr/bin/python # # Copyrighted David Cournapeau -# Last Change: Tue Jun 12 12:00 PM 2007 J +# Last Change: Tue Jun 12 03:00 PM 2007 J """This module implements various basic functions related to multivariate gaussian, such as pdf estimation, confidence interval/ellipsoids, etc...""" @@ -167,14 +167,6 @@ inva = lin.inv(va) fac = 1 / N.sqrt( (2*N.pi) ** d * N.fabs(lin.det(va))) - # # Slow version - # n = N.size(x, 0) - # y = N.zeros(n) - # for i in range(n): - # y[i] = N.dot(x[i,:], - # N.dot(inva, N.transpose(x[i,:]))) - # y *= -0.5 - # we are using a trick with sum to "emulate" # the matrix multiplication inva * x without any explicit loop y = N.dot((x-mu), inva) Modified: trunk/Lib/sandbox/pyem/gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-12 04:04:14 UTC (rev 3098) +++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-12 12:21:04 UTC (rev 3099) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Mon Jun 11 06:00 PM 2007 J +# Last Change: Tue Jun 12 03:00 PM 2007 J """Module implementing GM, a class which represents Gaussian mixtures. @@ -12,7 +12,7 @@ import numpy as N from numpy.random import randn, rand import numpy.linalg as lin -import densities +import densities as D import misc # Right now, two main usages of a Gaussian Model are possible @@ -276,13 +276,13 @@ Ye = [] if self.mode == 'diag': for i in range(self.k): - xe, ye = densities.gauss_ell(self.mu[i, :], self.va[i, :], + xe, ye = D.gauss_ell(self.mu[i, :], self.va[i, :], dim, npoints, level) Xe.append(xe) Ye.append(ye) elif self.mode == 'full': for i in range(self.k): - xe, ye = densities.gauss_ell(self.mu[i, :], + xe, ye = D.gauss_ell(self.mu[i, :], self.va[i*self.d:i*self.d+self.d, :], dim, npoints, level) Xe.append(xe) @@ -317,6 +317,7 @@ raise GmParamError("Unknown mode") return True + @classmethod def gen_param(cls, d, nc, varmode = 'diag', spread = 1): """Generate random, valid parameters for a gaussian mixture model. 
@@ -366,6 +367,27 @@ # def _regularize(self): # raise NotImplemented("No regularization") + def pdf(self, x, log = False): + """Computes the pdf of the model at given points. + + :Parameters: + x : ndarray + points where to estimate the pdf. One row for one + multi-dimensional sample (eg to estimate the pdf at 100 + different points in 10 dimension, data's shape should be (100, + 20)). + log : bool + If true, returns the log pdf instead of the pdf. + + :Returns: + y : ndarray + the pdf at points x.""" + if log: + return D.logsumexp(N.sum( + D.multiple_gauss_den(x, self.mu, self.va, log = True) * self.w, 1)) + else: + return N.sum(D.multiple_gauss_den(x, self.mu, self.va) * self.w, 1) + #================= # Plotting methods #================= @@ -572,7 +594,7 @@ # XXX refactor computing pdf dmu = self.mu[:, dim] dva = self._get_va(dim) - den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w + den = D.multiple_gauss_den(xdata, dmu, dva) * self.w den = N.sum(den, 1) den = den.reshape(len(rangey), len(rangex)) Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-12 04:04:14 UTC (rev 3098) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-12 12:21:04 UTC (rev 3099) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Tue Jun 12 11:00 AM 2007 J +# Last Change: Tue Jun 12 08:00 PM 2007 J """Module implementing GMM, a class to estimate Gaussian mixture models using EM, and EM, a class which use GMM instances to estimate models parameters using @@ -159,7 +159,7 @@ knowing the explicit data for the Gaussian model (w, mu, var): gamma(t, i) = P[state = i | observation = data(t); w, mu, va] - This is basically the E step of EM for GMM.""" + This is basically the E step of EM for finite mixtures.""" # compute the gaussian pdf tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va) # multiply by the weight @@ -169,6 +169,28 @@ return gd, tgd + def 
compute_log_responsabilities(self, data): + """Compute log responsabilities. + + Return normalized and non-normalized responsabilities for the model (in + the log domain) + + Note + ---- + Computes the latent variable distribution (a posteriori probability) + knowing the explicit data for the Gaussian model (w, mu, var): gamma(t, + i) = P[state = i | observation = data(t); w, mu, va] + + This is basically the E step of EM for finite mixtures.""" + # compute the gaussian pdf + tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va, log = True) + # multiply by the weight + tgd += N.log(self.gm.w) + # Normalize to get a pdf + gd = tgd - densities.logsumexp(tgd)[:, N.newaxis] + + return gd, tgd + def update_em(self, data, gamma): """Computes update of the Gaussian Mixture Model (M step) from the a posteriori pdf, computed by gmm_posterior Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-12 04:04:14 UTC (rev 3098) +++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-12 12:21:04 UTC (rev 3099) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Tue Jun 12 12:00 PM 2007 J +# Last Change: Tue Jun 12 08:00 PM 2007 J # TODO: # - having "fake tests" to check that all mode (scalar, diag and full) are @@ -21,7 +21,7 @@ # import modules that are located in the same directory as this file. restore_path() -DEF_DEC = 12 +from testcommon import DEF_DEC class TestDensities(NumpyTestCase): def _generate_test_data_1d(self): Modified: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py 2007-06-12 04:04:14 UTC (rev 3098) +++ trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py 2007-06-12 12:21:04 UTC (rev 3099) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Mon Jun 11 03:00 PM 2007 J +# Last Change: Tue Jun 12 03:00 PM 2007 J # For now, just test that all mode/dim execute correctly @@ -10,6 +10,7 @@ set_package_path() from pyem import GM +from pyem.densities import multiple_gauss_den restore_path() class test_BasicFunc(NumpyTestCase): @@ -58,6 +59,16 @@ sva = gm._get_va(dim) assert N.all(sva == tva) + def test_2d_diag_pdf(self): + d = 2 + w = N.array([0.4, 0.6]) + mu = N.array([[0., 2], [-1, -2]]) + va = N.array([[1, 0.5], [0.5, 1]]) + x = N.random.randn(100, 2) + gm = GM.fromvalues(w, mu, va) + y1 = N.sum(multiple_gauss_den(x, mu, va) * w, 1) + y2 = gm.pdf(x) + assert_array_almost_equal(y1, y2) if __name__ == "__main__": NumpyTest().run() Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-12 04:04:14 UTC (rev 3098) +++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-12 12:21:04 UTC (rev 3099) @@ -1,5 +1,5 @@ #! 
/usr/bin/env python -# Last Change: Tue Jun 12 11:00 AM 2007 J +# Last Change: Tue Jun 12 09:00 PM 2007 J # For now, just test that all mode/dim execute correctly @@ -12,6 +12,8 @@ from pyem import GMM, GM, EM restore_path() +from testcommon import DEF_DEC + def load_dataset(filename): from scipy.io import loadmat dic = loadmat(filename, squeeze_me = False) @@ -162,5 +164,46 @@ assert_array_equal(gmm.gm.mu, dic['mu']) assert_array_equal(gmm.gm.va, dic['va']) +class test_log_domain(EmTest): + """This class tests whether the GMM works in log domain.""" + def _test_common(self, d, k, mode): + dic = load_dataset('%s_%dd_%dk.mat' % (mode, d, k)) + + gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0']) + gmm = GMM(gm, 'test') + + a, na = gmm.compute_responsabilities(dic['data']) + la, nla = gmm.compute_log_responsabilities(dic['data']) + + ta = N.log(a) + tna = N.log(na) + if not N.all(N.isfinite(ta)): + print "precision problem for %s, %dd, %dk, need fixing" % (mode, d, k) + else: + assert_array_almost_equal(ta, la, DEF_DEC) + + if not N.all(N.isfinite(tna)): + print "precision problem for %s, %dd, %dk, need fixing" % (mode, d, k) + else: + assert_array_almost_equal(tna, nla, DEF_DEC) + + def test_2d_diag(self, level = 1): + d = 2 + k = 3 + mode = 'diag' + self._test_common(d, k, mode) + + def test_1d_full(self, level = 1): + d = 1 + k = 4 + mode = 'diag' + self._test_common(d, k, mode) + + def test_2d_full(self, level = 1): + d = 2 + k = 3 + mode = 'full' + self._test_common(d, k, mode) + if __name__ == "__main__": NumpyTest().run() From scipy-svn at scipy.org Wed Jun 13 06:08:20 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Jun 2007 05:08:20 -0500 (CDT) Subject: [Scipy-svn] r3100 - trunk/Lib/sandbox/pyem/tests Message-ID: <20070613100820.B5BFA39C0D3@new.scipy.org> Author: cdavid Date: 2007-06-13 05:08:00 -0500 (Wed, 13 Jun 2007) New Revision: 3100 Added: trunk/Lib/sandbox/pyem/tests/testcommon.py Modified: 
trunk/Lib/sandbox/pyem/tests/test_densities.py trunk/Lib/sandbox/pyem/tests/test_gmm_em.py Log: Fix importing datasets in pyem/test Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-12 12:21:04 UTC (rev 3099) +++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-13 10:08:00 UTC (rev 3100) @@ -19,10 +19,9 @@ #Optional: set_local_path() # import modules that are located in the same directory as this file. +from testcommon import DEF_DEC restore_path() -from testcommon import DEF_DEC - class TestDensities(NumpyTestCase): def _generate_test_data_1d(self): self.va = 2.0 @@ -106,21 +105,24 @@ """This function checks that logsumexp works as expected.""" # We check wether naive implementation would underflow, to be sure we # are actually testing something here. - N.seterr(under='raise') + errst = N.seterr(under='raise') try: - a = N.array([[-1000]]) - self.naive_logsumexp(a) - raise AssertionError("expected to catch underflow, we should not be here") - except FloatingPointError, e: - print "Catching underflow, as expected" - assert pyem.densities.logsumexp(a) == -1000. - try: - a = N.array([[-1000, -1000, -1000]]) - self.naive_logsumexp(a) - raise AssertionError("expected to catch underflow, we should not be here") - except FloatingPointError, e: - print "Catching underflow, as expected" - assert_array_almost_equal(pyem.densities.logsumexp(a), -998.90138771) + try: + a = N.array([[-1000]]) + self.naive_logsumexp(a) + raise AssertionError("expected to catch underflow, we should not be here") + except FloatingPointError, e: + print "Catching underflow, as expected" + assert pyem.densities.logsumexp(a) == -1000. 
+ try: + a = N.array([[-1000, -1000, -1000]]) + self.naive_logsumexp(a) + raise AssertionError("expected to catch underflow, we should not be here") + except FloatingPointError, e: + print "Catching underflow, as expected" + assert_array_almost_equal(pyem.densities.logsumexp(a), -998.90138771) + finally: + N.seterr(under=errst['under']) def naive_logsumexp(self, data): return N.log(N.sum(N.exp(data), 1)) Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-12 12:21:04 UTC (rev 3099) +++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-13 10:08:00 UTC (rev 3100) @@ -4,6 +4,7 @@ # For now, just test that all mode/dim execute correctly import sys +import os from numpy.testing import * import numpy as N @@ -12,11 +13,15 @@ from pyem import GMM, GM, EM restore_path() +set_local_path() +# import modules that are located in the same directory as this file. from testcommon import DEF_DEC +curpath = sys.path[0] +restore_path() def load_dataset(filename): from scipy.io import loadmat - dic = loadmat(filename, squeeze_me = False) + dic = loadmat(os.path.join(curpath, filename), squeeze_me = False) dic['w0'] = dic['w0'].squeeze() dic['w'] = dic['w'].squeeze() dic['tw'] = dic['tw'].squeeze() Added: trunk/Lib/sandbox/pyem/tests/testcommon.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/testcommon.py 2007-06-12 12:21:04 UTC (rev 3099) +++ trunk/Lib/sandbox/pyem/tests/testcommon.py 2007-06-13 10:08:00 UTC (rev 3100) @@ -0,0 +1 @@ +DEF_DEC = 12 From scipy-svn at scipy.org Wed Jun 13 06:26:10 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Jun 2007 05:26:10 -0500 (CDT) Subject: [Scipy-svn] r3101 - trunk/Lib/sandbox/pyem/tests Message-ID: <20070613102610.27CC239C1B2@new.scipy.org> Author: cdavid Date: 2007-06-13 05:26:06 -0500 (Wed, 13 Jun 2007) New Revision: 3101 Modified: 
trunk/Lib/sandbox/pyem/tests/test_gmm_em.py Log: More fix for broken tests in gmm_em Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-13 10:08:00 UTC (rev 3100) +++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-13 10:26:06 UTC (rev 3101) @@ -1,5 +1,5 @@ #! /usr/bin/env python -# Last Change: Tue Jun 12 09:00 PM 2007 J +# Last Change: Wed Jun 13 07:00 PM 2007 J # For now, just test that all mode/dim execute correctly @@ -123,9 +123,9 @@ gmm = GMM(gm, 'test') EM().train(dic['data'], gmm) - assert_array_equal(gmm.gm.w, dic['w']) - assert_array_equal(gmm.gm.mu, dic['mu']) - assert_array_equal(gmm.gm.va, dic['va']) + assert_array_almost_equal(gmm.gm.w, dic['w'], DEF_DEC) + assert_array_almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC) + assert_array_almost_equal(gmm.gm.va, dic['va'], DEF_DEC) def test_1d_diag(self, level = 1): d = 1 @@ -137,9 +137,9 @@ gmm = GMM(gm, 'test') EM().train(dic['data'], gmm) - assert_array_equal(gmm.gm.w, dic['w']) - assert_array_equal(gmm.gm.mu, dic['mu']) - assert_array_equal(gmm.gm.va, dic['va']) + assert_array_equal(gmm.gm.w, dic['w'], DEF_DEC) + assert_array_equal(gmm.gm.mu, dic['mu'], DEF_DEC) + assert_array_equal(gmm.gm.va, dic['va'], DEF_DEC) def test_2d_full(self, level = 1): d = 2 @@ -151,9 +151,9 @@ gmm = GMM(gm, 'test') EM().train(dic['data'], gmm) - assert_array_equal(gmm.gm.w, dic['w']) - assert_array_equal(gmm.gm.mu, dic['mu']) - assert_array_equal(gmm.gm.va, dic['va']) + assert_array_equal(gmm.gm.w, dic['w'], DEF_DEC) + assert_array_equal(gmm.gm.mu, dic['mu'], DEF_DEC) + assert_array_equal(gmm.gm.va, dic['va'], DEF_DEC) def test_2d_diag(self, level = 1): d = 2 @@ -165,9 +165,9 @@ gmm = GMM(gm, 'test') EM().train(dic['data'], gmm) - assert_array_equal(gmm.gm.w, dic['w']) - assert_array_equal(gmm.gm.mu, dic['mu']) - assert_array_equal(gmm.gm.va, dic['va']) + assert_array__almost_equal(gmm.gm.w, 
dic['w'], DEF_DEC) + assert_array__almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC) + assert_array__almost_equal(gmm.gm.va, dic['va'], DEF_DEC) class test_log_domain(EmTest): """This class tests whether the GMM works in log domain.""" @@ -183,12 +183,12 @@ ta = N.log(a) tna = N.log(na) if not N.all(N.isfinite(ta)): - print "precision problem for %s, %dd, %dk, need fixing" % (mode, d, k) + print "precision problem for %s, %dd, %dk, test need fixing" % (mode, d, k) else: assert_array_almost_equal(ta, la, DEF_DEC) if not N.all(N.isfinite(tna)): - print "precision problem for %s, %dd, %dk, need fixing" % (mode, d, k) + print "precision problem for %s, %dd, %dk, test need fixing" % (mode, d, k) else: assert_array_almost_equal(tna, nla, DEF_DEC) From scipy-svn at scipy.org Wed Jun 13 06:29:33 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Jun 2007 05:29:33 -0500 (CDT) Subject: [Scipy-svn] r3102 - trunk/Lib/sandbox/pyem/tests Message-ID: <20070613102933.0BDAF39C1B2@new.scipy.org> Author: cdavid Date: 2007-06-13 05:29:29 -0500 (Wed, 13 Jun 2007) New Revision: 3102 Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py Log: Trivial fix for typo in pyem tests. 
Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-13 10:26:06 UTC (rev 3101) +++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-13 10:29:29 UTC (rev 3102) @@ -137,9 +137,9 @@ gmm = GMM(gm, 'test') EM().train(dic['data'], gmm) - assert_array_equal(gmm.gm.w, dic['w'], DEF_DEC) - assert_array_equal(gmm.gm.mu, dic['mu'], DEF_DEC) - assert_array_equal(gmm.gm.va, dic['va'], DEF_DEC) + assert_array_almost_equal(gmm.gm.w, dic['w'], DEF_DEC) + assert_array_almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC) + assert_array_almost_equal(gmm.gm.va, dic['va'], DEF_DEC) def test_2d_full(self, level = 1): d = 2 @@ -151,9 +151,9 @@ gmm = GMM(gm, 'test') EM().train(dic['data'], gmm) - assert_array_equal(gmm.gm.w, dic['w'], DEF_DEC) - assert_array_equal(gmm.gm.mu, dic['mu'], DEF_DEC) - assert_array_equal(gmm.gm.va, dic['va'], DEF_DEC) + assert_array_almost_equal(gmm.gm.w, dic['w'], DEF_DEC) + assert_array_almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC) + assert_array_almost_equal(gmm.gm.va, dic['va'], DEF_DEC) def test_2d_diag(self, level = 1): d = 2 @@ -165,9 +165,9 @@ gmm = GMM(gm, 'test') EM().train(dic['data'], gmm) - assert_array__almost_equal(gmm.gm.w, dic['w'], DEF_DEC) - assert_array__almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC) - assert_array__almost_equal(gmm.gm.va, dic['va'], DEF_DEC) + assert_array_almost_equal(gmm.gm.w, dic['w'], DEF_DEC) + assert_array_almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC) + assert_array_almost_equal(gmm.gm.va, dic['va'], DEF_DEC) class test_log_domain(EmTest): """This class tests whether the GMM works in log domain.""" From scipy-svn at scipy.org Wed Jun 13 09:56:35 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Jun 2007 08:56:35 -0500 (CDT) Subject: [Scipy-svn] r3103 - trunk/Lib/sandbox/maskedarray Message-ID: <20070613135635.5041839C1C5@new.scipy.org> Author: pierregm Date: 2007-06-13 08:56:32 -0500 
(Wed, 13 Jun 2007) New Revision: 3103 Modified: trunk/Lib/sandbox/maskedarray/core.py trunk/Lib/sandbox/maskedarray/mrecords.py Log: mrecords : fixed a bug in .filled Modified: trunk/Lib/sandbox/maskedarray/core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/core.py 2007-06-13 10:29:29 UTC (rev 3102) +++ trunk/Lib/sandbox/maskedarray/core.py 2007-06-13 13:56:32 UTC (rev 3103) @@ -2638,8 +2638,10 @@ if 1: x = arange(10) assert(x.ctypes.data == x.filled().ctypes.data) - if 1: - a = array([1,2,3,4],mask=[0,0,0,0],small_mask=False) + if 0: + a = array([1,2,3,4],mask=[0,0,0,0],small_mask=True) + a[1] = masked + a[1] = 1 assert(a.ravel()._mask, [0,0,0,0]) assert(a.compressed(), a) a[0] = masked Modified: trunk/Lib/sandbox/maskedarray/mrecords.py =================================================================== --- trunk/Lib/sandbox/maskedarray/mrecords.py 2007-06-13 10:29:29 UTC (rev 3102) +++ trunk/Lib/sandbox/maskedarray/mrecords.py 2007-06-13 13:56:32 UTC (rev 3103) @@ -341,13 +341,13 @@ If `fill_value` is None, uses self.fill_value. """ _localdict = self.__dict__ - d = _localdict['_data'] + d = self._data fm = _localdict['_fieldmask'] if not numeric.asarray(fm, dtype=bool_).any(): return d # if fill_value is None: - value = _localdict['fill_value'] + value = _localdict['_fill_value'] else: value = fill_value if numeric.size(value) == 1: From scipy-svn at scipy.org Wed Jun 13 19:26:50 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Jun 2007 18:26:50 -0500 (CDT) Subject: [Scipy-svn] r3104 - in trunk/Lib/io: . 
tests Message-ID: <20070613232650.0BC6A39C036@new.scipy.org> Author: wnbell Date: 2007-06-13 18:26:47 -0500 (Wed, 13 Jun 2007) New Revision: 3104 Modified: trunk/Lib/io/mmio.py trunk/Lib/io/tests/test_mmio.py Log: fixed sparse coordinate matrix indices to be base 1 as in the MM standard added unittest to prevent problem in the future Modified: trunk/Lib/io/mmio.py =================================================================== --- trunk/Lib/io/mmio.py 2007-06-13 13:56:32 UTC (rev 3103) +++ trunk/Lib/io/mmio.py 2007-06-13 23:26:47 UTC (rev 3104) @@ -333,11 +333,10 @@ assert symm=='general',`symm` if field in ['real','integer']: for i in range(entries): - target.write(format % (a.rowcol(i)+(a.getdata(i),))) + target.write(format % (a.row[i]+1,a.col[i]+1,a.data[i])) elif field=='complex': for i in range(entries): - value = a.getdata(i) - target.write(format % ((a.rowcol(i))+(real(value),imag(value)))) + target.write(format % (a.row[i]+1,a.col[i]+1,reak(a.data[i]),imag(a.data[i]))) elif field=='pattern': raise NotImplementedError,`field` else: Modified: trunk/Lib/io/tests/test_mmio.py =================================================================== --- trunk/Lib/io/tests/test_mmio.py 2007-06-13 13:56:32 UTC (rev 3103) +++ trunk/Lib/io/tests/test_mmio.py 2007-06-13 23:26:47 UTC (rev 3104) @@ -6,6 +6,7 @@ set_package_path() from io.mmio import mminfo,mmread,mmwrite +import scipy restore_path() class test_mmio_array(NumpyTestCase): @@ -151,5 +152,25 @@ b = mmread(fn).todense() assert_array_almost_equal(a,b) + def check_simple_write_read(self): + I = array([0, 0, 1, 2, 3, 3, 3, 4]) + J = array([0, 3, 1, 2, 1, 3, 4, 4]) + V = array([ 1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0 ]) + + b = scipy.sparse.coo_matrix((V,(I,J)),dims=(5,5)) + + fn = mktemp() + mmwrite(fn,b) + + assert_equal(mminfo(fn),(5,5,8,'coordinate','real','general')) + a = [[1, 0, 0, 6, 0], + [0, 10.5, 0, 0, 0], + [0, 0, .015, 0, 0], + [0, 250.5, 0, -280, 33.32], + [0, 0, 0, 0, 12]] + b = 
mmread(fn).todense() + assert_array_almost_equal(a,b) + + if __name__ == "__main__": NumpyTest().run() From scipy-svn at scipy.org Wed Jun 13 19:44:05 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 13 Jun 2007 18:44:05 -0500 (CDT) Subject: [Scipy-svn] r3105 - trunk/Lib/io Message-ID: <20070613234405.C168D39C06D@new.scipy.org> Author: wnbell Date: 2007-06-13 18:44:04 -0500 (Wed, 13 Jun 2007) New Revision: 3105 Modified: trunk/Lib/io/mmio.py Log: fixed bug in MM io for non-COO sparse formats Modified: trunk/Lib/io/mmio.py =================================================================== --- trunk/Lib/io/mmio.py 2007-06-13 23:26:47 UTC (rev 3104) +++ trunk/Lib/io/mmio.py 2007-06-13 23:44:04 UTC (rev 3105) @@ -333,10 +333,10 @@ assert symm=='general',`symm` if field in ['real','integer']: for i in range(entries): - target.write(format % (a.row[i]+1,a.col[i]+1,a.data[i])) + target.write(format % (a.rowcol(i)[0] + 1,a.rowcol(i)[1] + 1,a.getdata(i))) #convert base 0 to base 1 elif field=='complex': for i in range(entries): - target.write(format % (a.row[i]+1,a.col[i]+1,reak(a.data[i]),imag(a.data[i]))) + target.write(format % (a.rowcol(i)[0] + 1,a.rowcol(i)[1] + 1,real(a.getdata(i)),imag(a.getdata(i)))) elif field=='pattern': raise NotImplementedError,`field` else: From scipy-svn at scipy.org Thu Jun 14 21:04:34 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 14 Jun 2007 20:04:34 -0500 (CDT) Subject: [Scipy-svn] r3106 - trunk/Lib/sandbox/timeseries/lib Message-ID: <20070615010434.8C49E39C0E2@new.scipy.org> Author: mattknox_ca Date: 2007-06-14 20:00:10 -0500 (Thu, 14 Jun 2007) New Revision: 3106 Modified: trunk/Lib/sandbox/timeseries/lib/moving_funcs.py Log: reversed params to expmave_sub . 
Behaviour of ufuncs created from frompyfunc seems to have changed at some point causing this to be reversed Modified: trunk/Lib/sandbox/timeseries/lib/moving_funcs.py =================================================================== --- trunk/Lib/sandbox/timeseries/lib/moving_funcs.py 2007-06-13 23:44:04 UTC (rev 3105) +++ trunk/Lib/sandbox/timeseries/lib/moving_funcs.py 2007-06-15 01:00:10 UTC (rev 3106) @@ -202,7 +202,7 @@ # k = 2./float(span + 1) def expmave_sub(a, b): - return b + k * (a - b) + return a + k * (b - a) # data._data.flat = N.frompyfunc(expmave_sub, 2, 1).accumulate(_data) if ismasked: From scipy-svn at scipy.org Fri Jun 15 12:15:08 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Jun 2007 11:15:08 -0500 (CDT) Subject: [Scipy-svn] r3107 - in trunk/Lib/sandbox/maskedarray: . tests Message-ID: <20070615161508.08CC439C055@new.scipy.org> Author: pierregm Date: 2007-06-15 11:14:55 -0500 (Fri, 15 Jun 2007) New Revision: 3107 Modified: trunk/Lib/sandbox/maskedarray/core.py trunk/Lib/sandbox/maskedarray/extras.py trunk/Lib/sandbox/maskedarray/mrecords.py trunk/Lib/sandbox/maskedarray/tests/test_core.py trunk/Lib/sandbox/maskedarray/tests/test_extras.py trunk/Lib/sandbox/maskedarray/tests/test_mrecords.py Log: core : __setitem__ now calls __setmask__ when value is masked core : added a test in filled on m.any() extras : make_rowcols : make sure the mask is copied to avoid propagation mrecords : fixed a couple of bugs Modified: trunk/Lib/sandbox/maskedarray/core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/core.py 2007-06-15 01:00:10 UTC (rev 3106) +++ trunk/Lib/sandbox/maskedarray/core.py 2007-06-15 16:14:55 UTC (rev 3107) @@ -1132,11 +1132,13 @@ # raise IndexError, msg #.... 
if value is masked: - if self._mask is nomask: - self._mask = make_mask_none(self.shape) - else: - self._mask = self._mask.copy() - self._mask[indx] = True + m = self._mask + if m is nomask: + m = make_mask_none(self.shape) +# else: +# m = m.copy() + m[indx] = True + self.__setmask__(m) return #.... dval = numeric.asarray(value).astype(self.dtype) @@ -1261,7 +1263,7 @@ If `fill_value` is None, uses self.fill_value. """ m = self._mask - if m is nomask: + if m is nomask or not m.any(): return self._data # if fill_value is None: @@ -2645,4 +2647,13 @@ assert(a.ravel()._mask, [0,0,0,0]) assert(a.compressed(), a) a[0] = masked - assert(a.compressed()._mask, [0,0,0]) \ No newline at end of file + assert(a.compressed()._mask, [0,0,0]) + if 1: + x = array(0, mask=0) + I = x.ctypes.data + J = x.filled().ctypes.data + print (I,J) + x = array([0,0], mask=0) + (I,J) = (x.ctypes.data, x.filled().ctypes.data) + print (I,J) + \ No newline at end of file Modified: trunk/Lib/sandbox/maskedarray/extras.py =================================================================== --- trunk/Lib/sandbox/maskedarray/extras.py 2007-06-15 01:00:10 UTC (rev 3106) +++ trunk/Lib/sandbox/maskedarray/extras.py 2007-06-15 16:14:55 UTC (rev 3107) @@ -411,6 +411,7 @@ if m is nomask or not m.any(): return a maskedval = m.nonzero() + a._mask = a._mask.copy() if not axis: a[function_base.unique(maskedval[0])] = masked if axis in [None, 1, -1]: @@ -648,7 +649,15 @@ ################################################################################ if __name__ == '__main__': # + import numpy as N + from maskedarray.testutils import assert_equal if 1: - x = arange(10) - x[0] = masked - print dot(x,x) \ No newline at end of file + n = N.arange(1,7) + # + m = [1,0,0,0,0,0] + a = masked_array(n, mask=m).reshape(2,3) + b = masked_array(n, mask=m).reshape(3,2) + c = dot(a,b, True) + assert_equal(c.mask, [[1,1],[1,0]]) + c = dot(a,b,False) + assert_equal(c, N.dot(a.filled(0), b.filled(0))) \ No newline at end of file 
Modified: trunk/Lib/sandbox/maskedarray/mrecords.py =================================================================== --- trunk/Lib/sandbox/maskedarray/mrecords.py 2007-06-15 01:00:10 UTC (rev 3106) +++ trunk/Lib/sandbox/maskedarray/mrecords.py 2007-06-15 16:14:55 UTC (rev 3107) @@ -115,7 +115,7 @@ if names is not None: descr = _checknames(descr,names) _names = descr.names - mdescr = [(t[0],'|b1') for t in descr.descr] + mdescr = [(n,'|b1') for n in _names] # shape = numeric.asarray(data[0]).shape if isinstance(shape, int): @@ -129,7 +129,11 @@ _fieldmask = data._fieldmask elif isinstance(data, recarray): _data = data - _fieldmask = mask + if mask is nomask: + _fieldmask = data.astype(mdescr) + _fieldmask.flat = tuple([False]*len(mdescr)) + else: + _fieldmask = mask else: _data = recarray(shape, dtype=descr) _fieldmask = recarray(shape, dtype=mdescr) @@ -179,7 +183,7 @@ _data = self._data _mask = self._fieldmask obj = numeric.asarray(_data.__getattribute__(attr)).view(MaskedArray) - obj._mask = make_mask(_mask.__getattribute__(attr)) + obj.__setmask__(_mask.__getattribute__(attr)) return obj raise AttributeError,"No attribute '%s' !" % attr @@ -232,6 +236,10 @@ obj = ndarray.__getitem__(self, indx).view(type(self)) obj._fieldmask = _localdict['_fieldmask'][indx] return obj + #............................................ 
+ def __setitem__(self, indx, value): + """Sets the given record to value.""" + MaskedArray.__setitem__(self, indx, value) # def __getslice__(self, i, j): # """Returns the slice described by [i,j].""" @@ -243,13 +251,12 @@ def __setslice__(self, i, j, value): """Sets the slice described by [i,j] to `value`.""" _localdict = self.__dict__ - d = self._data m = _localdict['_fieldmask'] names = self.dtype.names if value is masked: for n in names: - m[i:j][n] = masked + m[i:j][n] = True elif not self._hardmask: fval = filled(value) mval = getmaskarray(value) @@ -484,7 +491,7 @@ descr = parsed._descr try: - retval = numeric.array(reclist, dtype = descr) + retval = numeric.array(reclist, dtype = descr).view(recarray) except TypeError: # list of lists instead of list of tuples if (shape is None or shape == 0): shape = len(reclist)*2 @@ -645,13 +652,27 @@ ################################################################################ if __name__ == '__main__': import numpy as N + from maskedarray.testutils import assert_equal if 1: d = N.arange(5) m = MA.make_mask([1,0,0,1,1]) base_d = N.r_[d,d[::-1]].reshape(2,-1).T base_m = N.r_[[m, m[::-1]]].T base = MA.array(base_d, mask=base_m) - mrecord = fromarrays(base.T,) - + mrecord = fromarrays(base.T,dtype=[('a',N.float_),('b',N.float_)]) mrec = MaskedRecords(mrecord) + # + mrec.a[3:] = 5 + assert_equal(mrec.a, [0,1,2,5,5]) + assert_equal(mrec.a._mask, [1,0,0,0,0]) + # + mrec.b[3:] = masked + assert_equal(mrec.b, [4,3,2,1,0]) + assert_equal(mrec.b._mask, [1,1,0,1,1]) + # + mrec[:2] = masked + assert_equal(mrec._mask, [1,1,0,0,0]) + mrec[-1] = masked + assert_equal(mrec._mask, [1,1,0,0,1]) + \ No newline at end of file Modified: trunk/Lib/sandbox/maskedarray/tests/test_core.py =================================================================== --- trunk/Lib/sandbox/maskedarray/tests/test_core.py 2007-06-15 01:00:10 UTC (rev 3106) +++ trunk/Lib/sandbox/maskedarray/tests/test_core.py 2007-06-15 16:14:55 UTC (rev 3107) @@ -240,7 
+240,6 @@ assert(minimum(xm, xm).mask) assert(xm.filled().dtype is xm.data.dtype) x = array(0, mask=0) -# assert(x.filled() is x.data) assert_equal(x.filled().ctypes.data, x.ctypes.data) assert_equal(str(xm), str(masked_print_option)) #......................... Modified: trunk/Lib/sandbox/maskedarray/tests/test_extras.py =================================================================== --- trunk/Lib/sandbox/maskedarray/tests/test_extras.py 2007-06-15 01:00:10 UTC (rev 3106) +++ trunk/Lib/sandbox/maskedarray/tests/test_extras.py 2007-06-15 16:14:55 UTC (rev 3107) @@ -219,18 +219,27 @@ m = [1,0,0,0,0,0] a = masked_array(n, mask=m).reshape(2,3) b = masked_array(n, mask=m).reshape(3,2) - c = dot(a,b) + c = dot(a,b,True) assert_equal(c.mask, [[1,1],[1,0]]) - c = dot(b,a) + c = dot(b,a,True) assert_equal(c.mask, [[1,1,1],[1,0,0],[1,0,0]]) + c = dot(a,b,False) + assert_equal(c, N.dot(a.filled(0), b.filled(0))) + c = dot(b,a,False) + assert_equal(c, N.dot(b.filled(0), a.filled(0))) # m = [0,0,0,0,0,1] a = masked_array(n, mask=m).reshape(2,3) b = masked_array(n, mask=m).reshape(3,2) - c = dot(a,b) + c = dot(a,b,True) assert_equal(c.mask,[[0,1],[1,1]]) - c = dot(b,a) + c = dot(b,a,True) assert_equal(c.mask, [[0,0,1],[0,0,1],[1,1,1]]) + c = dot(a,b,False) + assert_equal(c, N.dot(a.filled(0), b.filled(0))) + assert_equal(c, dot(a,b)) + c = dot(b,a,False) + assert_equal(c, N.dot(b.filled(0), a.filled(0))) # m = [0,0,0,0,0,0] a = masked_array(n, mask=m).reshape(2,3) @@ -242,24 +251,36 @@ # a = masked_array(n, mask=[1,0,0,0,0,0]).reshape(2,3) b = masked_array(n, mask=[0,0,0,0,0,0]).reshape(3,2) - c = dot(a,b) + c = dot(a,b,True) assert_equal(c.mask,[[1,1],[0,0]]) - c = dot(b,a) + c = dot(a,b,False) + assert_equal(c, N.dot(a.filled(0),b.filled(0))) + c = dot(b,a,True) assert_equal(c.mask,[[1,0,0],[1,0,0],[1,0,0]]) + c = dot(b,a,False) + assert_equal(c, N.dot(b.filled(0),a.filled(0))) # a = masked_array(n, mask=[0,0,0,0,0,1]).reshape(2,3) b = masked_array(n, 
mask=[0,0,0,0,0,0]).reshape(3,2) + c = dot(a,b,True) + assert_equal(c.mask,[[0,0],[1,1]]) c = dot(a,b) - assert_equal(c.mask,[[0,0],[1,1]]) - c = dot(b,a) + assert_equal(c, N.dot(a.filled(0),b.filled(0))) + c = dot(b,a,True) assert_equal(c.mask,[[0,0,1],[0,0,1],[0,0,1]]) + c = dot(b,a,False) + assert_equal(c, N.dot(b.filled(0), a.filled(0))) # a = masked_array(n, mask=[0,0,0,0,0,1]).reshape(2,3) b = masked_array(n, mask=[0,0,1,0,0,0]).reshape(3,2) - c = dot(a,b) + c = dot(a,b,True) assert_equal(c.mask,[[1,0],[1,1]]) - c = dot(b,a) + c = dot(a,b,False) + assert_equal(c, N.dot(a.filled(0),b.filled(0))) + c = dot(b,a,True) assert_equal(c.mask,[[0,0,1],[1,1,1],[0,0,1]]) + c = dot(b,a,False) + assert_equal(c, N.dot(b.filled(0),a.filled(0))) def test_mediff1d(self): "Tests mediff1d" Modified: trunk/Lib/sandbox/maskedarray/tests/test_mrecords.py =================================================================== --- trunk/Lib/sandbox/maskedarray/tests/test_mrecords.py 2007-06-15 01:00:10 UTC (rev 3106) +++ trunk/Lib/sandbox/maskedarray/tests/test_mrecords.py 2007-06-15 16:14:55 UTC (rev 3107) @@ -41,54 +41,74 @@ base_d = N.r_[d,d[::-1]].reshape(2,-1).T base_m = N.r_[[m, m[::-1]]].T base = MA.array(base_d, mask=base_m) - mrecord = fromarrays(base.T,) + mrecord = fromarrays(base.T, dtype=[('a',N.float_),('b',N.float_)]) self.data = [d, m, mrecord] def test_get(self): "Tests fields retrieval" [d, m, mrec] = self.data mrec = mrec.copy() - assert_equal(mrec.f0, MA.array(d,mask=m)) - assert_equal(mrec.f1, MA.array(d[::-1],mask=m[::-1])) + assert_equal(mrec.a, MA.array(d,mask=m)) + assert_equal(mrec.b, MA.array(d[::-1],mask=m[::-1])) assert((mrec._fieldmask == N.core.records.fromarrays([m, m[::-1]])).all()) assert_equal(mrec._mask, N.r_[[m,m[::-1]]].all(0)) - assert_equal(mrec.f0[1], mrec[1].f0) + assert_equal(mrec.a[1], mrec[1].a) # assert(isinstance(mrec[:2], MaskedRecords)) - assert_equal(mrec[:2]['f0'], d[:2]) + assert_equal(mrec[:2]['a'], d[:2]) def test_set(self): "Tests 
setting fields/attributes." [d, m, mrecord] = self.data - mrecord.f0._data[:] = 5 - assert_equal(mrecord['f0']._data, [5,5,5,5,5]) - mrecord.f0 = 1 - assert_equal(mrecord['f0']._data, [1]*5) - assert_equal(getmaskarray(mrecord['f0']), [0]*5) - mrecord.f1 = MA.masked - assert_equal(mrecord.f1.mask, [1]*5) - assert_equal(getmaskarray(mrecord['f1']), [1]*5) + mrecord.a._data[:] = 5 + assert_equal(mrecord['a']._data, [5,5,5,5,5]) + mrecord.a = 1 + assert_equal(mrecord['a']._data, [1]*5) + assert_equal(getmaskarray(mrecord['a']), [0]*5) + mrecord.b = MA.masked + assert_equal(mrecord.b.mask, [1]*5) + assert_equal(getmaskarray(mrecord['b']), [1]*5) mrecord._mask = MA.masked - assert_equal(getmaskarray(mrecord['f1']), [1]*5) - assert_equal(mrecord['f0']._mask, mrecord['f1']._mask) + assert_equal(getmaskarray(mrecord['b']), [1]*5) + assert_equal(mrecord['a']._mask, mrecord['b']._mask) mrecord._mask = MA.nomask - assert_equal(getmaskarray(mrecord['f1']), [0]*5) - assert_equal(mrecord['f0']._mask, mrecord['f1']._mask) + assert_equal(getmaskarray(mrecord['b']), [0]*5) + assert_equal(mrecord['a']._mask, mrecord['b']._mask) # + def test_setfields(self): + "Tests setting fields." + [d, m, mrecord] = self.data + mrecord.a[3:] = 5 + assert_equal(mrecord.a, [0,1,2,5,5]) + assert_equal(mrecord.a._mask, [1,0,0,0,0]) + # + mrecord.b[3:] = masked + assert_equal(mrecord.b, [4,3,2,1,0]) + assert_equal(mrecord.b._mask, [1,1,0,1,1]) + def test_setslices(self): "Tests setting slices." 
[d, m, mrec] = self.data mrec[:2] = 5 - assert_equal(mrec.f0._data, [5,5,2,3,4]) - assert_equal(mrec.f1._data, [5,5,2,1,0]) - assert_equal(mrec.f0._mask, [0,0,0,1,1]) - assert_equal(mrec.f1._mask, [0,0,0,0,1]) + assert_equal(mrec.a._data, [5,5,2,3,4]) + assert_equal(mrec.b._data, [5,5,2,1,0]) + assert_equal(mrec.a._mask, [0,0,0,1,1]) + assert_equal(mrec.b._mask, [0,0,0,0,1]) + # + mrec[:2] = masked + assert_equal(mrec._mask, [1,1,0,0,1]) + mrec[-2] = masked + assert_equal(mrec._mask, [1,1,0,1,1]) + # + def test_setslices_hardmask(self): + "Tests setting slices w/ hardmask." + [d, m, mrec] = self.data mrec.harden_mask() mrec[-2:] = 5 - assert_equal(mrec.f0._data, [5,5,2,3,4]) - assert_equal(mrec.f1._data, [5,5,2,5,0]) - assert_equal(mrec.f0._mask, [0,0,0,1,1]) - assert_equal(mrec.f1._mask, [0,0,0,0,1]) + assert_equal(mrec.a._data, [0,1,2,3,4]) + assert_equal(mrec.b._data, [4,3,2,5,0]) + assert_equal(mrec.a._mask, [1,0,0,1,1]) + assert_equal(mrec.b._mask, [1,1,0,0,1]) def test_hardmask(self): "Test hardmask" @@ -101,24 +121,26 @@ mrec.soften_mask() assert(not mrec._hardmask) mrec._mask = nomask - assert(mrec['f1']._mask is nomask) - assert_equal(mrec['f0']._mask,mrec['f1']._mask) + assert(mrec['b']._mask is nomask) + assert_equal(mrec['a']._mask,mrec['b']._mask) def test_fromrecords(self): "Test from recarray." [d, m, mrec] = self.data - nrec = N.core.records.fromarrays(N.r_[[d,d[::-1]]]) - mrecfr = fromrecords(nrec.tolist()) - assert_equal(mrecfr.f0, mrec.f0) - assert_equal(mrecfr.dtype, mrec.dtype) + nrec = N.core.records.fromarrays(N.r_[[d,d[::-1]]], + dtype=[('a',N.float_),('b',N.float_)]) #.................... mrecfr = fromrecords(nrec) - assert_equal(mrecfr.f0, mrec.f0) + assert_equal(mrecfr.a, mrec.a) assert_equal(mrecfr.dtype, mrec.dtype) #.................... tmp = mrec[::-1] #.tolist() mrecfr = fromrecords(tmp) - assert_equal(mrecfr.f0, mrec.f0[::-1]) + assert_equal(mrecfr.a, mrec.a[::-1]) + #.................... 
+ mrecfr = fromrecords(nrec.tolist()) + assert_equal(mrecfr.a, mrec.a) + assert_equal(mrecfr.dtype, mrec.dtype) def test_fromtextfile(self): "Tests reading from a text file." From scipy-svn at scipy.org Fri Jun 15 13:57:32 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 15 Jun 2007 12:57:32 -0500 (CDT) Subject: [Scipy-svn] r3108 - trunk/Lib/stats Message-ID: <20070615175732.7861E39C141@new.scipy.org> Author: oliphant Date: 2007-06-15 12:57:26 -0500 (Fri, 15 Jun 2007) New Revision: 3108 Modified: trunk/Lib/stats/stats.py Log: Replace 'as' variable name Modified: trunk/Lib/stats/stats.py =================================================================== --- trunk/Lib/stats/stats.py 2007-06-15 16:14:55 UTC (rev 3107) +++ trunk/Lib/stats/stats.py 2007-06-15 17:57:26 UTC (rev 3108) @@ -2094,8 +2094,8 @@ ) """ it = np.argsort(a) - as = a[it] - return as, it + as_ = a[it] + return as_, it def rankdata(a): """Ranks the data in a, dealing with ties appropriately. From scipy-svn at scipy.org Mon Jun 18 19:22:20 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Mon, 18 Jun 2007 18:22:20 -0500 (CDT) Subject: [Scipy-svn] r3109 - trunk/Lib/io Message-ID: <20070618232220.EC1E739C1B7@new.scipy.org> Author: wnbell Date: 2007-06-18 18:22:19 -0500 (Mon, 18 Jun 2007) New Revision: 3109 Modified: trunk/Lib/io/mmio.py Log: added support for (sparse) "pattern" Matrix Market matrices Modified: trunk/Lib/io/mmio.py =================================================================== --- trunk/Lib/io/mmio.py 2007-06-15 17:57:26 UTC (rev 3108) +++ trunk/Lib/io/mmio.py 2007-06-18 23:22:19 UTC (rev 3109) @@ -115,7 +115,7 @@ elif field=='complex': dtype='D' elif field=='pattern': - raise NotImplementedError,`field` + dtype='d' else: raise ValueError,`field` @@ -123,7 +123,8 @@ is_complex = field=='complex' is_skew = symm=='skew-symmetric' is_herm = symm=='hermitian' - + is_pattern = field=='pattern' + if rep == 'array': a = zeros((rows,cols),dtype=dtype) 
line = 1 @@ -193,7 +194,9 @@ l = line.split() i = int(l[0])-1 j = int(l[1])-1 - if is_complex: + if is_pattern: + aij = 1.0 #use 1.0 for pattern matrices + elif is_complex: aij = complex(*map(float,l[2:])) else: aij = float(l[2]) From scipy-svn at scipy.org Tue Jun 19 11:08:00 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Tue, 19 Jun 2007 10:08:00 -0500 (CDT) Subject: [Scipy-svn] r3110 - in trunk/Lib/cluster: . src tests Message-ID: <20070619150800.4DC9F39C08F@new.scipy.org> Author: cdavid Date: 2007-06-19 10:07:48 -0500 (Tue, 19 Jun 2007) New Revision: 3110 Added: trunk/Lib/cluster/src/vq.c trunk/Lib/cluster/src/vq.def trunk/Lib/cluster/src/vq.tpl trunk/Lib/cluster/src/vq_module.c Removed: trunk/Lib/cluster/src/swig_num.i trunk/Lib/cluster/src/vq.i trunk/Lib/cluster/src/vq_wrap.cpp Modified: trunk/Lib/cluster/setup.py trunk/Lib/cluster/src/vq.h trunk/Lib/cluster/tests/test_vq.py trunk/Lib/cluster/vq.py Log: Add support for rank 1 arrays in kmean: * swig interface was not compatible anymore with current swig, so the module was converted to pure C python module * all tests pass again, including for rank 1 array. 
Modified: trunk/Lib/cluster/setup.py =================================================================== --- trunk/Lib/cluster/setup.py 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/setup.py 2007-06-19 15:07:48 UTC (rev 3110) @@ -3,13 +3,18 @@ from os.path import join def configuration(parent_package='',top_path=None): - from numpy.distutils.misc_util import Configuration + from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs config = Configuration('cluster',parent_package,top_path) config.add_data_dir('tests') config.add_extension('_vq', - sources=[join('src', 'vq_wrap.cpp')]) + sources=[join('src', 'vq_module.c'), join('src', 'vq.c')], + include_dirs = [get_numpy_include_dirs()]) + #config.add_extension('_vq', + # sources=[join('src', 'vq_wrap.cpp')]) + #config.add_extension('_c_vq', + # sources=[join('src', 'vq.c') ]) return config Deleted: trunk/Lib/cluster/src/swig_num.i =================================================================== --- trunk/Lib/cluster/src/swig_num.i 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/src/swig_num.i 2007-06-19 15:07:48 UTC (rev 3110) @@ -1,644 +0,0 @@ - -/************************************************************************/ -/* swig_num.i -- typemaps for mapping C arrays to Python Numeric arrays */ -/* */ -/* This file is auto-generated by swig_num_gen.py. */ -/* DO NOT EDIT THIS FILE DIRECTLY unless you want your changes */ -/* clobbered by the next run of the generator. */ -/************************************************************************/ - -%include typemaps.i - -/************************************************************************/ -/* helper functions and initialization */ -/************************************************************************/ -%{ - -#include "numpy/arrayobject.h" - -// hmmm. how do we prevent typedefs from conflicting -// with users definition on complex numbers? 
-//#include "complex_test.h" -//typedef struct{ float real; -// float imag;} complex; -//typedef struct{ double real; -// double imag;} zcomplex; - - -// used for converting typecodes to memory sizes. -int char_to_size(char type) -{ - if (type=='i') return sizeof(int); - if (type=='f') return sizeof(float); - if (type=='d') return sizeof(double); - if (type=='c') return 2*sizeof(float); - if (type=='z') return 2*sizeof(double); -} -int char_to_numtype(char type) -{ - if (type=='i') return 'i'; - if (type=='f') return 'f'; - if (type=='d') return 'd'; - if (type=='c') return 'F'; - if (type=='z') return 'D'; -} -%} - -%init %{ - import_array(); -%} - -%{ - -typedef int int_IN_D0; -typedef float float_IN_D0; -typedef double double_IN_D0; -typedef int int_IN_D1; -typedef float float_IN_D1; -typedef double double_IN_D1; -typedef int int_IN_D2; -typedef float float_IN_D2; -typedef double double_IN_D2; -typedef int int_IN_D0_D1; -typedef float float_IN_D0_D1; -typedef double double_IN_D0_D1; -typedef int int_IN_D0_D2; -typedef float float_IN_D0_D2; -typedef double double_IN_D0_D2; -typedef int int_IN_D1_D2; -typedef float float_IN_D1_D2; -typedef double double_IN_D1_D2; -typedef int int_IN_D0_D1_D2; -typedef float float_IN_D0_D1_D2; -typedef double double_IN_D0_D1_D2; - - -typedef int int_ARGOUT_D0; -typedef float float_ARGOUT_D0; -typedef double double_ARGOUT_D0; -typedef int int_ARGOUT_D1; -typedef float float_ARGOUT_D1; -typedef double double_ARGOUT_D1; -typedef int int_ARGOUT_D2; -typedef float float_ARGOUT_D2; -typedef double double_ARGOUT_D2; -typedef int int_ARGOUT_D0_D1; -typedef float float_ARGOUT_D0_D1; -typedef double double_ARGOUT_D0_D1; -typedef int int_ARGOUT_D0_D2; -typedef float float_ARGOUT_D0_D2; -typedef double double_ARGOUT_D0_D2; -typedef int int_ARGOUT_D1_D2; -typedef float float_ARGOUT_D1_D2; -typedef double double_ARGOUT_D1_D2; -typedef int int_ARGOUT_D0_D1_D2; -typedef float float_ARGOUT_D0_D1_D2; -typedef double double_ARGOUT_D0_D1_D2; - - 
-typedef int int_ARGOUT_TUPLE_D0; -typedef float float_ARGOUT_TUPLE_D0; -typedef double double_ARGOUT_TUPLE_D0; -typedef int int_ARGOUT_TUPLE_D1; -typedef float float_ARGOUT_TUPLE_D1; -typedef double double_ARGOUT_TUPLE_D1; -typedef int int_ARGOUT_TUPLE_D2; -typedef float float_ARGOUT_TUPLE_D2; -typedef double double_ARGOUT_TUPLE_D2; -typedef int int_ARGOUT_TUPLE_D0_D1; -typedef float float_ARGOUT_TUPLE_D0_D1; -typedef double double_ARGOUT_TUPLE_D0_D1; -typedef int int_ARGOUT_TUPLE_D0_D2; -typedef float float_ARGOUT_TUPLE_D0_D2; -typedef double double_ARGOUT_TUPLE_D0_D2; -typedef int int_ARGOUT_TUPLE_D1_D2; -typedef float float_ARGOUT_TUPLE_D1_D2; -typedef double double_ARGOUT_TUPLE_D1_D2; -typedef int int_ARGOUT_TUPLE_D0_D1_D2; -typedef float float_ARGOUT_TUPLE_D0_D1_D2; -typedef double double_ARGOUT_TUPLE_D0_D1_D2; - - -typedef int int_OUT_D0; -typedef float float_OUT_D0; -typedef double double_OUT_D0; -typedef int int_OUT_D1; -typedef float float_OUT_D1; -typedef double double_OUT_D1; -typedef int int_OUT_D2; -typedef float float_OUT_D2; -typedef double double_OUT_D2; -typedef int int_OUT_D0_D1; -typedef float float_OUT_D0_D1; -typedef double double_OUT_D0_D1; -typedef int int_OUT_D0_D2; -typedef float float_OUT_D0_D2; -typedef double double_OUT_D0_D2; -typedef int int_OUT_D1_D2; -typedef float float_OUT_D1_D2; -typedef double double_OUT_D1_D2; -typedef int int_OUT_D0_D1_D2; -typedef float float_OUT_D0_D1_D2; -typedef double double_OUT_D0_D1_D2; - - -%} - -/************************************************************************/ -/* typemap code for IN arguments */ -/************************************************************************/ -%{ -PyArrayObject* IN_in(PyObject* source, char* basetype_string, - int** target_dims, int dims) -{ - PyArrayObject *a_obj; - char ar_type = char_to_numtype(basetype_string[0]); - a_obj = (PyArrayObject*) PyArray_ContiguousFromObject(source,ar_type, - dims,dims); - if (a_obj == NULL) - { - //PyArray Contiguous From Object 
will set the error value. - return NULL; - } - for(int i = 0; i < dims;i++) - { - *(target_dims[i]) = a_obj->dimensions[i]; - } - return a_obj; -} - -%} -%typemap(python,freearg) DECREF { Py_XDECREF($arg); } - -%typemap(python,in) IN_D0 * -{ - int* targ_dims[1] = {_d0}; - PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,1); - if (a_obj == NULL) return NULL; - $target = ($type) a_obj->data; - $source = (PyObject*)a_obj; -} - -%typemap(python,freearg) IN_D0 * = DECREF; - -%typemap(python,in) IN_D1 * -{ - int* targ_dims[1] = {_d1}; - PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,1); - if (a_obj == NULL) return NULL; - $target = ($type) a_obj->data; - $source = (PyObject*)a_obj; -} - -%typemap(python,freearg) IN_D1 * = DECREF; - -%typemap(python,in) IN_D2 * -{ - int* targ_dims[1] = {_d2}; - PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,1); - if (a_obj == NULL) return NULL; - $target = ($type) a_obj->data; - $source = (PyObject*)a_obj; -} - -%typemap(python,freearg) IN_D2 * = DECREF; - -%typemap(python,in) IN_D0_D1 * -{ - int* targ_dims[2] = {_d0,_d1}; - PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,2); - if (a_obj == NULL) return NULL; - $target = ($type) a_obj->data; - $source = (PyObject*)a_obj; -} - -%typemap(python,freearg) IN_D0_D1 * = DECREF; - -%typemap(python,in) IN_D0_D2 * -{ - int* targ_dims[2] = {_d0,_d2}; - PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,2); - if (a_obj == NULL) return NULL; - $target = ($type) a_obj->data; - $source = (PyObject*)a_obj; -} - -%typemap(python,freearg) IN_D0_D2 * = DECREF; - -%typemap(python,in) IN_D1_D2 * -{ - int* targ_dims[2] = {_d1,_d2}; - PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,2); - if (a_obj == NULL) return NULL; - $target = ($type) a_obj->data; - $source = (PyObject*)a_obj; -} - -%typemap(python,freearg) IN_D1_D2 * = DECREF; - -%typemap(python,in) IN_D0_D1_D2 * -{ - int* targ_dims[3] = {_d0,_d1,_d2}; - PyArrayObject* a_obj = 
IN_in($source,"$basetype",targ_dims,3); - if (a_obj == NULL) return NULL; - $target = ($type) a_obj->data; - $source = (PyObject*)a_obj; -} - -%typemap(python,freearg) IN_D0_D1_D2 * = DECREF; - -/************************************************************************/ -/* typemap code for ARGOUT arguments */ -/************************************************************************/ -%{ -char* ARGOUT_check(char* basetype_string,int* dims, int dim_len) -{ - char *rdata; - int element_size = char_to_size(basetype_string[0]); - int tot_length = 1; - for (int i = 0; i < dim_len; i++) - tot_length *= dims[i]; - rdata = (char*)malloc(tot_length*element_size); - if(rdata == NULL) - { - PyErr_SetString(PyExc_MemoryError, "can't allocate memory for output array for arg$argnum"); - return NULL; - } - return rdata; -} - -PyObject* ARGOUT_argout(char* source, char* basetype_string, - int* dims, int dim_len) -{ - PyArrayObject *res; - char array_type = char_to_numtype(basetype_string[0]); - res = (PyArrayObject *)PyArray_FromDimsAndData(dim_len, dims, - array_type,source); - if(res == NULL) - { - //PyErr_SetString(PyExc_ValueError, "error converting internal data to array"); - return NULL; - } - res->flags |= NPY_OWNDATA; // we want the array to deallocate mem when it is finished. - // stick result in the output tuple (target). - // Need to think about generality of this one... 
- return (PyObject *) res; -} -%} - -%typemap(python, ignore) ARGOUT_D0 * {} -%typemap(python, check) ARGOUT_D0 * -{ - int dim_len = 1; - int dims[1] = {*_d0}; - $target = ($type) ARGOUT_check("$basetype",dims,dim_len); - if ($target == NULL) return NULL; -} -%typemap(python, argout) ARGOUT_D0 * -{ - int dim_len = 1; - int dims[1] = {*_d0}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, ignore) ARGOUT_D1 * {} -%typemap(python, check) ARGOUT_D1 * -{ - int dim_len = 1; - int dims[1] = {*_d1}; - $target = ($type) ARGOUT_check("$basetype",dims,dim_len); - if ($target == NULL) return NULL; -} -%typemap(python, argout) ARGOUT_D1 * -{ - int dim_len = 1; - int dims[1] = {*_d1}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, ignore) ARGOUT_D2 * {} -%typemap(python, check) ARGOUT_D2 * -{ - int dim_len = 1; - int dims[1] = {*_d2}; - $target = ($type) ARGOUT_check("$basetype",dims,dim_len); - if ($target == NULL) return NULL; -} -%typemap(python, argout) ARGOUT_D2 * -{ - int dim_len = 1; - int dims[1] = {*_d2}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, ignore) ARGOUT_D0_D1 * {} -%typemap(python, check) ARGOUT_D0_D1 * -{ - int dim_len = 2; - int dims[2] = {*_d0,*_d1}; - $target = ($type) ARGOUT_check("$basetype",dims,dim_len); - if ($target == NULL) return NULL; -} -%typemap(python, argout) ARGOUT_D0_D1 * -{ - int dim_len = 2; - int dims[2] = {*_d0,*_d1}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, ignore) ARGOUT_D0_D2 * {} -%typemap(python, check) ARGOUT_D0_D2 * -{ - int dim_len = 2; - int dims[2] = {*_d0,*_d2}; - $target = ($type) ARGOUT_check("$basetype",dims,dim_len); - if ($target == NULL) return NULL; -} -%typemap(python, 
argout) ARGOUT_D0_D2 * -{ - int dim_len = 2; - int dims[2] = {*_d0,*_d2}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, ignore) ARGOUT_D1_D2 * {} -%typemap(python, check) ARGOUT_D1_D2 * -{ - int dim_len = 2; - int dims[2] = {*_d1,*_d2}; - $target = ($type) ARGOUT_check("$basetype",dims,dim_len); - if ($target == NULL) return NULL; -} -%typemap(python, argout) ARGOUT_D1_D2 * -{ - int dim_len = 2; - int dims[2] = {*_d1,*_d2}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, ignore) ARGOUT_D0_D1_D2 * {} -%typemap(python, check) ARGOUT_D0_D1_D2 * -{ - int dim_len = 3; - int dims[3] = {*_d0,*_d1,*_d2}; - $target = ($type) ARGOUT_check("$basetype",dims,dim_len); - if ($target == NULL) return NULL; -} -%typemap(python, argout) ARGOUT_D0_D1_D2 * -{ - int dim_len = 3; - int dims[3] = {*_d0,*_d1,*_d2}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -/************************************************************************/ -/* typemap code for ARGOUT_TUPLE arguments */ -/************************************************************************/ - -%typemap(python, ignore) ARGOUT_TUPLE_D0 * = ARGOUT_D0 *; -%typemap(python, check) ARGOUT_TUPLE_D0 * = ARGOUT_D0 *; -%typemap(python, argout) ARGOUT_TUPLE_D0 * -{ - int dim_len = 1; - int dims[1] = {*_d0}; - PyObject * res; - res = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) - return NULL; - $target = t_output_helper($target, res); -} - -%typemap(python, ignore) ARGOUT_TUPLE_D1 * = ARGOUT_D1 *; -%typemap(python, check) ARGOUT_TUPLE_D1 * = ARGOUT_D1 *; -%typemap(python, argout) ARGOUT_TUPLE_D1 * -{ - int dim_len = 1; - int dims[1] = {*_d1}; - PyObject * res; - res = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) - return 
NULL; - $target = t_output_helper($target, res); -} - -%typemap(python, ignore) ARGOUT_TUPLE_D2 * = ARGOUT_D2 *; -%typemap(python, check) ARGOUT_TUPLE_D2 * = ARGOUT_D2 *; -%typemap(python, argout) ARGOUT_TUPLE_D2 * -{ - int dim_len = 1; - int dims[1] = {*_d2}; - PyObject * res; - res = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) - return NULL; - $target = t_output_helper($target, res); -} - -%typemap(python, ignore) ARGOUT_TUPLE_D0_D1 * = ARGOUT_D0_D1 *; -%typemap(python, check) ARGOUT_TUPLE_D0_D1 * = ARGOUT_D0_D1 *; -%typemap(python, argout) ARGOUT_TUPLE_D0_D1 * -{ - int dim_len = 2; - int dims[2] = {*_d0,*_d1}; - PyObject * res; - res = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) - return NULL; - $target = t_output_helper($target, res); -} - -%typemap(python, ignore) ARGOUT_TUPLE_D0_D2 * = ARGOUT_D0_D2 *; -%typemap(python, check) ARGOUT_TUPLE_D0_D2 * = ARGOUT_D0_D2 *; -%typemap(python, argout) ARGOUT_TUPLE_D0_D2 * -{ - int dim_len = 2; - int dims[2] = {*_d0,*_d2}; - PyObject * res; - res = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) - return NULL; - $target = t_output_helper($target, res); -} - -%typemap(python, ignore) ARGOUT_TUPLE_D1_D2 * = ARGOUT_D1_D2 *; -%typemap(python, check) ARGOUT_TUPLE_D1_D2 * = ARGOUT_D1_D2 *; -%typemap(python, argout) ARGOUT_TUPLE_D1_D2 * -{ - int dim_len = 2; - int dims[2] = {*_d1,*_d2}; - PyObject * res; - res = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) - return NULL; - $target = t_output_helper($target, res); -} - -%typemap(python, ignore) ARGOUT_TUPLE_D0_D1_D2 * = ARGOUT_D0_D1_D2 *; -%typemap(python, check) ARGOUT_TUPLE_D0_D1_D2 * = ARGOUT_D0_D1_D2 *; -%typemap(python, argout) ARGOUT_TUPLE_D0_D1_D2 * -{ - int dim_len = 3; - int dims[3] = {*_d0,*_d1,*_d2}; - PyObject * res; - res = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) - 
return NULL; - $target = t_output_helper($target, res); -} - -/************************************************************************/ -/* typemap code for OUT arguments */ -/************************************************************************/ - -%typemap(python, out) OUT_D0 * -{ - // identical to typemap(python,argout) ARGOUT__D0 *; - int dim_len = 1; - int dims[1] = {*_d0}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, out) OUT_D1 * -{ - // identical to typemap(python,argout) ARGOUT__D1 *; - int dim_len = 1; - int dims[1] = {*_d1}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, out) OUT_D2 * -{ - // identical to typemap(python,argout) ARGOUT__D2 *; - int dim_len = 1; - int dims[1] = {*_d2}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, out) OUT_D0_D1 * -{ - // identical to typemap(python,argout) ARGOUT__D0_D1 *; - int dim_len = 2; - int dims[2] = {*_d0,*_d1}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, out) OUT_D0_D2 * -{ - // identical to typemap(python,argout) ARGOUT__D0_D2 *; - int dim_len = 2; - int dims[2] = {*_d0,*_d2}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, out) OUT_D1_D2 * -{ - // identical to typemap(python,argout) ARGOUT__D1_D2 *; - int dim_len = 2; - int dims[2] = {*_d1,*_d2}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, out) OUT_D0_D1_D2 * -{ - // identical to typemap(python,argout) ARGOUT__D0_D1_D2 *; - int dim_len = 3; - int dims[3] = {*_d0,*_d1,*_d2}; - $target = ARGOUT_argout( (char*) $source,"$basetype", - dims, 
dim_len); - if ($target == NULL) return NULL; -} - -%typemap(python, ignore) int DIM0(int* _d0) -{ _d0 = &$target; } -%typemap(python, ignore) int DIM1(int* _d1) -{ _d1 = &$target; } -%typemap(python, ignore) int DIM2(int* _d2) -{ _d2 = &$target; } - -%typemap(python, in) int IN_DIM0(int* _d0) -{ - $target = (int) PyInt_AsLong($source); - _d0 = &$target; -} -%typemap(python, in) int IN_DIM1(int* _d1) -{ - $target = (int) PyInt_AsLong($source); - _d1 = &$target; -} -%typemap(python, in) int IN_DIM2(int* _d2) -{ - $target = (int) PyInt_AsLong($source); - _d2 = &$target; -} - - - -%apply IN_D0 * {int_IN_D0 *,float_IN_D0 *,double_IN_D0 *}; -%apply IN_D1 * {int_IN_D1 *,float_IN_D1 *,double_IN_D1 *}; -%apply IN_D2 * {int_IN_D2 *,float_IN_D2 *,double_IN_D2 *}; -%apply IN_D0_D1 * {int_IN_D0_D1 *,float_IN_D0_D1 *,double_IN_D0_D1 *}; -%apply IN_D0_D2 * {int_IN_D0_D2 *,float_IN_D0_D2 *,double_IN_D0_D2 *}; -%apply IN_D1_D2 * {int_IN_D1_D2 *,float_IN_D1_D2 *,double_IN_D1_D2 *}; -%apply IN_D0_D1_D2 * {int_IN_D0_D1_D2 *,float_IN_D0_D1_D2 *,double_IN_D0_D1_D2 *}; - - -%apply ARGOUT_D0 * {int_ARGOUT_D0 *,float_ARGOUT_D0 *,double_ARGOUT_D0 *}; -%apply ARGOUT_D1 * {int_ARGOUT_D1 *,float_ARGOUT_D1 *,double_ARGOUT_D1 *}; -%apply ARGOUT_D2 * {int_ARGOUT_D2 *,float_ARGOUT_D2 *,double_ARGOUT_D2 *}; -%apply ARGOUT_D0_D1 * {int_ARGOUT_D0_D1 *,float_ARGOUT_D0_D1 *,double_ARGOUT_D0_D1 *}; -%apply ARGOUT_D0_D2 * {int_ARGOUT_D0_D2 *,float_ARGOUT_D0_D2 *,double_ARGOUT_D0_D2 *}; -%apply ARGOUT_D1_D2 * {int_ARGOUT_D1_D2 *,float_ARGOUT_D1_D2 *,double_ARGOUT_D1_D2 *}; -%apply ARGOUT_D0_D1_D2 * {int_ARGOUT_D0_D1_D2 *,float_ARGOUT_D0_D1_D2 *,double_ARGOUT_D0_D1_D2 *}; - - -%apply ARGOUT_TUPLE_D0 * {int_ARGOUT_TUPLE_D0 *,float_ARGOUT_TUPLE_D0 *,double_ARGOUT_TUPLE_D0 *}; -%apply ARGOUT_TUPLE_D1 * {int_ARGOUT_TUPLE_D1 *,float_ARGOUT_TUPLE_D1 *,double_ARGOUT_TUPLE_D1 *}; -%apply ARGOUT_TUPLE_D2 * {int_ARGOUT_TUPLE_D2 *,float_ARGOUT_TUPLE_D2 *,double_ARGOUT_TUPLE_D2 *}; -%apply ARGOUT_TUPLE_D0_D1 * 
{int_ARGOUT_TUPLE_D0_D1 *,float_ARGOUT_TUPLE_D0_D1 *,double_ARGOUT_TUPLE_D0_D1 *}; -%apply ARGOUT_TUPLE_D0_D2 * {int_ARGOUT_TUPLE_D0_D2 *,float_ARGOUT_TUPLE_D0_D2 *,double_ARGOUT_TUPLE_D0_D2 *}; -%apply ARGOUT_TUPLE_D1_D2 * {int_ARGOUT_TUPLE_D1_D2 *,float_ARGOUT_TUPLE_D1_D2 *,double_ARGOUT_TUPLE_D1_D2 *}; -%apply ARGOUT_TUPLE_D0_D1_D2 * {int_ARGOUT_TUPLE_D0_D1_D2 *,float_ARGOUT_TUPLE_D0_D1_D2 *,double_ARGOUT_TUPLE_D0_D1_D2 *}; - - -%apply OUT_D0 * {int_OUT_D0 *,float_OUT_D0 *,double_OUT_D0 *}; -%apply OUT_D1 * {int_OUT_D1 *,float_OUT_D1 *,double_OUT_D1 *}; -%apply OUT_D2 * {int_OUT_D2 *,float_OUT_D2 *,double_OUT_D2 *}; -%apply OUT_D0_D1 * {int_OUT_D0_D1 *,float_OUT_D0_D1 *,double_OUT_D0_D1 *}; -%apply OUT_D0_D2 * {int_OUT_D0_D2 *,float_OUT_D0_D2 *,double_OUT_D0_D2 *}; -%apply OUT_D1_D2 * {int_OUT_D1_D2 *,float_OUT_D1_D2 *,double_OUT_D1_D2 *}; -%apply OUT_D0_D1_D2 * {int_OUT_D0_D1_D2 *,float_OUT_D0_D1_D2 *,double_OUT_D0_D1_D2 *}; - - Added: trunk/Lib/cluster/src/vq.c =================================================================== --- trunk/Lib/cluster/src/vq.c 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/src/vq.c 2007-06-19 15:07:48 UTC (rev 3110) @@ -0,0 +1,142 @@ +/* + * vim:syntax=c + */ +#include +#include + +/* + * results is put into code, which contains initially the initial code + * + * mdist and code should have at least n elements + */ +const static double rbig = 1e100; + + +static int float_vq_1d(const float *in, int n, + const float *init, int ncode, + int *code, float *mdist) +{ + int i, j; + float m, d; + + for (i = 0; i < n; ++i) { + m = (float)rbig; + /* Compute the minimal distance for obsvervation i */ + for (j = 0; j < ncode; ++j) { + d = (in[i] - init[j]); + d *= d; + if ( d < m) { + m = d; + } + } + mdist[i] = m; + code[i] = j; + } + return 0; +} + +static int float_vq_obs(const float *obs, + float *code_book, int Ncodes, int Nfeatures, + int* code, float *lowest_dist) +{ + int i,j,k=0; + float dist, diff; + + *lowest_dist = 
(float) rbig; + for(i = 0; i < Ncodes; i++) { + dist = 0; + for(j=0; j < Nfeatures; j++) { + diff = code_book[k] - obs[j]; + dist += diff*diff; + k++; + } + dist = (float)sqrt(dist); + if (dist < *lowest_dist) { + *code = i; + *lowest_dist = dist; + } + } + + return 0; +} + +int float_tvq( + float* obs, + float* code_book, + int Nobs, int Ncodes, int Nfeatures, + int* codes, float* lowest_dist) +{ + int i; + for( i = 0; i < Nobs; i++) { + float_vq_obs( + &(obs[i*Nfeatures]), + code_book,Ncodes, Nfeatures, + &(codes[i]), &(lowest_dist[i])); + } + return 0; +} + +static int double_vq_1d(const double *in, int n, + const double *init, int ncode, + int *code, double *mdist) +{ + int i, j; + double m, d; + + for (i = 0; i < n; ++i) { + m = (double)rbig; + /* Compute the minimal distance for obsvervation i */ + for (j = 0; j < ncode; ++j) { + d = (in[i] - init[j]); + d *= d; + if ( d < m) { + m = d; + } + } + mdist[i] = m; + code[i] = j; + } + return 0; +} + +static int double_vq_obs(const double *obs, + double *code_book, int Ncodes, int Nfeatures, + int* code, double *lowest_dist) +{ + int i,j,k=0; + double dist, diff; + + *lowest_dist = (double) rbig; + for(i = 0; i < Ncodes; i++) { + dist = 0; + for(j=0; j < Nfeatures; j++) { + diff = code_book[k] - obs[j]; + dist += diff*diff; + k++; + } + dist = (double)sqrt(dist); + if (dist < *lowest_dist) { + *code = i; + *lowest_dist = dist; + } + } + + return 0; +} + +int double_tvq( + double* obs, + double* code_book, + int Nobs, int Ncodes, int Nfeatures, + int* codes, double* lowest_dist) +{ + int i; + for( i = 0; i < Nobs; i++) { + double_vq_obs( + &(obs[i*Nfeatures]), + code_book,Ncodes, Nfeatures, + &(codes[i]), &(lowest_dist[i])); + } + return 0; +} + Added: trunk/Lib/cluster/src/vq.def =================================================================== --- trunk/Lib/cluster/src/vq.def 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/src/vq.def 2007-06-19 15:07:48 UTC (rev 3110) @@ -0,0 +1,12 @@ +autogen 
definitions vq.tpl; + +data_type = { + type_name = float ; + data_type = float ; +} ; + +data_type = { + type_name = double ; + data_type = double ; +} ; + Modified: trunk/Lib/cluster/src/vq.h =================================================================== --- trunk/Lib/cluster/src/vq.h 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/src/vq.h 2007-06-19 15:07:48 UTC (rev 3110) @@ -1,57 +1,10 @@ -#ifndef vq_h -#define vq_h -/* -//#define real float -//#define scan_format "%f" +#ifndef _VQ_H_ +#define _VQ_H -#define real double -#define scan_format "%lf" +int double_tvq(double* obs, double* code_book, int Nobs, int Ncodes, + int Nfeatures, int* codes, double* lowest_dist); +int float_tvq(float* obs, float* code_book, int Nobs, int Ncodes, + int Nfeatures, int* codes, float* lowest_dist); - -void vq_obs(real* obs,real* code_book, int Ncodes, int Nfeatures, - int& code, real& lowest_dist); - -void vq(real* obs,real* code_book, int Nobs, int Ncodes, int Nfeatures, - int* codes, real* lowest_dist); -*/ -#define BIG 10000. 
- -template -void tvq_obs(T* obs,T* code_book, int Ncodes, int Nfeatures, - int& code, T& lowest_dist) -{ - int i,j,k=0; - T dist, diff; - - lowest_dist = (T) BIG; - for(i=0; i < Ncodes; i++) - { - dist=0; - for(j=0; j < Nfeatures; j++) - { - diff = code_book[k] - obs[j]; - dist += diff*diff; - k++; - } - dist = (T)sqrt(dist); - if (dist < lowest_dist) - { - code = i; - lowest_dist = dist; - } - } -} - -template -void tvq(T* obs,T* code_book, int Nobs, int Ncodes, int Nfeatures, - int* codes, T* lowest_dist) -{ - int i; - for( i = 0; i < Nobs; i++) - { - tvq_obs(&(obs[i*Nfeatures]),code_book,Ncodes,Nfeatures, - codes[i],lowest_dist[i]); - } -} -#endif \ No newline at end of file +#endif Deleted: trunk/Lib/cluster/src/vq.i =================================================================== --- trunk/Lib/cluster/src/vq.i 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/src/vq.i 2007-06-19 15:07:48 UTC (rev 3110) @@ -1,30 +0,0 @@ -%module _vq -%{ - -#include "vq.h" - -/* Wrappers for the template code */ - -void float_vq(float * obs,float* code_book, int Nobs, int Ncodes, - int Nfeatures, int* codes, float* lowest_dist) -{ - tvq(obs,code_book,Nobs,Ncodes,Nfeatures,codes,lowest_dist); -} - -void double_vq(double * obs,double* code_book, int Nobs, int Ncodes, - int Nfeatures, int* codes, double* lowest_dist) -{ - tvq(obs,code_book,Nobs,Ncodes,Nfeatures,codes,lowest_dist); -} - -%} - -%include swig_num.i - -void double_vq(double_IN_D0_D2 *obs,double_IN_D1_D2 *code_book, - int DIM0, int DIM1, int DIM2, - int_ARGOUT_TUPLE_D0 *codes, double_ARGOUT_TUPLE_D0 *lowest_dist); - -void float_vq(float_IN_D0_D2 *obs,float_IN_D1_D2 *code_book, - int DIM0, int DIM1, int DIM2, - int_ARGOUT_TUPLE_D0 *codes, float_ARGOUT_TUPLE_D0 *lowest_dist); Added: trunk/Lib/cluster/src/vq.tpl =================================================================== --- trunk/Lib/cluster/src/vq.tpl 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/src/vq.tpl 2007-06-19 15:07:48 UTC (rev 3110) 
@@ -0,0 +1,79 @@ +[+ AutoGen5 template c +] +/* + * vim:syntax=c + */ +#include +#include + +/* + * results is put into code, which contains initially the initial code + * + * mdist and code should have at least n elements + */ +const static double rbig = 1e100; + +[+ FOR data_type +] +static int [+ (get "type_name") +]_vq_1d(const [+ (get "type_name") +] *in, int n, + const [+ (get "type_name") +] *init, int ncode, + int *code, [+ (get "type_name") +] *mdist) +{ + int i, j; + [+ (get "data_type") +] m, d; + + for (i = 0; i < n; ++i) { + m = ([+ (get "data_type") +])rbig; + /* Compute the minimal distance for obsvervation i */ + for (j = 0; j < ncode; ++j) { + d = (in[i] - init[j]); + d *= d; + if ( d < m) { + m = d; + } + } + mdist[i] = m; + code[i] = j; + } + return 0; +} + +static int [+ (get "type_name") +]_vq_obs(const [+ (get "data_type") +] *obs, + [+ (get "data_type") +] *code_book, int Ncodes, int Nfeatures, + int* code, [+ (get "data_type") +] *lowest_dist) +{ + int i,j,k=0; + [+ (get "data_type") +] dist, diff; + + *lowest_dist = ([+ (get "data_type") +]) rbig; + for(i = 0; i < Ncodes; i++) { + dist = 0; + for(j=0; j < Nfeatures; j++) { + diff = code_book[k] - obs[j]; + dist += diff*diff; + k++; + } + dist = ([+ (get "data_type") +])sqrt(dist); + if (dist < *lowest_dist) { + *code = i; + *lowest_dist = dist; + } + } + + return 0; +} + +int [+ (get "type_name") +]_tvq( + [+ (get "data_type") +]* obs, + [+ (get "data_type") +]* code_book, + int Nobs, int Ncodes, int Nfeatures, + int* codes, [+ (get "data_type") +]* lowest_dist) +{ + int i; + for( i = 0; i < Nobs; i++) { + [+ (get "type_name") +]_vq_obs( + &(obs[i*Nfeatures]), + code_book,Ncodes, Nfeatures, + &(codes[i]), &(lowest_dist[i])); + } + return 0; +} +[+ ENDFOR data_type +] Added: trunk/Lib/cluster/src/vq_module.c =================================================================== --- trunk/Lib/cluster/src/vq_module.c 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/src/vq_module.c 
2007-06-19 15:07:48 UTC (rev 3110) @@ -0,0 +1,154 @@ +/* + * Last Change: Tue Jun 19 11:00 PM 2007 J + * + */ +#include + +#include + +#include "vq.h" + +PyObject* compute_vq(PyObject*, PyObject*); + +static PyMethodDef vqmethods [] = { + {"vq", compute_vq, METH_VARARGS, "TODO docstring"}, + {NULL, NULL, 0, NULL} +}; + +PyMODINIT_FUNC init_vq(void) +{ + Py_InitModule("_vq", vqmethods); + import_array(); +} + +PyObject* compute_vq(PyObject* self, PyObject* args) +{ + PyObject *obs, *code, *out; + PyArrayObject *obs_a, *code_a; + PyArrayObject *index_a, *dist_a; + int typenum1, typenum2; + int n, nc, d, nd; + + if ( !PyArg_ParseTuple(args, "OO", &obs, &code) ) { + return NULL; + } + + /* Check that obs and code both are arrays of same type, conformant + * dimensions, etc...*/ + if (!(PyArray_Check(obs) && PyArray_Check(code))) { + PyErr_Format(PyExc_ValueError, + "observation and code should be numpy arrays"); + return NULL; + } + + typenum1 = PyArray_TYPE(obs); + typenum2 = PyArray_TYPE(code); + if (typenum1 != typenum1) { + PyErr_Format(PyExc_ValueError, + "observation and code should have same type"); + return NULL; + } + obs_a = (PyArrayObject*)PyArray_FROM_OF(obs, + NPY_CONTIGUOUS | NPY_NOTSWAPPED | NPY_ALIGNED); + if (obs_a == NULL) { + return NULL; + } + + code_a = (PyArrayObject*)PyArray_FROM_OF(code, + NPY_CONTIGUOUS | NPY_NOTSWAPPED | NPY_ALIGNED); + if (code_a == NULL) { + goto clean_obs_a; + } + + if( !(obs_a->nd == code_a->nd)) { + PyErr_Format(PyExc_ValueError, + "observation and code should have same shape"); + goto clean_code_a; + } + + switch (obs_a->nd) { + case 1: + nd = 1; + d = 1; + n = PyArray_DIM(obs, 0); + nc = PyArray_DIM(code, 0); + break; + case 2: + nd = 2; + n = PyArray_DIM(obs, 0); + d = PyArray_DIM(obs, 1); + nc = PyArray_DIM(code, 0); + if (! 
(d == PyArray_DIM(code, 1)) ) { + PyErr_Format(PyExc_ValueError, + "obs and code should have same number of " + " features (columns)"); + goto clean_code_a; + } + break; + default: + PyErr_Format(PyExc_ValueError, + "rank different than 1 or 2 are not supported"); + goto clean_code_a; + } + + switch (PyArray_TYPE(obs)) { + case NPY_FLOAT: + dist_a = (PyArrayObject*)PyArray_EMPTY(1, &n, typenum1, 0); + if (dist_a == NULL) { + goto clean_code_a; + } + index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, NPY_INT, 0); + if (index_a == NULL) { + goto clean_dist_a; + } + float_tvq((float*)obs_a->data, (float*)code_a->data, n, nc, d, + (int*)index_a->data, (float*)dist_a->data); + break; + case NPY_DOUBLE: + dist_a = (PyArrayObject*)PyArray_EMPTY(1, &n, typenum1, 0); + if (dist_a == NULL) { + goto clean_code_a; + } + index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, NPY_INT, 0); + if (index_a == NULL) { + goto clean_dist_a; + } + double_tvq((double*)obs_a->data, (double*)code_a->data, n, nc, d, + (int*)index_a->data, (double*)dist_a->data); + break; + default: + PyErr_Format(PyExc_ValueError, + "type other than float or double not supported"); + goto clean_code_a; + } + + /* Create output */ + out = PyTuple_New(2); + if (out == NULL) { + goto clean_index_a; + } + if (PyTuple_SetItem(out, 0, (PyObject*)index_a)) { + goto clean_out; + } + if (PyTuple_SetItem(out, 1, (PyObject*)dist_a)) { + goto clean_out; + } + + /* Clean everything */ + Py_DECREF(code_a); + Py_DECREF(obs_a); + return out; + +clean_out: + Py_DECREF(out); +clean_dist_a: + Py_DECREF(dist_a); +clean_index_a: + Py_DECREF(index_a); +clean_code_a: + Py_DECREF(code_a); +clean_obs_a: + Py_DECREF(obs_a); + return NULL; +} + Deleted: trunk/Lib/cluster/src/vq_wrap.cpp =================================================================== --- trunk/Lib/cluster/src/vq_wrap.cpp 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/src/vq_wrap.cpp 2007-06-19 15:07:48 UTC (rev 3110) @@ -1,982 +0,0 @@ -/* 
---------------------------------------------------------------------------- - * This file was automatically generated by SWIG (http://www.swig.org). - * Version 1.3u-20010227-1913 (Alpha 5) - * - * This file is not intended to be easily readable and contains a number of - * coding conventions designed to improve portability and efficiency. Do not make - * changes to this file unless you know what you are doing--modify the SWIG - * interface file instead. - * ----------------------------------------------------------------------------- */ - -#define SWIGPYTHON -/*********************************************************************** - * common.swg - * - * This file contains generic SWIG runtime support for pointer - * type checking as well as a few commonly used macros to control - * external linkage. - * - * Author : David Beazley (beazley at cs.uchicago.edu) - * - * Copyright (c) 1999-2000, The University of Chicago - * - * This file may be freely redistributed without license or fee provided - * this copyright message remains intact. 
- ************************************************************************/ - -#include - -#if defined(_WIN32) || defined(__WIN32__) -# if defined(_MSC_VER) -# if defined(STATIC_LINKED) -# define SWIGEXPORT(a) a -# else -# define SWIGEXPORT(a) __declspec(dllexport) a -# endif -# else -# if defined(__BORLANDC__) -# define SWIGEXPORT(a) a _export -# else -# define SWIGEXPORT(a) a -# endif -#endif -#else -# define SWIGEXPORT(a) a -#endif - -#ifdef SWIG_GLOBAL -#define SWIGRUNTIME(a) SWIGEXPORT(a) -#else -#define SWIGRUNTIME(a) static a -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct swig_type_info { - char *name; - void *(*converter)(void *); - char *str; - struct swig_type_info *next; - struct swig_type_info *prev; -} swig_type_info; - -#ifdef SWIG_NOINCLUDE -SWIGEXPORT(swig_type_info *) SWIG_TypeRegister(swig_type_info *); -SWIGEXPORT(swig_type_info *) SWIG_TypeCheck(char *c, swig_type_info *); -SWIGEXPORT(void *) SWIG_TypeCast(swig_type_info *, void *); -#else - -static swig_type_info *swig_type_list = 0; - -/* Register a type mapping with the type-checking */ -SWIGRUNTIME(swig_type_info *) -SWIG_TypeRegister(swig_type_info *ti) -{ - swig_type_info *tc, *head, *ret, *next; - /* Check to see if this type has already been registered */ - tc = swig_type_list; - while (tc) { - if (strcmp(tc->name, ti->name) == 0) { - /* Already exists in the table. 
Just add additional types to the list */ - head = tc; - next = tc->next; - goto l1; - } - tc = tc->prev; - } - head = ti; - next = 0; - - /* Place in list */ - ti->prev = swig_type_list; - swig_type_list = ti; - - /* Build linked lists */ - l1: - ret = head; - tc = ti + 1; - /* Patch up the rest of the links */ - while (tc->name) { - head->next = tc; - tc->prev = head; - head = tc; - tc++; - } - head->next = next; - return ret; -} - -/* Check the typename */ -SWIGRUNTIME(swig_type_info *) -SWIG_TypeCheck(char *c, swig_type_info *ty) -{ - swig_type_info *s; - if (!ty) return 0; /* Void pointer */ - s = ty->next; /* First element always just a name */ - while (s) { - if (strcmp(s->name,c) == 0) { - if (s == ty->next) return s; - /* Move s to the top of the linked list */ - s->prev->next = s->next; - if (s->next) { - s->next->prev = s->prev; - } - /* Insert s as second element in the list */ - s->next = ty->next; - if (ty->next) ty->next->prev = s; - ty->next = s; - return s; - } - s = s->next; - } - return 0; -} - -/* Cast a pointer (needed for C++ inheritance */ -SWIGRUNTIME(void *) -SWIG_TypeCast(swig_type_info *ty, void *ptr) -{ - if ((!ty) || (!ty->converter)) return ptr; - return (*ty->converter)(ptr); -} - -/* Search for a swig_type_info structure */ -SWIGRUNTIME(void *) -SWIG_TypeQuery(const char *name) { - swig_type_info *ty = swig_type_list; - while (ty) { - if (ty->str && (strcmp(name,ty->str) == 0)) return ty; - if (ty->name && (strcmp(name,ty->name) == 0)) return ty; - ty = ty->prev; - } - return 0; -} - -#endif - -#ifdef __cplusplus -} -#endif - - - -/*********************************************************************** - * python.swg - * - * This file contains the runtime support for Python modules - * and includes code for managing global variables and pointer - * type checking. 
- * - * Author : David Beazley (beazley at cs.uchicago.edu) - ************************************************************************/ - -#include -#include "Python.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define SWIG_PY_INT 1 -#define SWIG_PY_FLOAT 2 -#define SWIG_PY_STRING 3 -#define SWIG_PY_POINTER 4 - -/* Constant information structure */ -typedef struct swig_const_info { - int type; - char *name; - long lvalue; - double dvalue; - void *pvalue; - swig_type_info **ptype; -} swig_const_info; - -#ifdef SWIG_NOINCLUDE - -SWIGEXPORT(PyObject *) SWIG_newvarlink(); -SWIGEXPORT(void) SWIG_addvarlink(PyObject *, char *, PyObject *(*)(void), int (*)(PyObject *)); -SWIGEXPORT(int) SWIG_ConvertPtr(PyObject *, void **, swig_type_info *, int); -SWIGEXPORT(void) SWIG_MakePtr(char *c, void *, swig_type_info *); -SWIGEXPORT(PyObject *) SWIG_NewPointerObj(void *, swig_type_info *); -SWIGEXPORT(void) SWIG_InstallConstants(PyObject *d, swig_const_info constants[]); - -#else - -/* ----------------------------------------------------------------------------- - * global variable support code. 
- * ----------------------------------------------------------------------------- */ - -typedef struct swig_globalvar { - char *name; /* Name of global variable */ - PyObject *(*get_attr)(void); /* Return the current value */ - int (*set_attr)(PyObject *); /* Set the value */ - struct swig_globalvar *next; -} swig_globalvar; - -typedef struct swig_varlinkobject { - PyObject_HEAD - swig_globalvar *vars; -} swig_varlinkobject; - -static PyObject * -swig_varlink_repr(swig_varlinkobject *v) { - v = v; - return PyString_FromString(""); -} - -static int -swig_varlink_print(swig_varlinkobject *v, FILE *fp, int flags) { - swig_globalvar *var; - flags = flags; - fprintf(fp,"Global variables { "); - for (var = v->vars; var; var=var->next) { - fprintf(fp,"%s", var->name); - if (var->next) fprintf(fp,", "); - } - fprintf(fp," }\n"); - return 0; -} - -static PyObject * -swig_varlink_getattr(swig_varlinkobject *v, char *n) { - swig_globalvar *var = v->vars; - while (var) { - if (strcmp(var->name,n) == 0) { - return (*var->get_attr)(); - } - var = var->next; - } - PyErr_SetString(PyExc_NameError,"Unknown C global variable"); - return NULL; -} - -static int -swig_varlink_setattr(swig_varlinkobject *v, char *n, PyObject *p) { - swig_globalvar *var = v->vars; - while (var) { - if (strcmp(var->name,n) == 0) { - return (*var->set_attr)(p); - } - var = var->next; - } - PyErr_SetString(PyExc_NameError,"Unknown C global variable"); - return 1; -} - -statichere PyTypeObject varlinktype = { - PyObject_HEAD_INIT(0) - 0, - "swigvarlink", /* Type name */ - sizeof(swig_varlinkobject), /* Basic size */ - 0, /* Itemsize */ - 0, /* Deallocator */ - (printfunc) swig_varlink_print, /* Print */ - (getattrfunc) swig_varlink_getattr, /* get attr */ - (setattrfunc) swig_varlink_setattr, /* Set attr */ - 0, /* tp_compare */ - (reprfunc) swig_varlink_repr, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_mapping*/ - 0, /* tp_hash */ -}; - -/* Create a variable linking object for use later */ 
-SWIGRUNTIME(PyObject *) -SWIG_newvarlink(void) { - swig_varlinkobject *result = 0; - result = PyMem_NEW(swig_varlinkobject,1); - varlinktype.ob_type = &PyType_Type; /* Patch varlinktype into a PyType */ - result->ob_type = &varlinktype; - result->vars = 0; - result->ob_refcnt = 0; - Py_XINCREF((PyObject *) result); - return ((PyObject*) result); -} - -SWIGRUNTIME(void) -SWIG_addvarlink(PyObject *p, char *name, - PyObject *(*get_attr)(void), int (*set_attr)(PyObject *p)) { - swig_varlinkobject *v; - swig_globalvar *gv; - v= (swig_varlinkobject *) p; - gv = (swig_globalvar *) malloc(sizeof(swig_globalvar)); - gv->name = (char *) malloc(strlen(name)+1); - strcpy(gv->name,name); - gv->get_attr = get_attr; - gv->set_attr = set_attr; - gv->next = v->vars; - v->vars = gv; -} -/* Convert a pointer value */ -SWIGRUNTIME(int) -SWIG_ConvertPtr(PyObject *obj, void **ptr, swig_type_info *ty, int flags) { - unsigned long p; - register int d; - swig_type_info *tc; - char *c; - static PyObject *SWIG_this = 0; - int newref = 0; - - if (!obj || (obj == Py_None)) { - *ptr = 0; - return 0; - } -#ifdef SWIG_COBJECT_TYPES - if (!(PyCObject_Check(obj))) { - if (!SWIG_this) - SWIG_this = PyString_InternFromString("this"); - obj = PyObject_GetAttr(obj,SWIG_this); - newref = 1; - if (!obj) goto type_error; - if (!PyCObject_Check(obj)) { - Py_DECREF(obj); - goto type_error; - } - } - *ptr = PyCObject_AsVoidPtr(obj); - c = (char *) PyCObject_GetDesc(obj); - if (newref) Py_DECREF(obj); - goto cobject; -#else - if (!(PyString_Check(obj))) { - if (!SWIG_this) - SWIG_this = PyString_InternFromString("this"); - obj = PyObject_GetAttr(obj,SWIG_this); - newref = 1; - if (!obj) goto type_error; - if (!PyString_Check(obj)) { - Py_DECREF(obj); - goto type_error; - } - } - c = PyString_AsString(obj); - p = 0; - /* Pointer values must start with leading underscore */ - if (*c != '_') { - *ptr = (void *) 0; - if (strcmp(c,"NULL") == 0) { - if (newref) Py_DECREF(obj); - return 0; - } else { - if (newref) 
Py_DECREF(obj); - goto type_error; - } - } - c++; - /* Extract hex value from pointer */ - while ((d = *c)) { - if ((d >= '0') && (d <= '9')) - p = (p << 4) + (d - '0'); - else if ((d >= 'a') && (d <= 'f')) - p = (p << 4) + (d - ('a'-10)); - else - break; - c++; - } - *ptr = (void *) p; - if (newref) Py_DECREF(obj); -#endif - -#ifdef SWIG_COBJECT_TYPES -cobject: -#endif - - if (ty) { - tc = SWIG_TypeCheck(c,ty); - if (!tc) goto type_error; - *ptr = SWIG_TypeCast(tc,(void*)p); - } - return 0; - -type_error: - - if (flags) { - if (ty) { - char *temp = (char *) malloc(64+strlen(ty->name)); - sprintf(temp,"Type error. Expected %s", ty->name); - PyErr_SetString(PyExc_TypeError, temp); - free((char *) temp); - } else { - PyErr_SetString(PyExc_TypeError,"Expected a pointer"); - } - } - return -1; -} - -/* Take a pointer and convert it to a string */ -SWIGRUNTIME(void) -SWIG_MakePtr(char *c, void *ptr, swig_type_info *ty) { - static char hex[17] = "0123456789abcdef"; - unsigned long p, s; - char result[32], *r; - r = result; - p = (unsigned long) ptr; - if (p > 0) { - while (p > 0) { - s = p & 0xf; - *(r++) = hex[s]; - p = p >> 4; - } - *r = '_'; - while (r >= result) - *(c++) = *(r--); - strcpy (c, ty->name); - } else { - strcpy (c, "NULL"); - } -} - -/* Create a new pointer object */ -SWIGRUNTIME(PyObject *) -SWIG_NewPointerObj(void *ptr, swig_type_info *type) { - char result[512]; - PyObject *robj; - if (!ptr) { - Py_INCREF(Py_None); - return Py_None; - } -#ifdef SWIG_COBJECT_TYPES - robj = PyCObject_FromVoidPtrAndDesc((void *) ptr, type->name, NULL); -#else - SWIG_MakePtr(result,ptr,type); - robj = PyString_FromString(result); -#endif - return robj; -} - -/* Install Constants */ -SWIGRUNTIME(void) -SWIG_InstallConstants(PyObject *d, swig_const_info constants[]) { - int i; - PyObject *obj; - for (i = 0; constants[i].type; i++) { - switch(constants[i].type) { - case SWIG_PY_INT: - obj = PyInt_FromLong(constants[i].lvalue); - break; - case SWIG_PY_FLOAT: - obj = 
PyFloat_FromDouble(constants[i].dvalue); - break; - case SWIG_PY_STRING: - obj = PyString_FromString((char *) constants[i].pvalue); - break; - case SWIG_PY_POINTER: - obj = SWIG_NewPointerObj(constants[i].pvalue, *(constants[i]).ptype); - break; - default: - obj = 0; - break; - } - if (obj) { - PyDict_SetItemString(d,constants[i].name,obj); - Py_DECREF(obj); - } - } -} - -#endif - -#ifdef __cplusplus -} -#endif - - - -/* -------- TYPES TABLE (BEGIN) -------- */ - -static swig_type_info *swig_types[1]; - -/* -------- TYPES TABLE (END) -------- */ - -#define SWIG_init init_vq - -#define SWIG_name "_vq" - - -#include "vq.h" - -/* Wrappers for the template code */ - -void float_vq(float * obs,float* code_book, int Nobs, int Ncodes, - int Nfeatures, int* codes, float* lowest_dist) -{ - tvq(obs,code_book,Nobs,Ncodes,Nfeatures,codes,lowest_dist); -} - -void double_vq(double * obs,double* code_book, int Nobs, int Ncodes, - int Nfeatures, int* codes, double* lowest_dist) -{ - tvq(obs,code_book,Nobs,Ncodes,Nfeatures,codes,lowest_dist); -} - - -static PyObject* l_output_helper(PyObject* target, PyObject* o) { - PyObject* o2; - if (!target) { - target = o; - } else if (target == Py_None) { - Py_DECREF(Py_None); - target = o; - } else { - if (!PyList_Check(target)) { - o2 = target; - target = PyList_New(0); - PyList_Append(target, o2); - Py_XDECREF(o2); - } - PyList_Append(target,o); - Py_XDECREF(o); - } - return target; -} - -static PyObject* t_output_helper(PyObject* target, PyObject* o) { - PyObject* o2; - PyObject* o3; - - if (!target) { - target = o; - } else if (target == Py_None) { - Py_DECREF(Py_None); - target = o; - } else { - if (!PyTuple_Check(target)) { - o2 = target; - target = PyTuple_New(1); - PyTuple_SetItem(target, 0, o2); - } - o3 = PyTuple_New(1); - PyTuple_SetItem(o3, 0, o); - - o2 = target; - target = PySequence_Concat(o2, o3); - Py_DECREF(o2); - Py_DECREF(o3); - } - return target; -} - - -#include "numpy/noprefix.h" - -// hmmm. 
how do we prevent typedefs from conflicting -// with users definition on complex numbers? -//#include "complex_test.h" -//typedef struct{ float real; -// float imag;} complex; -//typedef struct{ double real; -// double imag;} zcomplex; - - -// used for converting typecodes to memory sizes. -int char_to_size(char type) -{ - if (type=='i') return sizeof(int); - if (type=='f') return sizeof(float); - if (type=='d') return sizeof(double); - if (type=='c') return 2*sizeof(float); - if (type=='z') return 2*sizeof(double); -} -int char_to_numtype(char type) -{ - if (type=='i') return 'i'; - if (type=='f') return 'f'; - if (type=='d') return 'd'; - if (type=='c') return 'F'; - if (type=='z') return 'D'; -} - - -typedef int int_IN_D0; -typedef float float_IN_D0; -typedef double double_IN_D0; -typedef int int_IN_D1; -typedef float float_IN_D1; -typedef double double_IN_D1; -typedef int int_IN_D2; -typedef float float_IN_D2; -typedef double double_IN_D2; -typedef int int_IN_D0_D1; -typedef float float_IN_D0_D1; -typedef double double_IN_D0_D1; -typedef int int_IN_D0_D2; -typedef float float_IN_D0_D2; -typedef double double_IN_D0_D2; -typedef int int_IN_D1_D2; -typedef float float_IN_D1_D2; -typedef double double_IN_D1_D2; -typedef int int_IN_D0_D1_D2; -typedef float float_IN_D0_D1_D2; -typedef double double_IN_D0_D1_D2; - - -typedef int int_ARGOUT_D0; -typedef float float_ARGOUT_D0; -typedef double double_ARGOUT_D0; -typedef int int_ARGOUT_D1; -typedef float float_ARGOUT_D1; -typedef double double_ARGOUT_D1; -typedef int int_ARGOUT_D2; -typedef float float_ARGOUT_D2; -typedef double double_ARGOUT_D2; -typedef int int_ARGOUT_D0_D1; -typedef float float_ARGOUT_D0_D1; -typedef double double_ARGOUT_D0_D1; -typedef int int_ARGOUT_D0_D2; -typedef float float_ARGOUT_D0_D2; -typedef double double_ARGOUT_D0_D2; -typedef int int_ARGOUT_D1_D2; -typedef float float_ARGOUT_D1_D2; -typedef double double_ARGOUT_D1_D2; -typedef int int_ARGOUT_D0_D1_D2; -typedef float float_ARGOUT_D0_D1_D2; 
-typedef double double_ARGOUT_D0_D1_D2; - - -typedef int int_ARGOUT_TUPLE_D0; -typedef float float_ARGOUT_TUPLE_D0; -typedef double double_ARGOUT_TUPLE_D0; -typedef int int_ARGOUT_TUPLE_D1; -typedef float float_ARGOUT_TUPLE_D1; -typedef double double_ARGOUT_TUPLE_D1; -typedef int int_ARGOUT_TUPLE_D2; -typedef float float_ARGOUT_TUPLE_D2; -typedef double double_ARGOUT_TUPLE_D2; -typedef int int_ARGOUT_TUPLE_D0_D1; -typedef float float_ARGOUT_TUPLE_D0_D1; -typedef double double_ARGOUT_TUPLE_D0_D1; -typedef int int_ARGOUT_TUPLE_D0_D2; -typedef float float_ARGOUT_TUPLE_D0_D2; -typedef double double_ARGOUT_TUPLE_D0_D2; -typedef int int_ARGOUT_TUPLE_D1_D2; -typedef float float_ARGOUT_TUPLE_D1_D2; -typedef double double_ARGOUT_TUPLE_D1_D2; -typedef int int_ARGOUT_TUPLE_D0_D1_D2; -typedef float float_ARGOUT_TUPLE_D0_D1_D2; -typedef double double_ARGOUT_TUPLE_D0_D1_D2; - - -typedef int int_OUT_D0; -typedef float float_OUT_D0; -typedef double double_OUT_D0; -typedef int int_OUT_D1; -typedef float float_OUT_D1; -typedef double double_OUT_D1; -typedef int int_OUT_D2; -typedef float float_OUT_D2; -typedef double double_OUT_D2; -typedef int int_OUT_D0_D1; -typedef float float_OUT_D0_D1; -typedef double double_OUT_D0_D1; -typedef int int_OUT_D0_D2; -typedef float float_OUT_D0_D2; -typedef double double_OUT_D0_D2; -typedef int int_OUT_D1_D2; -typedef float float_OUT_D1_D2; -typedef double double_OUT_D1_D2; -typedef int int_OUT_D0_D1_D2; -typedef float float_OUT_D0_D1_D2; -typedef double double_OUT_D0_D1_D2; - - - -PyArrayObject* IN_in(PyObject* source, char* basetype_string, - int** target_dims, int dims) -{ - PyArrayObject *a_obj; - char ar_type = char_to_numtype(basetype_string[0]); - a_obj = (PyArrayObject*) PyArray_ContiguousFromObject(source,ar_type, - dims,dims); - if (a_obj == NULL) - { - //PyArray Contiguous From Object will set the error value. 
- return NULL; - } - for(int i = 0; i < dims;i++) - { - *(target_dims[i]) = a_obj->dimensions[i]; - } - return a_obj; -} - - -char* ARGOUT_check(char* basetype_string,int* dims, int dim_len) -{ - char *rdata; - int element_size = char_to_size(basetype_string[0]); - int tot_length = 1; - for (int i = 0; i < dim_len; i++) - tot_length *= dims[i]; - rdata = (char*)malloc(tot_length*element_size); - if(rdata == NULL) - { - PyErr_SetString(PyExc_MemoryError, "can't allocate memory for output array for arg$argnum"); - return NULL; - } - return rdata; -} - -PyObject* ARGOUT_argout(char* source, char* basetype_string, - int* dims, int dim_len) -{ - PyArrayObject *res; - char array_type = char_to_numtype(basetype_string[0]); - res = (PyArrayObject *)PyArray_FromDimsAndData(dim_len, dims, - array_type,source); - if(res == NULL) - { - //PyErr_SetString(PyExc_ValueError, "error converting internal data to array"); - return NULL; - } - res->flags |= NPY_OWNDATA; // we want the array to deallocate mem when it is finished. - // stick result in the output tuple (target). - // Need to think about generality of this one... 
- return (PyObject *) res; -} -#ifdef __cplusplus -extern "C" { -#endif -static PyObject *_wrap_double_vq(PyObject *self, PyObject *args) { - PyObject *resultobj; - double_IN_D0_D2 *arg0 ; - double_IN_D1_D2 *arg1 ; - int arg2 ; - int arg3 ; - int arg4 ; - int_ARGOUT_TUPLE_D0 *arg5 ; - double_ARGOUT_TUPLE_D0 *arg6 ; - int *_d0 ; - int *_d1 ; - int *_d2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - { - _d0 = &arg2; - } - { - _d1 = &arg3; - } - { - _d2 = &arg4; - } - { - } - { - } - if(!PyArg_ParseTuple(args,"OO:double_vq",&obj0,&obj1)) return NULL; - { - int* targ_dims[2] = { - _d0,_d2 - }; - PyArrayObject* a_obj = IN_in(obj0,"double_IN_D0_D2",targ_dims,2); - if (a_obj == NULL) return NULL; - arg0 = (double_IN_D0_D2 *) a_obj->data; - obj0 = (PyObject*)a_obj; - } - { - int* targ_dims[2] = { - _d1,_d2 - }; - PyArrayObject* a_obj = IN_in(obj1,"double_IN_D1_D2",targ_dims,2); - if (a_obj == NULL) return NULL; - arg1 = (double_IN_D1_D2 *) a_obj->data; - obj1 = (PyObject*)a_obj; - } - { - int dim_len = 1; - int dims[1] = { - *_d0 - }; - arg5 = (int_ARGOUT_TUPLE_D0 *) ARGOUT_check("int_ARGOUT_TUPLE_D0",dims,dim_len); - if (arg5 == NULL) return NULL; - } - { - int dim_len = 1; - int dims[1] = { - *_d0 - }; - arg6 = (double_ARGOUT_TUPLE_D0 *) ARGOUT_check("double_ARGOUT_TUPLE_D0",dims,dim_len); - if (arg6 == NULL) return NULL; - } - double_vq(arg0,arg1,arg2,arg3,arg4,arg5,arg6); - Py_INCREF(Py_None); - resultobj = Py_None; - { - int dim_len = 1; - int dims[1] = { - *_d0 - }; - PyObject * res; - res = ARGOUT_argout( (char*) arg5,"int_ARGOUT_TUPLE_D0", - dims, dim_len); - if (resultobj == NULL) - return NULL; - resultobj = t_output_helper(resultobj, res); - } - { - int dim_len = 1; - int dims[1] = { - *_d0 - }; - PyObject * res; - res = ARGOUT_argout( (char*) arg6,"double_ARGOUT_TUPLE_D0", - dims, dim_len); - if (resultobj == NULL) - return NULL; - resultobj = t_output_helper(resultobj, res); - } - { - Py_XDECREF(obj0); - } - { - Py_XDECREF(obj1); - } - return resultobj; 
-} - - -static PyObject *_wrap_float_vq(PyObject *self, PyObject *args) { - PyObject *resultobj; - float_IN_D0_D2 *arg0 ; - float_IN_D1_D2 *arg1 ; - int arg2 ; - int arg3 ; - int arg4 ; - int_ARGOUT_TUPLE_D0 *arg5 ; - float_ARGOUT_TUPLE_D0 *arg6 ; - int *_d0 ; - int *_d1 ; - int *_d2 ; - PyObject * obj0 = 0 ; - PyObject * obj1 = 0 ; - - { - _d0 = &arg2; - } - { - _d1 = &arg3; - } - { - _d2 = &arg4; - } - { - } - { - } - if(!PyArg_ParseTuple(args,"OO:float_vq",&obj0,&obj1)) return NULL; - { - int* targ_dims[2] = { - _d0,_d2 - }; - PyArrayObject* a_obj = IN_in(obj0,"float_IN_D0_D2",targ_dims,2); - if (a_obj == NULL) return NULL; - arg0 = (float_IN_D0_D2 *) a_obj->data; - obj0 = (PyObject*)a_obj; - } - { - int* targ_dims[2] = { - _d1,_d2 - }; - PyArrayObject* a_obj = IN_in(obj1,"float_IN_D1_D2",targ_dims,2); - if (a_obj == NULL) return NULL; - arg1 = (float_IN_D1_D2 *) a_obj->data; - obj1 = (PyObject*)a_obj; - } - { - int dim_len = 1; - int dims[1] = { - *_d0 - }; - arg5 = (int_ARGOUT_TUPLE_D0 *) ARGOUT_check("int_ARGOUT_TUPLE_D0",dims,dim_len); - if (arg5 == NULL) return NULL; - } - { - int dim_len = 1; - int dims[1] = { - *_d0 - }; - arg6 = (float_ARGOUT_TUPLE_D0 *) ARGOUT_check("float_ARGOUT_TUPLE_D0",dims,dim_len); - if (arg6 == NULL) return NULL; - } - float_vq(arg0,arg1,arg2,arg3,arg4,arg5,arg6); - Py_INCREF(Py_None); - resultobj = Py_None; - { - int dim_len = 1; - int dims[1] = { - *_d0 - }; - PyObject * res; - res = ARGOUT_argout( (char*) arg5,"int_ARGOUT_TUPLE_D0", - dims, dim_len); - if (resultobj == NULL) - return NULL; - resultobj = t_output_helper(resultobj, res); - } - { - int dim_len = 1; - int dims[1] = { - *_d0 - }; - PyObject * res; - res = ARGOUT_argout( (char*) arg6,"float_ARGOUT_TUPLE_D0", - dims, dim_len); - if (resultobj == NULL) - return NULL; - resultobj = t_output_helper(resultobj, res); - } - { - Py_XDECREF(obj0); - } - { - Py_XDECREF(obj1); - } - return resultobj; -} - - -static PyMethodDef _vqMethods[] = { - { "double_vq", _wrap_double_vq, 
METH_VARARGS }, - { "float_vq", _wrap_float_vq, METH_VARARGS }, - { NULL, NULL } -}; - -#ifdef __cplusplus -} -#endif - -/* -------- TYPE CONVERSION AND EQUIVALENCE RULES (BEGIN) -------- */ - - -static swig_type_info *swig_types_initial[] = { -0 -}; - - -/* -------- TYPE CONVERSION AND EQUIVALENCE RULES (END) -------- */ - -static swig_const_info swig_const_table[] = { -{0}}; - -static PyObject *SWIG_globals; -#ifdef __cplusplus -extern "C" -#endif -SWIGEXPORT(void) init_vq(void) { - PyObject *m, *d; - int i; - SWIG_globals = SWIG_newvarlink(); - m = Py_InitModule("_vq", _vqMethods); - d = PyModule_GetDict(m); - for (i = 0; swig_types_initial[i]; i++) { - swig_types[i] = SWIG_TypeRegister(swig_types_initial[i]); - } - - import_array(); - SWIG_InstallConstants(d,swig_const_table); -} - Modified: trunk/Lib/cluster/tests/test_vq.py =================================================================== --- trunk/Lib/cluster/tests/test_vq.py 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/tests/test_vq.py 2007-06-19 15:07:48 UTC (rev 3110) @@ -1,7 +1,7 @@ #! /usr/bin/env python # David Cournapeau -# Last Change: Fri Jun 08 12:00 PM 2007 J +# Last Change: Tue Jun 19 10:00 PM 2007 J # For now, just copy the tests from sandbox.pyem, so we can check that # kmeans works OK for trivial examples. 
@@ -12,7 +12,7 @@ import numpy as N set_package_path() -from cluster.vq import kmeans, kmeans2, py_vq, py_vq2, _py_vq_1d +from cluster.vq import kmeans, kmeans2, py_vq, py_vq2, _py_vq_1d, vq try: from cluster import _vq TESTC=True @@ -60,19 +60,36 @@ initc = N.concatenate(([[X[0]], [X[1]], [X[2]]])) code = initc.copy() if TESTC: - label1 = _vq.double_vq(X, initc)[0] + label1, dist = _vq.vq(X, initc) assert_array_equal(label1, LABEL1) + tlabel1, tdist = vq(X, initc) else: print "== not testing C imp of vq ==" - #def check_vq_1d(self, level=1): + #def check_py_vq_1d(self, level=1): + # """Test special rank 1 vq algo, python implementation.""" # data = X[:, 0] # initc = data[:3] # code = initc.copy() - # print _py_vq_1d(data, initc) + # a, b = _py_vq_1d(data, initc) + # ta, tb = py_vq(data[:, N.newaxis], initc[:, N.newaxis]) + # assert_array_equal(a, ta) + # assert_array_equal(b, tb) + def check_vq_1d(self, level=1): + """Test special rank 1 vq algo, python implementation.""" + data = X[:, 0] + initc = data[:3] + code = initc.copy() + if TESTC: + a, b = _vq.vq(data, initc) + ta, tb = py_vq(data[:, N.newaxis], initc[:, N.newaxis]) + assert_array_equal(a, ta) + assert_array_equal(b, tb) + else: + print "== not testing C imp of vq (rank 1) ==" + class test_kmean(NumpyTestCase): - #def check_kmeans def check_kmeans_simple(self, level=1): initc = N.concatenate(([[X[0]], [X[1]], [X[2]]])) code = initc.copy() @@ -100,18 +117,17 @@ assert_array_almost_equal(code1, CODET1) assert_array_almost_equal(code2, CODET2) - #def check_kmeans2_rank1(self, level=1): - # """Testing simple call to kmeans2 with rank 1 data.""" - # data = N.fromfile(open(DATAFILE1), sep = ", ") - # data = data.reshape((200, 2)) - # data1 = data[:, 0] - # data2 = data[:, 1] + def check_kmeans2_rank1(self, level=1): + """Testing simple call to kmeans2 with rank 1 data.""" + data = N.fromfile(open(DATAFILE1), sep = ", ") + data = data.reshape((200, 2)) + data1 = data[:, 0] + data2 = data[:, 1] - # initc = 
data1[:3] - # code = initc.copy() - # print _py_vq_1d(data1, code) - # code1 = kmeans2(data1, code, niter = 1)[0] - # code2 = kmeans2(data1, code, niter = 2)[0] + initc = data1[:3] + code = initc.copy() + code1 = kmeans2(data1, code, iter = 1)[0] + code2 = kmeans2(data1, code, iter = 2)[0] def check_kmeans2_init(self, level = 1): """Testing that kmeans2 init methods work.""" @@ -126,6 +142,5 @@ kmeans2(data, 3, minit = 'random') kmeans2(data, 3, minit = 'points') - if __name__ == "__main__": NumpyTest().run() Modified: trunk/Lib/cluster/vq.py =================================================================== --- trunk/Lib/cluster/vq.py 2007-06-18 23:22:19 UTC (rev 3109) +++ trunk/Lib/cluster/vq.py 2007-06-19 15:07:48 UTC (rev 3110) @@ -139,9 +139,9 @@ c_obs = obs.astype(ct) c_code_book = code_book.astype(ct) if ct is single: - results = _vq.float_vq(c_obs, c_code_book) + results = _vq.vq(c_obs, c_code_book) elif ct is double: - results = _vq.double_vq(c_obs, c_code_book) + results = _vq.vq(c_obs, c_code_book) else: results = py_vq(obs, code_book) except ImportError: @@ -512,7 +512,7 @@ nd = N.ndim(data) if nd == 1: d = 1 - raise ValueError("Input of rank 1 not supported yet") + #raise ValueError("Input of rank 1 not supported yet") elif nd == 2: d = data.shape[1] else: @@ -560,9 +560,21 @@ for j in range(nc): mbs = N.where(label==j) if mbs[0].size > 0: - code[j,:] = N.mean(data[mbs], axis=0) + code[j] = N.mean(data[mbs], axis=0) else: warnings.warn("One of the clusters are empty. 
" \ "Re-run kmean with a different initialization.") return code, label + +if __name__ == '__main__': + import _vq + a = N.random.randn(4, 2) + b = N.random.randn(2, 2) + + print _vq.vq(a, b) + print _vq.vq(N.array([[1], [2], [3], [4], [5], [6.]]), N.array([[2.], [5.]])) + print _vq.vq(N.array([1, 2, 3, 4, 5, 6.]), N.array([2., 5.])) + _vq.vq(a.astype(N.float32), b.astype(N.float32)) + _vq.vq(a, b.astype(N.float32)) + _vq.vq([0], b) From scipy-svn at scipy.org Wed Jun 20 06:23:38 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 20 Jun 2007 05:23:38 -0500 (CDT) Subject: [Scipy-svn] r3111 - in trunk/Lib/cluster: . src Message-ID: <20070620102338.6160339C12B@new.scipy.org> Author: cdavid Date: 2007-06-20 05:23:17 -0500 (Wed, 20 Jun 2007) New Revision: 3111 Modified: trunk/Lib/cluster/setup.py trunk/Lib/cluster/src/vq.tpl trunk/Lib/cluster/src/vq_module.c trunk/Lib/cluster/vq.py Log: Change int to npy_intp for index array + cosmetic change in python code. Modified: trunk/Lib/cluster/setup.py =================================================================== --- trunk/Lib/cluster/setup.py 2007-06-19 15:07:48 UTC (rev 3110) +++ trunk/Lib/cluster/setup.py 2007-06-20 10:23:17 UTC (rev 3111) @@ -2,19 +2,15 @@ from os.path import join -def configuration(parent_package='',top_path=None): +def configuration(parent_package = '', top_path = None): from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs - config = Configuration('cluster',parent_package,top_path) + config = Configuration('cluster', parent_package, top_path) config.add_data_dir('tests') config.add_extension('_vq', sources=[join('src', 'vq_module.c'), join('src', 'vq.c')], include_dirs = [get_numpy_include_dirs()]) - #config.add_extension('_vq', - # sources=[join('src', 'vq_wrap.cpp')]) - #config.add_extension('_c_vq', - # sources=[join('src', 'vq.c') ]) return config Modified: trunk/Lib/cluster/src/vq.tpl =================================================================== 
--- trunk/Lib/cluster/src/vq.tpl 2007-06-19 15:07:48 UTC (rev 3110) +++ trunk/Lib/cluster/src/vq.tpl 2007-06-20 10:23:17 UTC (rev 3111) @@ -1,6 +1,10 @@ [+ AutoGen5 template c +] /* * vim:syntax=c + * + * This file implements vq for float and double in C. It is a direct + * translation from the swig interface which could not be generated anymore + * with recent swig */ #include #include Modified: trunk/Lib/cluster/src/vq_module.c =================================================================== --- trunk/Lib/cluster/src/vq_module.c 2007-06-19 15:07:48 UTC (rev 3110) +++ trunk/Lib/cluster/src/vq_module.c 2007-06-20 10:23:17 UTC (rev 3111) @@ -1,5 +1,5 @@ /* - * Last Change: Tue Jun 19 11:00 PM 2007 J + * Last Change: Wed Jun 20 04:00 PM 2007 J * */ #include @@ -97,24 +97,24 @@ if (dist_a == NULL) { goto clean_code_a; } - index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, NPY_INT, 0); + index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, PyArray_INTP, 0); if (index_a == NULL) { goto clean_dist_a; } float_tvq((float*)obs_a->data, (float*)code_a->data, n, nc, d, - (int*)index_a->data, (float*)dist_a->data); + (npy_intp*)index_a->data, (float*)dist_a->data); break; case NPY_DOUBLE: dist_a = (PyArrayObject*)PyArray_EMPTY(1, &n, typenum1, 0); if (dist_a == NULL) { goto clean_code_a; } - index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, NPY_INT, 0); + index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, PyArray_INTP, 0); if (index_a == NULL) { goto clean_dist_a; } double_tvq((double*)obs_a->data, (double*)code_a->data, n, nc, d, - (int*)index_a->data, (double*)dist_a->data); + (npy_intp*)index_a->data, (double*)dist_a->data); break; default: PyErr_Format(PyExc_ValueError, @@ -151,4 +151,3 @@ Py_DECREF(obs_a); return NULL; } - Modified: trunk/Lib/cluster/vq.py =================================================================== --- trunk/Lib/cluster/vq.py 2007-06-19 15:07:48 UTC (rev 3110) +++ trunk/Lib/cluster/vq.py 2007-06-20 10:23:17 UTC (rev 3111) @@ -181,7 +181,8 @@ # d = number of 
features if N.ndim(obs) == 1: if not N.ndim(obs) == N.ndim(code_book): - raise ValueError("Observation and code_book should have the same rank") + raise ValueError( + "Observation and code_book should have the same rank") else: return _py_vq_1d(obs, code_book) else: @@ -192,7 +193,8 @@ raise ValueError("Observation and code_book should have the same rank") elif not d == code_book.shape[1]: raise ValueError("Code book(%d) and obs(%d) should have the same " \ - "number of features (eg columns)""" % (code_book.shape[1], d)) + "number of features (eg columns)""" % + (code_book.shape[1], d)) code = zeros(n, dtype=int) min_dist = zeros(n) @@ -547,11 +549,7 @@ def _kmeans2(data, code, niter, nc): """ "raw" version of kmeans2. Do not use directly. - Run kmeans with a given initial codebook. - - :undocumented - - """ + Run kmeans with a given initial codebook. """ for i in range(niter): # Compute the nearest neighbour for each obs # using the current code book @@ -568,13 +566,15 @@ return code, label if __name__ == '__main__': - import _vq - a = N.random.randn(4, 2) - b = N.random.randn(2, 2) + pass + #import _vq + #a = N.random.randn(4, 2) + #b = N.random.randn(2, 2) - print _vq.vq(a, b) - print _vq.vq(N.array([[1], [2], [3], [4], [5], [6.]]), N.array([[2.], [5.]])) - print _vq.vq(N.array([1, 2, 3, 4, 5, 6.]), N.array([2., 5.])) - _vq.vq(a.astype(N.float32), b.astype(N.float32)) - _vq.vq(a, b.astype(N.float32)) - _vq.vq([0], b) + #print _vq.vq(a, b) + #print _vq.vq(N.array([[1], [2], [3], [4], [5], [6.]]), + # N.array([[2.], [5.]])) + #print _vq.vq(N.array([1, 2, 3, 4, 5, 6.]), N.array([2., 5.])) + #_vq.vq(a.astype(N.float32), b.astype(N.float32)) + #_vq.vq(a, b.astype(N.float32)) + #_vq.vq([0], b) From scipy-svn at scipy.org Wed Jun 20 12:35:36 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Wed, 20 Jun 2007 11:35:36 -0500 (CDT) Subject: [Scipy-svn] r3112 - trunk/Lib/cluster/src Message-ID: <20070620163536.020C139C03A@new.scipy.org> Author: cdavid Date: 
2007-06-20 11:35:16 -0500 (Wed, 20 Jun 2007) New Revision: 3112 Modified: trunk/Lib/cluster/src/vq.c trunk/Lib/cluster/src/vq.h trunk/Lib/cluster/src/vq.tpl trunk/Lib/cluster/src/vq_module.c Log: Convert int to long long for index for correct support on 64 bits arch Modified: trunk/Lib/cluster/src/vq.c =================================================================== --- trunk/Lib/cluster/src/vq.c 2007-06-20 10:23:17 UTC (rev 3111) +++ trunk/Lib/cluster/src/vq.c 2007-06-20 16:35:16 UTC (rev 3112) @@ -1,5 +1,9 @@ /* * vim:syntax=c + * + * This file implements vq for float and double in C. It is a direct + * translation from the swig interface which could not be generated anymore + * with recent swig */ #include #include @@ -14,7 +18,7 @@ static int float_vq_1d(const float *in, int n, const float *init, int ncode, - int *code, float *mdist) + long long *code, float *mdist) { int i, j; float m, d; @@ -37,7 +41,7 @@ static int float_vq_obs(const float *obs, float *code_book, int Ncodes, int Nfeatures, - int* code, float *lowest_dist) + long long* code, float *lowest_dist) { int i,j,k=0; float dist, diff; @@ -64,7 +68,7 @@ float* obs, float* code_book, int Nobs, int Ncodes, int Nfeatures, - int* codes, float* lowest_dist) + long long* codes, float* lowest_dist) { int i; for( i = 0; i < Nobs; i++) { @@ -78,7 +82,7 @@ static int double_vq_1d(const double *in, int n, const double *init, int ncode, - int *code, double *mdist) + long long *code, double *mdist) { int i, j; double m, d; @@ -101,7 +105,7 @@ static int double_vq_obs(const double *obs, double *code_book, int Ncodes, int Nfeatures, - int* code, double *lowest_dist) + long long* code, double *lowest_dist) { int i,j,k=0; double dist, diff; @@ -128,7 +132,7 @@ double* obs, double* code_book, int Nobs, int Ncodes, int Nfeatures, - int* codes, double* lowest_dist) + long long* codes, double* lowest_dist) { int i; for( i = 0; i < Nobs; i++) { Modified: trunk/Lib/cluster/src/vq.h 
=================================================================== --- trunk/Lib/cluster/src/vq.h 2007-06-20 10:23:17 UTC (rev 3111) +++ trunk/Lib/cluster/src/vq.h 2007-06-20 16:35:16 UTC (rev 3112) @@ -2,9 +2,9 @@ #define _VQ_H int double_tvq(double* obs, double* code_book, int Nobs, int Ncodes, - int Nfeatures, int* codes, double* lowest_dist); + int Nfeatures, long long* codes, double* lowest_dist); int float_tvq(float* obs, float* code_book, int Nobs, int Ncodes, - int Nfeatures, int* codes, float* lowest_dist); + int Nfeatures, long long* codes, float* lowest_dist); #endif Modified: trunk/Lib/cluster/src/vq.tpl =================================================================== --- trunk/Lib/cluster/src/vq.tpl 2007-06-20 10:23:17 UTC (rev 3111) +++ trunk/Lib/cluster/src/vq.tpl 2007-06-20 16:35:16 UTC (rev 3112) @@ -19,7 +19,7 @@ [+ FOR data_type +] static int [+ (get "type_name") +]_vq_1d(const [+ (get "type_name") +] *in, int n, const [+ (get "type_name") +] *init, int ncode, - int *code, [+ (get "type_name") +] *mdist) + long long *code, [+ (get "type_name") +] *mdist) { int i, j; [+ (get "data_type") +] m, d; @@ -42,7 +42,7 @@ static int [+ (get "type_name") +]_vq_obs(const [+ (get "data_type") +] *obs, [+ (get "data_type") +] *code_book, int Ncodes, int Nfeatures, - int* code, [+ (get "data_type") +] *lowest_dist) + long long* code, [+ (get "data_type") +] *lowest_dist) { int i,j,k=0; [+ (get "data_type") +] dist, diff; @@ -69,7 +69,7 @@ [+ (get "data_type") +]* obs, [+ (get "data_type") +]* code_book, int Nobs, int Ncodes, int Nfeatures, - int* codes, [+ (get "data_type") +]* lowest_dist) + long long* codes, [+ (get "data_type") +]* lowest_dist) { int i; for( i = 0; i < Nobs; i++) { Modified: trunk/Lib/cluster/src/vq_module.c =================================================================== --- trunk/Lib/cluster/src/vq_module.c 2007-06-20 10:23:17 UTC (rev 3111) +++ trunk/Lib/cluster/src/vq_module.c 2007-06-20 16:35:16 UTC (rev 3112) @@ -27,7 +27,8 @@ 
PyArrayObject *obs_a, *code_a; PyArrayObject *index_a, *dist_a; int typenum1, typenum2; - int n, nc, d, nd; + int nc, nd; + npy_intp n, d; if ( !PyArg_ParseTuple(args, "OO", &obs, &code) ) { return NULL; From scipy-svn at scipy.org Fri Jun 22 04:37:26 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 22 Jun 2007 03:37:26 -0500 (CDT) Subject: [Scipy-svn] r3113 - trunk/Lib/sandbox/pyem Message-ID: <20070622083726.3311039C038@new.scipy.org> Author: cdavid Date: 2007-06-22 03:37:20 -0500 (Fri, 22 Jun 2007) New Revision: 3113 Modified: trunk/Lib/sandbox/pyem/gmm_em.py Log: Refactor update step for EM (split diag and full case in subfunction) Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-20 16:35:16 UTC (rev 3112) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-22 08:37:20 UTC (rev 3113) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Tue Jun 12 08:00 PM 2007 J +# Last Change: Thu Jun 21 03:00 PM 2007 J """Module implementing GMM, a class to estimate Gaussian mixture models using EM, and EM, a class which use GMM instances to estimate models parameters using @@ -11,7 +11,6 @@ # - which methods to avoid va shrinking to 0 ? There are several options, # not sure which ones are appropriates # - improve EM trainer -# - online EM import numpy as N #import numpy.linalg as lin @@ -186,61 +185,83 @@ tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va, log = True) # multiply by the weight tgd += N.log(self.gm.w) - # Normalize to get a pdf + # Normalize to get a (log) pdf gd = tgd - densities.logsumexp(tgd)[:, N.newaxis] return gd, tgd - def update_em(self, data, gamma): - """Computes update of the Gaussian Mixture Model (M step) - from the a posteriori pdf, computed by gmm_posterior - (E step). 
- """ + def _update_em_diag(self, data, gamma, ngamma): + """Computes update of the Gaussian Mixture Model (M step) from the + responsabilities gamma and normalized responsabilities ngamma, for + diagonal models.""" + #XXX: caching SS may decrease memory consumption k = self.gm.k d = self.gm.d n = data.shape[0] invn = 1.0/n - mGamma = N.sum(gamma, axis = 0) - if self.gm.mode == 'diag': - mu = N.zeros((k, d)) - va = N.zeros((k, d)) - gamma = gamma.T - for c in range(k): - x = N.dot(gamma[c:c+1, :], data)[0, :] - xx = N.dot(gamma[c:c+1, :], data ** 2)[0, :] + mu = N.zeros((k, d)) + va = N.zeros((k, d)) - mu[c, :] = x / mGamma[c] - va[c, :] = xx / mGamma[c] - mu[c, :] ** 2 - w = invn * mGamma + for c in range(k): + x = N.dot(gamma.T[c:c+1, :], data)[0, :] + xx = N.dot(gamma.T[c:c+1, :], data ** 2)[0, :] - elif self.gm.mode == 'full': - # In full mode, this is the bottleneck: the triple loop - # kills performances. This is pretty straightforward - # algebra, so computing it in C should not be too difficult. The - # real problem is to have valid covariance matrices, and to keep - # them positive definite, maybe with special storage... Not sure - # it really worth the risk - mu = N.zeros((k, d)) - va = N.zeros((k*d, d)) + mu[c, :] = x / ngamma[c] + va[c, :] = xx / ngamma[c] - mu[c, :] ** 2 + w = invn * ngamma - gamma = gamma.transpose() - for c in range(k): - #x = N.sum(N.outer(gamma[:, c], - # N.ones((1, d))) * data, axis = 0) - x = N.dot(gamma[c:c+1, :], data)[0, :] - xx = N.zeros((d, d)) - - # This should be much faster than recursing on n... 
- for i in range(d): - for j in range(d): - xx[i, j] = N.sum(data[:, i] * data[:, j] * gamma[c, :], - axis = 0) + return w, mu, va - mu[c, :] = x / mGamma[c] - va[c*d:c*d+d, :] = xx / mGamma[c] \ - - N.outer(mu[c, :], mu[c, :]) - w = invn * mGamma + def _update_em_full(self, data, gamma, ngamma): + """Computes update of the Gaussian Mixture Model (M step) from the + responsabilities gamma and normalized responsabilities ngamma, for + full models.""" + k = self.gm.k + d = self.gm.d + n = data.shape[0] + invn = 1.0/n + + # In full mode, this is the bottleneck: the triple loop + # kills performances. This is pretty straightforward + # algebra, so computing it in C should not be too difficult. The + # real problem is to have valid covariance matrices, and to keep + # them positive definite, maybe with special storage... Not sure + # it really worth the risk + mu = N.zeros((k, d)) + va = N.zeros((k*d, d)) + + #XXX: caching SS may decrease memory consumption + for c in range(k): + #x = N.sum(N.outer(gamma[:, c], + # N.ones((1, d))) * data, axis = 0) + x = N.dot(gamma.T[c:c+1, :], data)[0, :] + xx = N.zeros((d, d)) + + # This should be much faster than recursing on n... + for i in range(d): + for j in range(d): + xx[i, j] = N.sum(data[:, i] * data[:, j] * gamma.T[c, :], + axis = 0) + + mu[c, :] = x / ngamma[c] + va[c*d:c*d+d, :] = xx / ngamma[c] \ + - N.outer(mu[c, :], mu[c, :]) + w = invn * ngamma + + return w, mu, va + + def update_em(self, data, gamma): + """Computes update of the Gaussian Mixture Model (M step) + from the a posteriori pdf, computed by gmm_posterior + (E step). 
+ """ + ngamma = N.sum(gamma, axis = 0) + + if self.gm.mode == 'diag': + w, mu, va = self._update_em_diag(data, gamma, ngamma) + elif self.gm.mode == 'full': + w, mu, va = self._update_em_full(data, gamma, ngamma) else: raise GmmParamError("varmode not recognized") @@ -344,12 +365,13 @@ like = N.zeros(maxiter) # Em computation, with computation of the likelihood - g, tgd = model.compute_responsabilities(data) - like[0] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) + g, tgd = model.compute_responsabilities(data) + # TODO: do it in log domain instead + like[0] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) model.update_em(data, g) for i in range(1, maxiter): - g, tgd = model.compute_responsabilities(data) - like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) + g, tgd = model.compute_responsabilities(data) + like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0) model.update_em(data, g) if has_em_converged(like[i], like[i-1], thresh): return like[0:i] From scipy-svn at scipy.org Fri Jun 22 04:55:35 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 22 Jun 2007 03:55:35 -0500 (CDT) Subject: [Scipy-svn] r3115 - in trunk/Lib/sandbox/pyem: . data Message-ID: <20070622085535.2949139C078@new.scipy.org> Author: cdavid Date: 2007-06-22 03:55:20 -0500 (Fri, 22 Jun 2007) New Revision: 3115 Modified: trunk/Lib/sandbox/pyem/TODO trunk/Lib/sandbox/pyem/data/setup.py trunk/Lib/sandbox/pyem/gmm_em.py Log: Add pendigits as a subpackage of data for distutils. 
Modified: trunk/Lib/sandbox/pyem/TODO =================================================================== --- trunk/Lib/sandbox/pyem/TODO 2007-06-22 08:39:13 UTC (rev 3114) +++ trunk/Lib/sandbox/pyem/TODO 2007-06-22 08:55:20 UTC (rev 3115) @@ -1,10 +1,9 @@ -# Last Change: Sat Jun 09 04:00 PM 2007 J +# Last Change: Fri Jun 22 05:00 PM 2007 J Things which must be implemented for a 1.0 version (in importante order) - A classifier - handle rank 1 for 1d data - basic regularization - - docstrings - demo for pdf estimation, discriminant analysis and clustering - scaling of data: maybe something to handle scaling internally ? Modified: trunk/Lib/sandbox/pyem/data/setup.py =================================================================== --- trunk/Lib/sandbox/pyem/data/setup.py 2007-06-22 08:39:13 UTC (rev 3114) +++ trunk/Lib/sandbox/pyem/data/setup.py 2007-06-22 08:55:20 UTC (rev 3115) @@ -4,6 +4,7 @@ from numpy.distutils.misc_util import Configuration config = Configuration('data',parent_package,top_path) config.add_subpackage('oldfaithful') + config.add_subpackage('pendigits') config.make_config_py() # installs __config__.py return config Modified: trunk/Lib/sandbox/pyem/gmm_em.py =================================================================== --- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-22 08:39:13 UTC (rev 3114) +++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-22 08:55:20 UTC (rev 3115) @@ -1,5 +1,5 @@ # /usr/bin/python -# Last Change: Thu Jun 21 03:00 PM 2007 J +# Last Change: Fri Jun 22 05:00 PM 2007 J """Module implementing GMM, a class to estimate Gaussian mixture models using EM, and EM, a class which use GMM instances to estimate models parameters using From scipy-svn at scipy.org Fri Jun 22 05:10:00 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 22 Jun 2007 04:10:00 -0500 (CDT) Subject: [Scipy-svn] r3116 - trunk/Lib/cluster/src Message-ID: <20070622091000.6ABB039C128@new.scipy.org> Author: cdavid Date: 2007-06-22 04:09:20 -0500 (Fri, 22 
Jun 2007) New Revision: 3116 Modified: trunk/Lib/cluster/src/vq.c trunk/Lib/cluster/src/vq.h trunk/Lib/cluster/src/vq.tpl Log: Convert index to npy_intp to avoid problems between 32 and 64 bits. Modified: trunk/Lib/cluster/src/vq.c =================================================================== --- trunk/Lib/cluster/src/vq.c 2007-06-22 08:55:20 UTC (rev 3115) +++ trunk/Lib/cluster/src/vq.c 2007-06-22 09:09:20 UTC (rev 3116) @@ -8,6 +8,7 @@ #include #include +#include "vq.h" /* * results is put into code, which contains initially the initial code * @@ -16,9 +17,10 @@ const static double rbig = 1e100; +#if 0 static int float_vq_1d(const float *in, int n, const float *init, int ncode, - long long *code, float *mdist) + npy_intp *code, float *mdist) { int i, j; float m, d; @@ -38,10 +40,11 @@ } return 0; } +#endif static int float_vq_obs(const float *obs, float *code_book, int Ncodes, int Nfeatures, - long long* code, float *lowest_dist) + npy_intp* code, float *lowest_dist) { int i,j,k=0; float dist, diff; @@ -68,7 +71,7 @@ float* obs, float* code_book, int Nobs, int Ncodes, int Nfeatures, - long long* codes, float* lowest_dist) + npy_intp* codes, float* lowest_dist) { int i; for( i = 0; i < Nobs; i++) { @@ -80,9 +83,10 @@ return 0; } +#if 0 static int double_vq_1d(const double *in, int n, const double *init, int ncode, - long long *code, double *mdist) + npy_intp *code, double *mdist) { int i, j; double m, d; @@ -102,10 +106,11 @@ } return 0; } +#endif static int double_vq_obs(const double *obs, double *code_book, int Ncodes, int Nfeatures, - long long* code, double *lowest_dist) + npy_intp* code, double *lowest_dist) { int i,j,k=0; double dist, diff; @@ -132,7 +137,7 @@ double* obs, double* code_book, int Nobs, int Ncodes, int Nfeatures, - long long* codes, double* lowest_dist) + npy_intp* codes, double* lowest_dist) { int i; for( i = 0; i < Nobs; i++) { Modified: trunk/Lib/cluster/src/vq.h =================================================================== --- 
trunk/Lib/cluster/src/vq.h 2007-06-22 08:55:20 UTC (rev 3115) +++ trunk/Lib/cluster/src/vq.h 2007-06-22 09:09:20 UTC (rev 3116) @@ -1,10 +1,14 @@ #ifndef _VQ_H_ #define _VQ_H +#include + +#include + int double_tvq(double* obs, double* code_book, int Nobs, int Ncodes, - int Nfeatures, long long* codes, double* lowest_dist); + int Nfeatures, npy_intp* codes, double* lowest_dist); int float_tvq(float* obs, float* code_book, int Nobs, int Ncodes, - int Nfeatures, long long* codes, float* lowest_dist); + int Nfeatures, npy_intp* codes, float* lowest_dist); #endif Modified: trunk/Lib/cluster/src/vq.tpl =================================================================== --- trunk/Lib/cluster/src/vq.tpl 2007-06-22 08:55:20 UTC (rev 3115) +++ trunk/Lib/cluster/src/vq.tpl 2007-06-22 09:09:20 UTC (rev 3116) @@ -9,6 +9,7 @@ #include #include +#include "vq.h" /* * results is put into code, which contains initially the initial code * @@ -17,9 +18,10 @@ const static double rbig = 1e100; [+ FOR data_type +] +#if 0 static int [+ (get "type_name") +]_vq_1d(const [+ (get "type_name") +] *in, int n, const [+ (get "type_name") +] *init, int ncode, - long long *code, [+ (get "type_name") +] *mdist) + npy_intp *code, [+ (get "type_name") +] *mdist) { int i, j; [+ (get "data_type") +] m, d; @@ -39,10 +41,11 @@ } return 0; } +#endif static int [+ (get "type_name") +]_vq_obs(const [+ (get "data_type") +] *obs, [+ (get "data_type") +] *code_book, int Ncodes, int Nfeatures, - long long* code, [+ (get "data_type") +] *lowest_dist) + npy_intp* code, [+ (get "data_type") +] *lowest_dist) { int i,j,k=0; [+ (get "data_type") +] dist, diff; @@ -69,7 +72,7 @@ [+ (get "data_type") +]* obs, [+ (get "data_type") +]* code_book, int Nobs, int Ncodes, int Nfeatures, - long long* codes, [+ (get "data_type") +]* lowest_dist) + npy_intp* codes, [+ (get "data_type") +]* lowest_dist) { int i; for( i = 0; i < Nobs; i++) { From scipy-svn at scipy.org Mon Jun 25 18:38:04 2007 From: scipy-svn at scipy.org 
(scipy-svn at scipy.org) Date: Mon, 25 Jun 2007 17:38:04 -0500 (CDT) Subject: [Scipy-svn] r3117 - in trunk/Lib/stats: . tests Message-ID: <20070625223804.F233039C059@new.scipy.org> Author: stefan Date: 2007-06-25 17:37:46 -0500 (Mon, 25 Jun 2007) New Revision: 3117 Modified: trunk/Lib/stats/distributions.py trunk/Lib/stats/tests/test_distributions.py Log: Fix geometric probability mass function. Add tests. Modified: trunk/Lib/stats/distributions.py =================================================================== --- trunk/Lib/stats/distributions.py 2007-06-22 09:09:20 UTC (rev 3116) +++ trunk/Lib/stats/distributions.py 2007-06-25 22:37:46 UTC (rev 3117) @@ -3913,7 +3913,7 @@ def _argcheck(self, pr): return (pr<=1) & (pr >= 0) def _pmf(self, k, pr): - return (1-pr)**k * pr + return (1-pr)**(k-1) * pr def _cdf(self, x, pr): k = floor(x) return (1.0-(1.0-pr)**k) Modified: trunk/Lib/stats/tests/test_distributions.py =================================================================== --- trunk/Lib/stats/tests/test_distributions.py 2007-06-22 09:09:20 UTC (rev 3116) +++ trunk/Lib/stats/tests/test_distributions.py 2007-06-25 22:37:46 UTC (rev 3117) @@ -7,7 +7,7 @@ set_package_path() import numpy -from numpy import typecodes +from numpy import typecodes, array import stats restore_path() @@ -138,6 +138,18 @@ assert(isinstance(val, numpy.ndarray)) assert(val.dtype.char in typecodes['AllInteger']) + def check_pmf(self): + vals = stats.geom.pmf([1,2,3],0.5) + assert_array_almost_equal(vals,[0.5,0.25,0.125]) + + def check_cdf_sf(self): + vals = stats.geom.cdf([1,2,3],0.5) + vals_sf = stats.geom.sf([1,2,3],0.5) + expected = array([0.5,0.75,0.875]) + assert_array_almost_equal(vals,expected) + assert_array_almost_equal(vals_sf,1-expected) + + class test_hypergeom(NumpyTestCase): def check_rvs(self): vals = stats.hypergeom.rvs(20, 10, 3, size=(2, 50)) From scipy-svn at scipy.org Thu Jun 28 04:23:37 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 28 Jun 
2007 03:23:37 -0500 (CDT) Subject: [Scipy-svn] r3118 - trunk/Lib/special Message-ID: <20070628082337.C4F9539C0B2@new.scipy.org> Author: cookedm Date: 2007-06-28 03:23:20 -0500 (Thu, 28 Jun 2007) New Revision: 3118 Modified: trunk/Lib/special/__init__.py Log: Lib/special: replace some character codes with dtypes Modified: trunk/Lib/special/__init__.py =================================================================== --- trunk/Lib/special/__init__.py 2007-06-25 22:37:46 UTC (rev 3117) +++ trunk/Lib/special/__init__.py 2007-06-28 08:23:20 UTC (rev 3118) @@ -2,7 +2,7 @@ # special - Special Functions # -from info import __doc__ +from info import __doc__, __docformat__ #from special_version import special_version as __version__ from basic import * From scipy-svn at scipy.org Thu Jun 28 04:24:03 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 28 Jun 2007 03:24:03 -0500 (CDT) Subject: [Scipy-svn] r3119 - trunk/Lib/special Message-ID: <20070628082403.4384E39C0BB@new.scipy.org> Author: cookedm Date: 2007-06-28 03:23:59 -0500 (Thu, 28 Jun 2007) New Revision: 3119 Modified: trunk/Lib/special/basic.py Log: Lib/special: Really replace some character codes with dtypes Modified: trunk/Lib/special/basic.py =================================================================== --- trunk/Lib/special/basic.py 2007-06-28 08:23:20 UTC (rev 3118) +++ trunk/Lib/special/basic.py 2007-06-28 08:23:59 UTC (rev 3119) @@ -25,10 +25,10 @@ x,n = asarray(x), asarray(n) n = asarray(n + (x-x)) x = asarray(x + (n-n)) - if x.dtype.char in ['fFdD']: - ytype = x.dtype.char + if issubdtype(x.dtype, inexact): + ytype = x.dtype else: - ytype = 'd' + ytype = float y = zeros(x.shape,ytype) mask1 = (n <= 0) | (n <> floor(n)) @@ -406,7 +406,7 @@ Limit as q->infinity of 1F1(q;a;z/q) """ z = asarray(z) - if z.dtype.char in ['F', 'D']: + if issubdtype(z.dtype, complexfloating): arg = 2*sqrt(abs(z)) num = where(z>=0, iv(v-1,arg), jv(v-1,arg)) den = abs(z)**((v-1.0)/2) From scipy-svn at 
scipy.org Thu Jun 28 04:27:13 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 28 Jun 2007 03:27:13 -0500 (CDT) Subject: [Scipy-svn] r3120 - trunk/Lib/special/cephes Message-ID: <20070628082713.E002F39C0B2@new.scipy.org> Author: cookedm Date: 2007-06-28 03:24:55 -0500 (Thu, 28 Jun 2007) New Revision: 3120 Modified: trunk/Lib/special/cephes/gamma.c Log: Lib/special: gamma(x) for x >~ 700 would return nan instead of inf. This fixes $53. Modified: trunk/Lib/special/cephes/gamma.c =================================================================== --- trunk/Lib/special/cephes/gamma.c 2007-06-28 08:23:59 UTC (rev 3119) +++ trunk/Lib/special/cephes/gamma.c 2007-06-28 08:24:55 UTC (rev 3120) @@ -291,6 +291,13 @@ { double y, w, v; +if (x >= MAXGAM) { +#ifdef INFINITIES + return (INFINITY); +#else + return (MAXNUM); +#endif +} w = 1.0/x; w = 1.0 + w * polevl( w, STIR, 4 ); y = exp(x); From scipy-svn at scipy.org Fri Jun 29 00:25:19 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Thu, 28 Jun 2007 23:25:19 -0500 (CDT) Subject: [Scipy-svn] r3121 - trunk/Lib/sparse Message-ID: <20070629042519.C6F2D39C040@new.scipy.org> Author: wnbell Date: 2007-06-28 23:23:55 -0500 (Thu, 28 Jun 2007) New Revision: 3121 Modified: trunk/Lib/sparse/sparse.py Log: small edit to CSR/CSC transpose and conj Modified: trunk/Lib/sparse/sparse.py =================================================================== --- trunk/Lib/sparse/sparse.py 2007-06-28 08:24:55 UTC (rev 3120) +++ trunk/Lib/sparse/sparse.py 2007-06-29 04:23:55 UTC (rev 3121) @@ -667,29 +667,11 @@ def _transpose(self, cls, copy=False): M, N = self.shape - if copy: - data = self.data.copy() - index = self.indices.copy() - indptr = self.indptr.copy() - else: - data = self.data - index = self.indices - indptr = self.indptr - return cls((data,index,indptr),(N,M)) + return cls((self.data,self.indices,self.indptr),(N,M),copy=copy) def conj(self, copy=False): - new = self.__class__(self.shape, 
nzmax=self.nzmax, dtype=self.dtype) - if copy: - new.data = self.data.conj().copy() - new.indices = self.indices.conj().copy() - new.indptr = self.indptr.conj().copy() - else: - new.data = self.data.conj() - new.indices = self.indices.conj() - new.indptr = self.indptr.conj() - new._check() - return new + return self.__class__((self.data.conj(),self.indices,self.indptr),self.shape,copy=copy) def _ensure_sorted_indices(self, shape0, shape1, inplace=False): """Return a copy of this matrix where the row indices are sorted From scipy-svn at scipy.org Fri Jun 29 02:59:14 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 29 Jun 2007 01:59:14 -0500 (CDT) Subject: [Scipy-svn] r3122 - trunk/Lib/sparse Message-ID: <20070629065914.3203339C0A1@new.scipy.org> Author: wnbell Date: 2007-06-29 01:59:02 -0500 (Fri, 29 Jun 2007) New Revision: 3122 Modified: trunk/Lib/sparse/sparse.py Log: Made CSR/CSC format _check() more comprehensive by default. Added constructor arg to (optionally) avoid expensive format check. This makes transpose() a O(1) operation instead of O(N) as before. 
Modified: trunk/Lib/sparse/sparse.py =================================================================== --- trunk/Lib/sparse/sparse.py 2007-06-29 04:23:55 UTC (rev 3121) +++ trunk/Lib/sparse/sparse.py 2007-06-29 06:59:02 UTC (rev 3122) @@ -498,7 +498,7 @@ self.indptr, self.indices, \ self.data, other.indptr, \ other.indices, other.data) - return self.__class__((data, ind, indptr), self.shape) + return self.__class__((data, ind, indptr), self.shape, check=False) elif isdense(other): # Convert this matrix to a dense matrix and add them return other + self.todense() @@ -558,7 +558,7 @@ self.indptr, self.indices, \ self.data, other.indptr, \ other.indices, other.data) - return self.__class__((data, ind, indptr), self.shape) + return self.__class__((data, ind, indptr), self.shape, check=False) else: raise TypeError, "unsupported type for sparse matrix power" @@ -573,7 +573,7 @@ indptr, ind, data = fn(M, N, self.indptr, self.indices, \ self.data, other.indptr, \ other.indices, other.data) - return self.__class__((data, ind, indptr), (M, N)) + return self.__class__((data, ind, indptr), (M, N),check=False) elif isdense(other): # This is SLOW! We need a more efficient implementation # of sparse * dense matrix multiplication! 
@@ -636,12 +636,8 @@ def copy(self): - new = self.__class__(self.shape, nzmax=self.nzmax, dtype=self.dtype) - new.data = self.data.copy() - new.indices = self.indices.copy() - new.indptr = self.indptr.copy() - new._check() - return new + return self.__class__((self.data.copy(),self.indices.copy(),self.indptr.copy()), \ + self.shape, dtype=self.dtype, check=False) def _get_slice(self, i, start, stop, stride, dims): @@ -667,11 +663,11 @@ def _transpose(self, cls, copy=False): M, N = self.shape - return cls((self.data,self.indices,self.indptr),(N,M),copy=copy) + return cls((self.data,self.indices,self.indptr),(N,M),copy=copy,check=False) def conj(self, copy=False): - return self.__class__((self.data.conj(),self.indices,self.indptr),self.shape,copy=copy) + return self.__class__((self.data.conj(),self.indices,self.indptr),self.shape,copy=copy,check=False) def _ensure_sorted_indices(self, shape0, shape1, inplace=False): """Return a copy of this matrix where the row indices are sorted @@ -706,7 +702,7 @@ - csc_matrix((data, row, ptr), [(M, N)]) standard CSC representation """ - def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False): + def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False, check=True): _cs_matrix.__init__(self) if isdense(arg1): self.dtype = getdtype(dtype, arg1) @@ -776,11 +772,11 @@ self.dtype = getdtype(dtype, s) if copy: self.data = array(s) - self.indices = array(rowind) + self.indices = array(rowind, dtype=intc) self.indptr = array(indptr, dtype=intc) else: self.data = asarray(s) - self.indices = asarray(rowind) + self.indices = asarray(rowind, dtype=intc) self.indptr = asarray(indptr, dtype=intc) except: raise ValueError, "unrecognized form for csc_matrix constructor" @@ -797,29 +793,37 @@ else: raise ValueError, "unrecognized form for csc_matrix constructor" - # Read existing matrix dimensions - try: - (oldM, oldN) = self.shape - except: - oldM = oldN = None + + # Read matrix dimensions given, if any if dims is 
not None: try: (M, N) = dims + M,N = int(M),int(N) except (TypeError, ValueError), e: raise TypeError, "dimensions not understood" else: + # Read existing matrix dimensions + try: + (oldM, oldN) = self.shape + except: + oldM = oldN = None + + # Expand if necessary M = N = None - if len(self.indices) > 0: - M = max(oldM, M, int(amax(self.indices)) + 1) - else: - # Matrix is completely empty - M = max(oldM, M) - N = max(0, oldN, N, len(self.indptr) - 1) + N = max(0, oldN, N, len(self.indptr) - 1) + if len(self.indices) > 0: + M = max(oldM, M, int(amax(self.indices)) + 1) + else: + # Matrix is completely empty + M = max(oldM, M) + self.shape = (M, N) - self._check() - def _check(self): + self._check(check) + + + def _check(self,full_check=True): # some functions pass floats self.shape = tuple([int(x) for x in self.shape]) @@ -832,15 +836,23 @@ "should be rank 1" if (len(self.data) != nzmax): raise ValueError, "data and row list should have same length" + if (self.indptr[0] != 0): + raise ValueError,"index pointer should start with 0" if (len(self.indptr) != N+1): raise ValueError, "index pointer should be of of size N+1" if (nzmax < nnz): raise ValueError, "nzmax must not be less than nnz" - if (nnz>0) and (amax(self.indices[:nnz]) >= M): - raise ValueError, "row values must be < M" - if (nnz>0) and (amin(self.indices[:nnz]) < 0): - raise ValueError, "row values must be >= 0" + if full_check: + #check format validity (more expensive) + if nnz > 0: + if amax(self.indices[:nnz]) >= M: + raise ValueError, "row values must be < M" + if amin(self.indices[:nnz]) < 0: + raise ValueError, "row values must be >= 0" + if numpy.diff(self.indptr).min() < 0: + raise ValueError,'indptr values must form a non-decreasing sequence' + if (self.indptr[-1] > len(self.indices)): raise ValueError, \ "Last value of index list should be less than "\ @@ -883,7 +895,7 @@ self.indptr, self.indices, \ self.data, ocs.indptr, \ ocs.indices, ocs.data) - return csc_matrix((data, rowind, indptr), 
self.shape) + return csc_matrix((data, rowind, indptr), self.shape, check=False) elif isdense(other): # Convert this matrix to a dense matrix and add them. return self.todense() + other @@ -1040,7 +1052,7 @@ def tocsr(self): indptr, colind, data = csctocsr(self.shape[0], self.shape[1], \ self.indptr, self.indices, self.data) - return csr_matrix((data, colind, indptr), self.shape) + return csr_matrix((data, colind, indptr), self.shape, check=False) def _toother(self): return self.tocsr() @@ -1092,7 +1104,7 @@ - csr_matrix((data, col, ptr), [dims=(M, N)]) standard CSR representation """ - def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False): + def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False, check=True): _cs_matrix.__init__(self) if isdense(arg1): self.dtype = getdtype(dtype, arg1) @@ -1157,11 +1169,11 @@ self.dtype = getdtype(dtype, s) if copy: self.data = array(s, dtype=self.dtype) - self.indices = array(colind) + self.indices = array(colind, dtype=intc) self.indptr = array(indptr, dtype=intc) else: self.data = asarray(s, dtype=self.dtype) - self.indices = asarray(colind) + self.indices = asarray(colind, dtype=intc) self.indptr = asarray(indptr, dtype=intc) else: # (data, ij) format @@ -1176,11 +1188,7 @@ else: raise ValueError, "unrecognized form for csr_matrix constructor" - # Read existing matrix dimensions - try: - (oldM, oldN) = self.shape - except: - oldM = oldN = None + # Read matrix dimensions given, if any if dims is not None: try: @@ -1188,17 +1196,25 @@ except (TypeError, ValueError), e: raise TypeError, "dimensions not understood" else: + # Read existing matrix dimensions + try: + (oldM, oldN) = self.shape + except: + oldM = oldN = None + M = N = None - M = max(0, oldM, M, len(self.indptr) - 1) - if len(self.indices) > 0: - N = max(oldN, N, int(amax(self.indices)) + 1) - else: - # Matrix is completely empty - N = max(oldN, N) + M = max(0, oldM, M, len(self.indptr) - 1) + if len(self.indices) > 0: + N = 
max(oldN, N, int(amax(self.indices)) + 1) + else: + # Matrix is completely empty + N = max(oldN, N) + self.shape = (M, N) - self._check() + + self._check(check) - def _check(self): + def _check(self,full_check=True): # some functions pass floats self.shape = tuple([int(x) for x in self.shape]) @@ -1211,12 +1227,22 @@ "should be rank 1" if (len(self.data) != nzmax): raise ValueError, "data and row list should have same length" + if (self.indptr[0] != 0): + raise ValueError,"index pointer should start with 0" if (len(self.indptr) != M+1): raise ValueError, "index pointer should be of length #rows + 1" - if (nnz>0) and (amax(self.indices[:nnz]) >= N): - raise ValueError, "column values must be < N" - if (nnz>0) and (amin(self.indices[:nnz]) < 0): - raise ValueError, "column values must be >= 0" + + + if full_check: + #check format validity (more expensive) + if nnz > 0: + if amax(self.indices[:nnz]) >= N: + raise ValueError, "column values must be < N" + if amin(self.indices[:nnz]) < 0: + raise ValueError, "column values must be >= 0" + if numpy.diff(self.indptr).min() < 0: + raise ValueError,'indptr values must form a non-decreasing sequence' + if (nnz > nzmax): raise ValueError, \ "last value of index list should be less than "\ @@ -1388,7 +1414,7 @@ def tocsc(self): indptr, rowind, data = csrtocsc(self.shape[0], self.shape[1], \ self.indptr, self.indices, self.data) - return csc_matrix((data, rowind, indptr), self.shape) + return csc_matrix((data, rowind, indptr), self.shape, check=False) def _toother(self): return self.tocsc() From scipy-svn at scipy.org Fri Jun 29 03:00:58 2007 From: scipy-svn at scipy.org (scipy-svn at scipy.org) Date: Fri, 29 Jun 2007 02:00:58 -0500 (CDT) Subject: [Scipy-svn] r3123 - in trunk/Lib: linalg/tests sparse/tests special/tests Message-ID: <20070629070058.C854B39C1AB@new.scipy.org> Author: wnbell Date: 2007-06-29 02:00:29 -0500 (Fri, 29 Jun 2007) New Revision: 3123 Modified: trunk/Lib/linalg/tests/test_iterative.py 
trunk/Lib/sparse/tests/test_sparse.py trunk/Lib/special/tests/test_basic.py Log: commented out some print statements in unittests Modified: trunk/Lib/linalg/tests/test_iterative.py =================================================================== --- trunk/Lib/linalg/tests/test_iterative.py 2007-06-29 06:59:02 UTC (rev 3122) +++ trunk/Lib/linalg/tests/test_iterative.py 2007-06-29 07:00:29 UTC (rev 3123) @@ -27,7 +27,7 @@ def callback(x): global A, b res = b-dot(A,x) - print "||A.x - b|| = " + str(norm(dot(A,x)-b)) + #print "||A.x - b|| = " + str(norm(dot(A,x)-b)) class test_iterative_solvers(NumpyTestCase): def __init__(self, *args, **kwds): Modified: trunk/Lib/sparse/tests/test_sparse.py =================================================================== --- trunk/Lib/sparse/tests/test_sparse.py 2007-06-29 06:59:02 UTC (rev 3122) +++ trunk/Lib/sparse/tests/test_sparse.py 2007-06-29 07:00:29 UTC (rev 3123) @@ -516,15 +516,15 @@ assert(e.A.dtype.type == mytype) def check_ensure_sorted_indices(self): - print 'sorting CSR indices' + #print 'sorting CSR indices' data = arange( 5 ) col = array( [7, 2, 1, 5, 4] ) ptr = [0, 3, 5] asp = csr_matrix( (data, col, ptr), dims = (2,10) ) bsp = asp.copy() - print 'in\n', asp + #print 'in\n', asp asp.ensure_sorted_indices( inplace = True ) - print 'out\n', asp + #print 'out\n', asp assert_array_equal(asp.indices,[1, 2, 7, 4, 5]) for ir in range( asp.shape[0] ): for ic in range( asp.shape[1] ): @@ -575,15 +575,15 @@ assert(e.A.dtype.type == mytype) def check_ensure_sorted_indices(self): - print 'sorting CSC indices' + #print 'sorting CSC indices' data = arange( 5 ) row = array( [7, 2, 1, 5, 4] ) ptr = [0, 3, 5] asp = csc_matrix( (data, row, ptr), dims = (10,2) ) bsp = asp.copy() - print 'in\n', asp + #print 'in\n', asp asp.ensure_sorted_indices( inplace = True ) - print 'out\n', asp + #print 'out\n', asp assert_array_equal(asp.indices,[1, 2, 7, 4, 5]) for ir in range( asp.shape[0] ): for ic in range( asp.shape[1] ): Modified: 
trunk/Lib/special/tests/test_basic.py =================================================================== --- trunk/Lib/special/tests/test_basic.py 2007-06-29 06:59:02 UTC (rev 3122) +++ trunk/Lib/special/tests/test_basic.py 2007-06-29 07:00:29 UTC (rev 3123) @@ -1814,7 +1814,7 @@ psub = poly1d([2,-1]) q = 4*rand() p = q-1 + 2*rand() - print "shifted jacobi p,q = ", p, q + #print "shifted jacobi p,q = ", p, q G0 = sh_jacobi(0,p,q) G1 = sh_jacobi(1,p,q) G2 = sh_jacobi(2,p,q)