[Scipy-svn] r3087 - in trunk/Lib/sandbox/pyem: . doc examples
scipy-svn at scipy.org
Sat Jun 9 10:03:18 EDT 2007
Author: cdavid
Date: 2007-06-09 09:03:01 -0500 (Sat, 09 Jun 2007)
New Revision: 3087
Added:
trunk/Lib/sandbox/pyem/doc/pdfestimation.png
Modified:
trunk/Lib/sandbox/pyem/__init__.py
trunk/Lib/sandbox/pyem/_c_densities.py
trunk/Lib/sandbox/pyem/densities.py
trunk/Lib/sandbox/pyem/doc/
trunk/Lib/sandbox/pyem/doc/Makefile
trunk/Lib/sandbox/pyem/doc/index.txt
trunk/Lib/sandbox/pyem/doc/tutorial.pdf
trunk/Lib/sandbox/pyem/examples/pdfestimation.py
trunk/Lib/sandbox/pyem/gauss_mix.py
trunk/Lib/sandbox/pyem/gmm_em.py
trunk/Lib/sandbox/pyem/info.py
trunk/Lib/sandbox/pyem/online_em.py
Log:
Heavy clean-up of the code + docstrings.
Modified: trunk/Lib/sandbox/pyem/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/__init__.py 2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/__init__.py 2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Mon May 28 01:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
from info import __doc__
@@ -8,7 +8,7 @@
#from online_em import OnGMM as _OnGMM
#import examples as _examples
-__all__ = filter(lambda s:not s.startswith('_'),dir())
+__all__ = filter(lambda s:not s.startswith('_'), dir())
from numpy.testing import NumpyTest
test = NumpyTest().test
Modified: trunk/Lib/sandbox/pyem/_c_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/_c_densities.py 2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/_c_densities.py 2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,28 +1,34 @@
#! /usr/bin/python
#
# Copyrighted David Cournapeau
-# Last Change: Thu Nov 09 05:00 PM 2006 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
+"""This module implements some function of densities module in C for efficiency
+reasons. gaussian, such as pdf estimation, confidence interval/ellipsoids,
+etc..."""
+
+__docformat__ = 'restructuredtext'
+
# This module uses a C implementation through ctypes, for diagonal cases
# TODO:
# - portable way to find/open the shared library
# - full cov matrice
+# - test before inclusion
import numpy as N
import numpy.linalg as lin
-from numpy.random import randn
-from scipy.stats import chi2
-import densities as D
+#from numpy.random import randn
+#from scipy.stats import chi2
+#import densities as D
import ctypes
-from ctypes import cdll, c_uint, c_int, c_double, POINTER
+from ctypes import c_uint, c_int
from numpy.ctypeslib import ndpointer, load_library
ctypes_major = int(ctypes.__version__.split('.')[0])
if ctypes_major < 1:
- msg = "version of ctypes is %s, expected at least %s" \
- % (ctypes.__version__, '1.0.0')
- raise ImportError(msg)
+    raise ImportError("version of ctypes is %s, expected at least %s"
+                      % (ctypes.__version__, '1.0.1'))
# Requirements for diag gden
_gden = load_library('c_gden.so', __file__)
@@ -75,9 +81,9 @@
if len(N.shape(x)) != 2:
raise DenError("x is not rank 2")
- (n, d) = x.shape
- (dm0, dm1) = mu.shape
- (dv0, dv1) = va.shape
+ (n, d) = N.shape(x)
+ (dm0, dm1) = N.shape(mu)
+ (dv0, dv1) = N.shape(va)
# Check x and mu same dimension
if dm0 != 1:
@@ -165,9 +171,9 @@
# inva.ctypes.data_as(POINTER(c_double)),
# y.ctypes.data_as(POINTER(c_double)))
else:
- y = _scalar_gauss_den(x[:,0], mu[0,0], va[0,0], log)
+ y = _scalar_gauss_den(x[:, 0], mu[0, 0], va[0, 0], log)
for i in range(1, d):
- y += _scalar_gauss_den(x[:,i], mu[0,i], va[0,i], log)
+ y += _scalar_gauss_den(x[:, i], mu[0, i], va[0, i], log)
return y
def _full_gauss_den(x, mu, va, log):
@@ -199,19 +205,20 @@
return y
if __name__ == "__main__":
- #=========================================
- # Test accuracy between pure and C python
- #=========================================
- mu = N.array([2.0, 3])
- va = N.array([5.0, 3])
+ pass
+ ##=========================================
+ ## Test accuracy between pure and C python
+ ##=========================================
+ #mu = N.array([2.0, 3])
+ #va = N.array([5.0, 3])
- # Generate a multivariate gaussian of mean mu and covariance va
- nframes = 1e4
- X = randn(nframes, 2)
- Yc = N.dot(N.diag(N.sqrt(va)), X.transpose())
- Yc = Yc.transpose() + mu
+ ## Generate a multivariate gaussian of mean mu and covariance va
+ #nframes = 1e4
+ #X = randn(nframes, 2)
+ #Yc = N.dot(N.diag(N.sqrt(va)), X.transpose())
+ #Yc = Yc.transpose() + mu
- Y = D.gauss_den(Yc, mu, va)
- Yt = gauss_den(Yc, mu, va)
+ #Y = D.gauss_den(Yc, mu, va)
+ #Yt = gauss_den(Yc, mu, va)
- print "Diff is " + str(N.sqrt(N.sum((Y-Yt) ** 2))/nframes/2)
+ #print "Diff is " + str(N.sqrt(N.sum((Y-Yt) ** 2))/nframes/2)
Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py 2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/densities.py 2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,8 +1,8 @@
#! /usr/bin/python
#
# Copyrighted David Cournapeau
-# Last Change: Sat Jun 09 08:00 PM 2007 J
-"""This module implements various bsic functions related to multivariate
+# Last Change: Sat Jun 09 10:00 PM 2007 J
+"""This module implements various basic functions related to multivariate
gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
__docformat__ = 'restructuredtext'
@@ -50,10 +50,10 @@
pdf : ndarray
Returns a rank 1 array of the pdf at points x.
- Notes
- -----
- Vector are row vectors, except va which can be a matrix
- (row vector variance for diagonal variance)."""
+ Note
+ ----
+    Vectors are row vectors, except va which can be a matrix
+ (row vector variance for diagonal variance)."""
lmu = N.atleast_2d(mu)
lva = N.atleast_2d(va)
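For the diagonal case whose docstring is cleaned up above, the quantity gauss_den computes is just a product of one-dimensional normal densities; a pure-numpy sketch of that formula (not the module's implementation, and the helper name is made up):

    import numpy as N

    def diag_gauss_den_sketch(x, mu, va):
        # pdf of a diagonal gaussian at each row of x; returns a rank 1 array
        d = mu.size
        fac = 1.0 / N.sqrt((2 * N.pi) ** d * N.prod(va))
        return fac * N.exp(-0.5 * N.sum((x - mu) ** 2 / va, axis=1))

    x = N.random.randn(10, 2)
    y = diag_gauss_den_sketch(x, N.array([0., 0.]), N.array([1., 2.]))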
Property changes on: trunk/Lib/sandbox/pyem/doc
___________________________________________________________________
Name: svn:ignore
- *.pyc
*.swp
*.pyd
*.so
*.prof
+ *.pyc
*.swp
*.pyd
*.so
*.prof
*.out
*.tex
Modified: trunk/Lib/sandbox/pyem/doc/Makefile
===================================================================
--- trunk/Lib/sandbox/pyem/doc/Makefile 2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/doc/Makefile 2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,4 +1,4 @@
-# Last Change: Mon May 28 10:00 AM 2007 J
+# Last Change: Sat Jun 09 05:00 PM 2007 J
# This makefile is used to build the pdf from the rest file and inlined code
# from python examples
@@ -7,7 +7,7 @@
rst2tex = PYTHONPATH=/home/david/local/lib/python2.4/site-packages rst2newlatex.py \
--stylesheet-path base.tex --user-stylesheet user.tex
-pytexfiles = pyem.tex basic_example1.tex basic_example2.tex basic_example3.tex
+pytexfiles = pyem.tex basic_example1.tex basic_example2.tex basic_example3.tex pdfestimation.tex
SOURCEPATH = $(PWD)
@@ -24,15 +24,18 @@
pyem.tex: index.txt
$(rst2tex) $< > $@
-basic_example1.tex: examples/basic_example1.py
+basic_example1.tex: ../examples/basic_example1.py
$(py2tex) $< > $@
-basic_example2.tex: examples/basic_example2.py
+basic_example2.tex: ../examples/basic_example2.py
$(py2tex) $< > $@
-basic_example3.tex: examples/basic_example3.py
+basic_example3.tex: ../examples/basic_example3.py
$(py2tex) $< > $@
+pdfestimation.tex: ../examples/pdfestimation.py
+ $(py2tex) $< > $@
+
clean:
for i in $(pytexfiles); do \
rm -f `echo $$i`; \
Modified: trunk/Lib/sandbox/pyem/doc/index.txt
===================================================================
--- trunk/Lib/sandbox/pyem/doc/index.txt 2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/doc/index.txt 2007-06-09 14:03:01 UTC (rev 3087)
@@ -13,7 +13,7 @@
file: Bic_example.png
/restindex
-.. Last Change: Mon May 28 10:00 AM 2007 J
+.. Last Change: Sat Jun 09 07:00 PM 2007 J
===================================================
PyEM, a python package for Gaussian mixture models
@@ -176,14 +176,36 @@
Examples
=========
-TODO.
+Using EM for pdf estimation
+---------------------------
+The following example uses the old faithful dataset and is available in the
+example directory. It models the joint distribution (d(t), w(t+1)), where d(t)
+is the duration time, and w(t+1) the waiting time for the next eruption. It
+selects the best model using the BIC.
+
+.. raw:: latex
+
+ \input{pdfestimation.tex}
+
+.. figure:: pdfestimation.png
+ :width: 500
+ :height: 400
+
+    isodensity curves for the old faithful data modeled by mixtures of 1, 2,
+    3 and 4 components (top to bottom, left to right).
+
+
Using EM for clustering
-----------------------
+TODO (this is fundamentally the same as pdf estimation, though)
+
Using PyEM for supervised learning
----------------------------------
+TODO
+
Note on performances
====================
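The example described above condenses to the following sketch. It assumes GM, GMM and EM are exported at the package level and that GMM exposes a bic method, as the gmm_em.py and examples diffs in this commit suggest; get_faithful is the helper from the examples directory.

    import numpy as N
    from scipy.sandbox import pyem
    import utils                        # helper module from the examples dir

    data = utils.get_faithful()         # scaled (duration, waiting) pairs
    bc, models = [], []
    for k in range(1, 5):
        gm = pyem.GM(2, k)              # 2d mixture with k components
        gmm = pyem.GMM(gm, 'kmean')     # EM-trainable model, kmean init
        pyem.EM().train(data, gmm, maxiter=30, thresh=1e-8)
        bc.append(gmm.bic(data))        # Bayesian Information Criterion
        models.append(gm)
    best = models[N.argmax(bc)]         # the BIC selects the best model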
Added: trunk/Lib/sandbox/pyem/doc/pdfestimation.png
===================================================================
(Binary files differ)
Property changes on: trunk/Lib/sandbox/pyem/doc/pdfestimation.png
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Modified: trunk/Lib/sandbox/pyem/doc/tutorial.pdf
===================================================================
(Binary files differ)
Modified: trunk/Lib/sandbox/pyem/examples/pdfestimation.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/pdfestimation.py 2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/examples/pdfestimation.py 2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,15 +1,11 @@
#! /usr/bin/env python
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Sat Jun 09 07:00 PM 2007 J
# Example of doing pdf estimation with EM algorithm. Requires matplotlib.
import numpy as N
-from numpy.testing import set_package_path, restore_path
-
import pylab as P
-set_package_path()
-import pyem
-restore_path()
+from scipy.sandbox import pyem
import utils
oldfaithful = utils.get_faithful()
@@ -45,6 +41,8 @@
X, Y, Z, V = gm.density_on_grid()
P.contour(X, Y, Z, V)
P.plot(dt[:, 0], dt[:, 1], '.')
+ P.xlabel('duration time (scaled)')
+ P.ylabel('waiting time (scaled)')
print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1)
P.show()
Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,8 +1,14 @@
# /usr/bin/python
-# Last Change: Sat Jun 09 08:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
-# Module to implement GaussianMixture class.
+"""Module implementing GM, a class which represents Gaussian mixtures.
+GM instances can be used to create, sample mixtures. They also provide
+different plotting facilities, such as isodensity contour for multi dimensional
+models, ellipses of confidence."""
+
+__docformat__ = 'restructuredtext'
+
import numpy as N
from numpy.random import randn, rand
import numpy.linalg as lin
@@ -21,12 +27,12 @@
# be used as long as w, mu and va are not set
# - We have to use scipy now for chisquare pdf, so there may be other
# methods to be used, ie for implementing random index.
-# - there is no check on internal state of the GM, that is does w, mu and va values
-# make sense (eg singular values)
-# - plot1d is still very rhough. There should be a sensible way to
-# modify the result plot (maybe returns a dic with global pdf, component pdf and
-# fill matplotlib handles). Should be coherent with plot
-class GmParamError:
+# - there is no check on internal state of the GM, that is whether w, mu and
+#   va values make sense (eg singular values)
+# - plot1d is still very rough. There should be a sensible way to modify the
+#   result plot (maybe return a dict with global pdf, component pdf and fill
+#   matplotlib handles). Should be coherent with plot
+class GmParamError(Exception):
"""Exception raised for errors in gmm params
Attributes:
@@ -34,6 +40,7 @@
message -- explanation of the error
"""
def __init__(self, message):
+ Exception.__init__(self)
self.message = message
def __str__(self):
@@ -52,11 +59,27 @@
# Methods to construct a mixture
#===============================
def __init__(self, d, k, mode = 'diag'):
- """Init a Gaussian Mixture of k components, each component being a
- d multi-variate Gaussian, with covariance matrix of style mode.
-
- If you want to build a Gaussian Mixture with knowns weights, means
- and variances, you can use GM.fromvalues method directly"""
+ """Init a Gaussian Mixture.
+
+ :Parameters:
+ d : int
+ dimension of the mixture.
+ k : int
+                number of components in the mixture.
+ mode : string
+                mode of covariance ('diag' or 'full').
+
+ :Returns:
+ an instance of GM.
+
+ Note
+ ----
+
+        Only 'full' and 'diag' modes are supported for now.
+
+ :SeeAlso:
+            If you want to build a Gaussian Mixture with known weights, means
+            and variances, you can use the GM.fromvalues method directly."""
if mode not in self._cov_mod:
raise GmParamError("mode %s not recognized" + str(mode))
@@ -80,16 +103,42 @@
self.is1d = True
def set_param(self, weights, mu, sigma):
- """Set parameters of the model. Args should
- be conformant with metparameters d and k given during
- initialisation"""
+ """Set parameters of the model.
+
+        Args should be conformant with the meta parameters d and k given
+        during initialisation.
+
+ :Parameters:
+ weights : ndarray
+ weights of the mixture (k elements)
+ mu : ndarray
+                means of the mixture. One component's mean per row, k rows for
+                k components.
+ sigma : ndarray
+                variances of the mixture. For diagonal models, one row contains
+                the diagonal elements of the covariance matrix. For full
+                covariance, each covariance matrix occupies d rows.
+
+ Examples
+ --------
+ Create a 3 component, 2 dimension mixture with full covariance matrices
+
+ >>> w = numpy.array([0.2, 0.5, 0.3])
+        >>> mu = numpy.array([[0., 0.], [1., 1.], [-1., 2.]])
+        >>> va = numpy.array([[1., 0.], [0., 1.], [2., 0.5], [0.5, 1.],
+        ...                   [1., 0.2], [0.2, 1.]])
+ >>> gm = GM(2, 3, 'full')
+ >>> gm.set_param(w, mu, va)
+
+ :SeeAlso:
+            If you already know the parameters when creating the model, you can
+            simply use the class method GM.fromvalues."""
k, d, mode = check_gmm_param(weights, mu, sigma)
if not k == self.k:
raise GmParamError("Number of given components is %d, expected %d"
% (k, self.k))
if not d == self.d:
- raise GmParamError("Dimension of the given model is %d, expected %d"
- % (d, self.d))
+ raise GmParamError("Dimension of the given model is %d, "\
+ "expected %d" % (d, self.d))
if not mode == self.mode and not d == 1:
raise GmParamError("Given covariance mode is %s, expected %s"
% (mode, self.mode))
@@ -104,16 +153,34 @@
"""This class method can be used to create a GM model
directly from its parameters weights, mean and variance
- w, mu, va = GM.gen_param(d, k)
- gm = GM(d, k)
- gm.set_param(w, mu, va)
+ :Parameters:
+ weights : ndarray
+ weights of the mixture (k elements)
+ mu : ndarray
+                means of the mixture. One component's mean per row, k rows for
+                k components.
+ sigma : ndarray
+                variances of the mixture. For diagonal models, one row contains
+                the diagonal elements of the covariance matrix. For full
+                covariance, each covariance matrix occupies d rows.
+ :Returns:
+ gm : GM
+ an instance of GM.
+
+ Examples
+ --------
+
+ >>> w, mu, va = GM.gen_param(d, k)
+ >>> gm = GM(d, k)
+ >>> gm.set_param(w, mu, va)
+
and
- w, mu, va = GM.gen_param(d, k)
- gm = GM.fromvalue(w, mu, va)
+ >>> w, mu, va = GM.gen_param(d, k)
+        >>> gm = GM.fromvalues(w, mu, va)
- Are equivalent """
+ are strictly equivalent."""
k, d, mode = check_gmm_param(weights, mu, sigma)
res = cls(d, k, mode)
res.set_param(weights, mu, sigma)
@@ -123,7 +190,15 @@
# Fundamental facilities (sampling, confidence, etc..)
#=====================================================
def sample(self, nframes):
- """ Sample nframes frames from the model """
+ """ Sample nframes frames from the model.
+
+ :Parameters:
+ nframes : int
+ number of samples to draw.
+
+ :Returns:
+ samples : ndarray
+ samples in the format one sample per row (nframes, d)."""
if not self.is_valid:
raise GmParamError("""Parameters of the model has not been
set yet, please set them using self.set_param()""")
@@ -134,47 +209,60 @@
X = randn(nframes, self.d)
if self.mode == 'diag':
- X = self.mu[S, :] + X * N.sqrt(self.va[S,:])
+ X = self.mu[S, :] + X * N.sqrt(self.va[S, :])
elif self.mode == 'full':
# Faster:
cho = N.zeros((self.k, self.va.shape[1], self.va.shape[1]))
for i in range(self.k):
# Using cholesky looks more stable than sqrtm; sqrtm is not
# available in numpy anyway, only in scipy...
- cho[i] = lin.cholesky(self.va[i*self.d:i*self.d+self.d,:])
+ cho[i] = lin.cholesky(self.va[i*self.d:i*self.d+self.d, :])
for s in range(self.k):
tmpind = N.where(S == s)[0]
X[tmpind] = N.dot(X[tmpind], cho[s].transpose()) + self.mu[s]
else:
- raise GmParamError('cov matrix mode not recognized, this is a bug !')
+ raise GmParamError("cov matrix mode not recognized, "\
+ "this is a bug !")
return X
- def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP,
+ def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP,
level = misc.DEF_LEVEL):
"""Returns a list of confidence ellipsoids describing the Gmm
defined by mu and va. Check densities.gauss_ell for details
- Returns:
- -Xe: a list of x coordinates for the ellipses (Xe[i] is
- the array containing x coordinates of the ith Gaussian)
- -Ye: a list of y coordinates for the ellipses
+ :Parameters:
+ dim : sequence
+                sequence of two integers which represent the dimensions onto
+                which to project the ellipsoids.
+ npoints : int
+ number of points to generate for the ellipse.
+ level : float
+ level of confidence (between 0 and 1).
- Example:
+ :Returns:
+ Xe : sequence
+ a list of x coordinates for the ellipses (Xe[i] is the array
+ containing x coordinates of the ith Gaussian)
+ Ye : sequence
+ a list of y coordinates for the ellipses.
+
+ Examples
+ --------
Suppose we have w, mu and va as parameters for a mixture, then:
- gm = GM(d, k)
- gm.set_param(w, mu, va)
- X = gm.sample(1000)
- Xe, Ye = gm.conf_ellipsoids()
- pylab.plot(X[:,0], X[:, 1], '.')
- for k in len(w):
- pylab.plot(Xe[k], Ye[k], 'r')
+        >>> gm = GM(d, k)
+        >>> gm.set_param(w, mu, va)
+        >>> X = gm.sample(1000)
+        >>> Xe, Ye = gm.conf_ellipses()
+        >>> pylab.plot(X[:, 0], X[:, 1], '.')
+        >>> for i in range(len(w)):
+        ...     pylab.plot(Xe[i], Ye[i], 'r')
Will plot samples X draw from the mixture model, and
plot the ellipses of equi-probability from the mean with
- fixed level of confidence 0.39. """
+ default level of confidence."""
if self.is1d:
raise ValueError("This function does not make sense for 1d "
"mixtures.")
@@ -187,14 +275,14 @@
Ye = []
if self.mode == 'diag':
for i in range(self.k):
- xe, ye = densities.gauss_ell(self.mu[i,:], self.va[i,:],
+ xe, ye = densities.gauss_ell(self.mu[i, :], self.va[i, :],
dim, npoints, level)
Xe.append(xe)
Ye.append(ye)
elif self.mode == 'full':
for i in range(self.k):
- xe, ye = densities.gauss_ell(self.mu[i,:],
- self.va[i*self.d:i*self.d+self.d,:],
+ xe, ye = densities.gauss_ell(self.mu[i, :],
+ self.va[i*self.d:i*self.d+self.d, :],
dim, npoints, level)
Xe.append(xe)
Ye.append(ye)
@@ -202,8 +290,11 @@
return Xe, Ye
def check_state(self):
+ """Returns true if the parameters of the model are valid.
+
+        For Gaussian mixtures, this means weights summing to 1 and positive
+        definite variances.
"""
- """
if not self.is_valid:
raise GmParamError("""Parameters of the model has not been
set yet, please set them using self.set_param()""")
@@ -222,18 +313,33 @@
cond = N.zeros(self.k)
ava = N.absolute(self.va)
for c in range(self.k):
- cond[c] = N.amax(ava[c,:]) / N.amin(ava[c,:])
+ cond[c] = N.amax(ava[c, :]) / N.amin(ava[c, :])
print cond
- def gen_param(self, d, nc, varmode = 'diag', spread = 1):
- """Generate valid parameters for a gaussian mixture model.
- d is the dimension, nc the number of components, and varmode
- the mode for cov matrices.
+ @classmethod
+ def gen_param(cls, d, nc, varmode = 'diag', spread = 1):
+ """Generate random, valid parameters for a gaussian mixture model.
+ :Parameters:
+ d : int
+ the dimension
+ nc : int
+ the number of components
+ varmode : string
+ covariance matrix mode ('full' or 'diag').
+
+ :Returns:
+ w : ndarray
+ weights of the mixture
+ mu : ndarray
+ means of the mixture
+            va : ndarray
+ variances of the mixture
+
+ Notes
+ -----
This is a class method.
-
- Returns: w, mu, va
"""
w = abs(randn(nc))
w = w / sum(w, 0)
@@ -251,13 +357,13 @@
return w, mu, va
- gen_param = classmethod(gen_param)
+ #gen_param = classmethod(gen_param)
- #=======================
- # Regularization methods
- #=======================
- def _regularize(self):
- raise NotImplemented("No regularization")
+ # #=======================
+ # # Regularization methods
+ # #=======================
+ # def _regularize(self):
+ # raise NotImplemented("No regularization")
#=================
# Plotting methods
@@ -266,10 +372,29 @@
level = misc.DEF_LEVEL):
"""Plot the ellipsoides directly for the model
- Returns a list of lines, so that their style can be modified. By default,
- the style is red color, and nolegend for all of them.
+        Returns a list of line handles, so that their style can be modified.
+        By default, the style is red color, and nolegend for all of them.
- Does not work for 1d"""
+ :Parameters:
+ dim : sequence
+ sequence of two integers, the dimensions of interest.
+ npoints : int
+ Number of points to use for the ellipsoids.
+            level : float
+                level of confidence (between 0 and 1).
+
+ :Returns:
+ h : sequence
+                a list of line handles so that their properties can be
+                modified (eg color, label, etc...).
+
+ Note
+ ----
+        Does not work for 1d. Requires matplotlib.
+
+ :SeeAlso:
+            conf_ellipses is used to compute the ellipses. Use it if you want
+            to plot with something other than matplotlib."""
if self.is1d:
raise ValueError("This function does not make sense for 1d "
"mixtures.")
@@ -282,22 +407,32 @@
Xe, Ye = self.conf_ellipses(dim, npoints, level)
try:
import pylab as P
- return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in range(k)]
+ return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in
+ range(k)]
#for i in range(k):
# P.plot(Xe[i], Ye[i], 'r')
except ImportError:
raise GmParamError("matplotlib not found, cannot plot...")
- def plot1d(self, level = 0.5, fill = 0, gpdf = 0):
- """This function plots the pdfs of each component of the model.
- If gpdf is 1, also plots the global pdf. If fill is 1, fill confidence
- areas using level argument as a level value
+ def plot1d(self, level = misc.DEF_LEVEL, fill = False, gpdf = False):
+ """Plots the pdf of each component of the 1d mixture.
- Returns a dictionary h of plot handles so that their properties can
- be modified (eg color, label, etc...):
- - h['pdf'] is a list of lines, one line per component pdf
- - h['gpdf'] is the line for the global pdf
- - h['conf'] is a list of filling area
+ :Parameters:
+            level : float
+                level of confidence (to use with fill argument)
+            fill : bool
+                if True, the area of the pdf corresponding to the given
+                confidence intervals is filled.
+            gpdf : bool
+                if True, the global pdf is plotted.
+
+ :Returns:
+ h : dict
+ Returns a dictionary h of plot handles so that their properties
+ can be modified (eg color, label, etc...):
+ - h['pdf'] is a list of lines, one line per component pdf
+ - h['gpdf'] is the line for the global pdf
+ - h['conf'] is a list of filling area
"""
if not self.is1d:
raise ValueError("This function does not make sense for "
@@ -310,12 +445,12 @@
raise GmParamError("the model is not one dimensional model")
from scipy.stats import norm
nrm = norm(0, 1)
- pval = N.sqrt(self.va[:,0]) * nrm.ppf((1+level)/2)
+ pval = N.sqrt(self.va[:, 0]) * nrm.ppf((1+level)/2)
# Compute reasonable min/max for the normal pdf: [-mc * std, mc * std]
# gives the range we are taking in account for each gaussian
mc = 3
- std = N.sqrt(self.va[:,0])
+ std = N.sqrt(self.va[:, 0])
m = N.amin(self.mu[:, 0] - mc * std)
M = N.amax(self.mu[:, 0] + mc * std)
@@ -326,7 +461,7 @@
# Prepare the dic of plot handles to return
ks = ['pdf', 'conf', 'gpdf']
- hp = dict((i,[]) for i in ks)
+ hp = dict((i, []) for i in ks)
try:
import pylab as P
for c in range(self.k):
@@ -336,7 +471,8 @@
h = P.plot(x, y, 'r', label ='_nolegend_')
hp['pdf'].extend(h)
if fill:
- #P.axvspan(-pval[c] + self.mu[c][0], pval[c] + self.mu[c][0],
+ #P.axvspan(-pval[c] + self.mu[c][0], pval[c] +
+ #self.mu[c][0],
# facecolor = 'b', alpha = 0.2)
id1 = -pval[c] + self.mu[c]
id2 = pval[c] + self.mu[c]
@@ -350,7 +486,8 @@
facecolor = 'b', alpha = 0.1, label='_nolegend_')
hp['conf'].extend(h)
#P.fill([xc[0], xc[0], xc[-1], xc[-1]],
- # [0, Yf[0], Yf[-1], 0], facecolor = 'b', alpha = 0.2)
+ # [0, Yf[0], Yf[-1], 0], facecolor = 'b', alpha =
+ # 0.2)
if gpdf:
h = P.plot(x, Yt, 'r:', label='_nolegend_')
hp['gpdf'] = h
@@ -363,7 +500,7 @@
the pdf of the mixture."""
# XXX: have a public function to compute the pdf at given points
# instead...
- std = N.sqrt(self.va[:,0])
+ std = N.sqrt(self.va[:, 0])
retval = N.empty((x.size, self.k))
for c in range(self.k):
retval[:, c] = self.w[c]/(N.sqrt(2*N.pi) * std[c]) * \
@@ -373,9 +510,30 @@
def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50,
maxlevel = 0.95):
- """Do all the necessary computation for contour plot of mixture's density.
+ """Do all the necessary computation for contour plot of mixture's
+ density.
- Returns X, Y, Z and V as expected by mpl contour function."""
+ :Parameters:
+ dim : sequence
+ sequence of two integers, the dimensions of interest.
+ nx : int
+ Number of points to use for the x axis of the grid
+ ny : int
+ Number of points to use for the y axis of the grid
+
+ :Returns:
+ X : ndarray
+ points of the x axis of the grid
+ Y : ndarray
+ points of the y axis of the grid
+ Z : ndarray
+ values of the density on X and Y
+ V : ndarray
+ Contour values to display.
+
+ Note
+ ----
+ X, Y, Z and V are as expected by matplotlib contour function."""
if self.is1d:
raise ValueError("This function does not make sense for 1d "
"mixtures.")
@@ -397,13 +555,14 @@
X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \
N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim)
lden = N.log(den)
+ # XXX: how to find "good" values for level ?
V = [-5, -3, -1, -0.5, ]
V.extend(N.linspace(0, N.max(lden), 4).tolist())
return X, Y, lden, N.array(V)
- def _densityctr(self, xrange, yrange, dim = misc.DEF_VIS_DIM):
+ def _densityctr(self, rangex, rangey, dim = misc.DEF_VIS_DIM):
"""Helper function to compute density contours on a grid."""
- gr = N.meshgrid(xrange, yrange)
+ gr = N.meshgrid(rangex, rangey)
X = gr[0].flatten()
Y = gr[1].flatten()
xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1)
@@ -412,7 +571,7 @@
dva = self._get_va(dim)
den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w
den = N.sum(den, 1)
- den = den.reshape(len(yrange), len(xrange))
+ den = den.reshape(len(rangey), len(rangex))
X = gr[0]
Y = gr[1]
@@ -435,16 +594,16 @@
# Syntactic sugar
def __repr__(self):
- repr = ""
- repr += "Gaussian Mixture:\n"
- repr += " -> %d dimensions\n" % self.d
- repr += " -> %d components\n" % self.k
- repr += " -> %s covariance \n" % self.mode
+ msg = ""
+ msg += "Gaussian Mixture:\n"
+ msg += " -> %d dimensions\n" % self.d
+ msg += " -> %d components\n" % self.k
+ msg += " -> %s covariance \n" % self.mode
if self.is_valid:
- repr += "Has initial values"""
+ msg += "Has initial values"""
else:
- repr += "Has no initial values yet"""
- return repr
+ msg += "Has no initial values yet"""
+ return msg
def __str__(self):
return self.__repr__()
@@ -472,19 +631,26 @@
def check_gmm_param(w, mu, va):
"""Check that w, mu and va are valid parameters for
- a mixture of gaussian: w should sum to 1, there should
- be the same number of component in each param, the variances
- should be positive definite, etc...
+ a mixture of gaussian.
- Params:
- w = vector or list of weigths of the mixture (K elements)
- mu = matrix: K * d
- va = list of variances (vector K * d or square matrices Kd * d)
+    w should sum to 1, there should be the same number of components in each
+    param, the variances should be positive definite, etc...
+
+ :Parameters:
+ w : ndarray
+            vector or list of weights of the mixture (K elements)
+ mu : ndarray
+ matrix: K * d
+ va : ndarray
+ list of variances (vector K * d or square matrices Kd * d)
- returns:
- K = number of components
- d = dimension
- mode = 'diag' if diagonal covariance, 'full' of full matrices
+ :Returns:
+ k : int
+ number of components
+ d : int
+ dimension
+ mode : string
+            'diag' if diagonal covariance, 'full' if full matrices
"""
# Check that w is valid
@@ -527,34 +693,35 @@
return K, d, mode
if __name__ == '__main__':
- # Meta parameters:
- # - k = number of components
- # - d = dimension
- # - mode : mode of covariance matrices
- d = 5
- k = 4
+ pass
+ ## # Meta parameters:
+ ## # - k = number of components
+ ## # - d = dimension
+ ## # - mode : mode of covariance matrices
+ ## d = 5
+ ## k = 4
- # Now, drawing a model
- mode = 'full'
- nframes = 1e3
+ ## # Now, drawing a model
+ ## mode = 'full'
+ ## nframes = 1e3
- # Build a model with random parameters
- w, mu, va = GM.gen_param(d, k, mode, spread = 3)
- gm = GM.fromvalues(w, mu, va)
+ ## # Build a model with random parameters
+ ## w, mu, va = GM.gen_param(d, k, mode, spread = 3)
+ ## gm = GM.fromvalues(w, mu, va)
- # Sample nframes frames from the model
- X = gm.sample(nframes)
+ ## # Sample nframes frames from the model
+ ## X = gm.sample(nframes)
- # Plot the data
- import pylab as P
- P.plot(X[:, 0], X[:, 1], '.', label = '_nolegend_')
+ ## # Plot the data
+ ## import pylab as P
+ ## P.plot(X[:, 0], X[:, 1], '.', label = '_nolegend_')
- # Real confidence ellipses with confidence level
- level = 0.50
- h = gm.plot(level=level)
+ ## # Real confidence ellipses with confidence level
+ ## level = 0.50
+ ## h = gm.plot(level=level)
- # set the first ellipse label, which will appear in the legend
- h[0].set_label('confidence ell at level ' + str(level))
+ ## # set the first ellipse label, which will appear in the legend
+ ## h[0].set_label('confidence ell at level ' + str(level))
- P.legend(loc = 0)
- P.show()
+ ## P.legend(loc = 0)
+ ## P.show()
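The commented-out __main__ block above still documents the intended workflow; pulled together as a standalone sketch (assuming GM is importable from the package, as in the examples directory):

    import pylab as P
    from scipy.sandbox.pyem import GM

    d, k = 2, 3
    w, mu, va = GM.gen_param(d, k, 'diag', spread=3)
    gm = GM.fromvalues(w, mu, va)   # same as GM(d, k, 'diag') + set_param
    X = gm.sample(1000)             # one sample per row, shape (1000, d)

    P.plot(X[:, 0], X[:, 1], '.', label='_nolegend_')
    h = gm.plot(level=0.5)          # red confidence ellipses, one per component
    h[0].set_label('confidence ell at level 0.5')
    P.legend(loc=0)
    P.show()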
Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,6 +1,12 @@
# /usr/bin/python
-# Last Change: Fri Jun 08 08:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
+"""Module implementing GMM, a class to estimate Gaussian mixture models using
+EM, and EM, a class which use GMM instances to estimate models parameters using
+the ExpectationMaximization algorithm."""
+
+__docformat__ = 'restructuredtext'
+
# TODO:
# - which methods to avoid va shrinking to 0 ? There are several options,
# not sure which ones are appropriates
@@ -8,22 +14,23 @@
# - online EM
import numpy as N
-import numpy.linalg as lin
+#import numpy.linalg as lin
from numpy.random import randn
#import _c_densities as densities
import densities
#from kmean import kmean
from scipy.cluster.vq import kmeans2 as kmean
-from gauss_mix import GM
+#from gauss_mix import GM
-from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
+#from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
# Error classes
class GmmError(Exception):
"""Base class for exceptions in this module."""
- pass
+ def __init__(self):
+ Exception.__init__(self)
-class GmmParamError:
+class GmmParamError(GmmError):
"""Exception raised for errors in gmm params
Attributes:
@@ -31,41 +38,33 @@
message -- explanation of the error
"""
def __init__(self, message):
+ GmmError.__init__(self)
self.message = message
def __str__(self):
return self.message
-# Not sure yet about how to design different mixture models. Most of the code
-# is different # (pdf, update part of EM, etc...) and I am not sure it makes
-# sense to use inheritance for # interface specification in python, since its
-# dynamic type systeme.
-
-# Anyway, a mixture model class should encapsulates all details
-# concerning getting sufficient statistics (SS), likelihood and bic.
class MixtureModel(object):
pass
class ExpMixtureModel(MixtureModel):
- """Class to model mixture of exponential pdf (eg Gaussian, exponential, Laplace,
- etc..). This is a special case because some parts of EM are common for those
- models..."""
+ """Class to model mixture of exponential pdf (eg Gaussian, exponential,
+ Laplace, etc..). This is a special case because some parts of EM are common
+ for those models..."""
pass
class GMM(ExpMixtureModel):
- """ A class to model a Gaussian Mixture Model (GMM). An instance of
- this class is created by giving weights, mean and variances in the ctor.
- An instanciated object can be sampled, trained by EM.
-
- The class method gen_model can be used without instanciation."""
-
+ """ A class to model a Gaussian Mixture Model (GMM). An instance of this
+ class is created by giving weights, mean and variances in the ctor. An
+ instanciated object can be sampled, trained by EM. """
def init_kmean(self, data, niter = 5):
""" Init the model with kmean."""
k = self.gm.k
d = self.gm.d
init = data[0:k, :]
- # XXX: This is bogus: should do better (in kmean or here, do not know yet)
+        # XXX: This is a bogus initialization; should do better (in kmean or
+        # here, do not know yet)
(code, label) = kmean(data, init, niter, minit = 'matrix')
w = N.ones(k) / k
@@ -74,14 +73,15 @@
va = N.zeros((k, d))
for i in range(k):
for j in range(d):
- va[i,j] = N.cov(data[N.where(label==i), j], rowvar = 0)
+ va[i, j] = N.cov(data[N.where(label==i), j], rowvar = 0)
elif self.gm.mode == 'full':
va = N.zeros((k*d, d))
for i in range(k):
- va[i*d:i*d+d,:] = \
+ va[i*d:i*d+d, :] = \
N.cov(data[N.where(label==i)], rowvar = 0)
else:
- raise GmmParamError("mode " + str(mode) + " not recognized")
+ raise GmmParamError("mode " + str(self.gm.mode) + \
+ " not recognized")
self.gm.set_param(w, mu, va)
@@ -96,8 +96,8 @@
mu = randn(k, d)
va = N.fabs(randn(k, d))
else:
- raise GmmParamError("""init_random not implemented for
- mode %s yet""", mode)
+ raise GmmParamError("init_random not implemented for "
+ "mode %s yet", self.gm.mode)
self.gm.set_param(w, mu, va)
@@ -109,8 +109,18 @@
# - To handle the different modes, we could do something "fancy" such as
# replacing methods, to avoid checking cases everywhere and unconsistency.
def __init__(self, gm, init = 'kmean'):
- """ Initialize a GMM with weight w, mean mu and variances va, and initialization
- method for training init (kmean by default)"""
+ """Initialize a mixture model.
+
+ Initialize the model from a GM instance. This class implements all the
+ necessary functionalities for EM.
+
+ :Parameters:
+ gm : GM
+ the mixture model to train.
+ init : string
+ initialization method to use.
+
+ """
self.gm = gm
# Possible init methods
@@ -124,17 +134,18 @@
self.initst = init
def sufficient_statistics(self, data):
- """ Return normalized and non-normalized sufficient statistics
- from the model.
+ """Compute responsabilities.
- Computes the latent variable distribution (a
- posteriori probability) knowing the explicit data
- for the Gaussian model (w, mu, var): gamma(t, i) =
- P[state = i | observation = data(t); w, mu, va]
+ Return normalized and non-normalized sufficient statistics from the
+ model.
+
+ Note
+ ----
+ Computes the latent variable distribution (a posteriori probability)
+ knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
+ i) = P[state = i | observation = data(t); w, mu, va]
This is basically the E step of EM for GMM."""
- n = data.shape[0]
-
# compute the gaussian pdf
tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
# multiply by the weight
@@ -149,22 +160,22 @@
from the a posteriori pdf, computed by gmm_posterior
(E step).
"""
- k = self.gm.k
- d = self.gm.d
- n = data.shape[0]
- invn = 1.0/n
- mGamma = N.sum(gamma, axis = 0)
+ k = self.gm.k
+ d = self.gm.d
+ n = data.shape[0]
+ invn = 1.0/n
+ mGamma = N.sum(gamma, axis = 0)
if self.gm.mode == 'diag':
- mu = N.zeros((k, d))
- va = N.zeros((k, d))
- gamma = gamma.T
+ mu = N.zeros((k, d))
+ va = N.zeros((k, d))
+ gamma = gamma.T
for c in range(k):
- x = N.dot(gamma[c:c+1,:], data)[0,:]
- xx = N.dot(gamma[c:c+1,:], data ** 2)[0,:]
+ x = N.dot(gamma[c:c+1, :], data)[0, :]
+ xx = N.dot(gamma[c:c+1, :], data ** 2)[0, :]
- mu[c,:] = x / mGamma[c]
- va[c,:] = xx / mGamma[c] - mu[c,:] ** 2
+ mu[c, :] = x / mGamma[c]
+ va[c, :] = xx / mGamma[c] - mu[c, :] ** 2
w = invn * mGamma
elif self.gm.mode == 'full':
@@ -177,21 +188,22 @@
mu = N.zeros((k, d))
va = N.zeros((k*d, d))
- gamma = gamma.transpose()
+ gamma = gamma.transpose()
for c in range(k):
#x = N.sum(N.outer(gamma[:, c],
# N.ones((1, d))) * data, axis = 0)
- x = N.dot(gamma[c:c+1,:], data)[0,:]
- xx = N.zeros((d, d))
+ x = N.dot(gamma[c:c+1, :], data)[0, :]
+ xx = N.zeros((d, d))
# This should be much faster than recursing on n...
for i in range(d):
for j in range(d):
- xx[i,j] = N.sum(data[:,i] * data[:,j] * gamma[c,:], axis = 0)
+ xx[i, j] = N.sum(data[:, i] * data[:, j] * gamma[c, :],
+ axis = 0)
- mu[c,:] = x / mGamma[c]
- va[c*d:c*d+d,:] = xx / mGamma[c] - \
- N.outer(mu[c,:], mu[c,:])
+ mu[c, :] = x / mGamma[c]
+ va[c*d:c*d+d, :] = xx / mGamma[c] \
+ - N.outer(mu[c, :], mu[c, :])
w = invn * mGamma
else:
raise GmmParamError("varmode not recognized")
@@ -226,19 +238,17 @@
of the definition given here. """
if self.gm.mode == 'diag':
- """ for a diagonal model, we have
- k - 1 (k weigths, but one constraint of normality)
- + k * d (means) + k * d (variances) """
+            # for a diagonal model, we have k - 1 (k weights, but one
+            # constraint of normality) + k * d (means) + k * d (variances)
free_deg = self.gm.k * (self.gm.d * 2 + 1) - 1
elif self.gm.mode == 'full':
- """ for a full model, we have
- k - 1 (k weigths, but one constraint of normality)
- + k * d (means) + k * d * d / 2 (each covariance matrice
- has d **2 params, but with positivity constraint) """
+            # for a full model, we have k - 1 (k weights, but one constraint
+            # of normality) + k * d (means) + k * d * d / 2 (each covariance
+            # matrix has d ** 2 params, but with positivity constraint)
if self.gm.d == 1:
- free_deg = self.gm.k * 3 - 1
+ free_deg = self.gm.k * 3 - 1
else:
- free_deg = self.gm.k * (self.gm.d + 1 + self.gm.d ** 2 / 2) - 1
+ free_deg = self.gm.k * (self.gm.d + 1 + self.gm.d ** 2 / 2) - 1
lk = self.likelihood(data)
n = N.shape(data)[0]
@@ -261,21 +271,32 @@
pass
def train(self, data, model, maxiter = 10, thresh = 1e-5):
- """
- Train a model using data, and stops when the likelihood fails
- behind a threshold, or when the number of iterations > niter,
- whichever comes first
+ """Train a model using EM.
- Args:
- - data: contains the observed features, one row is one frame, ie one
- observation of dimension d
- - model: object of class Mixture
- - maxiter: maximum number of iterations
+        Train a model using data, and stop when the likelihood increase
+        between two consecutive iterations falls below a threshold, or when
+        the number of iterations > maxiter, whichever comes first.
- The model is trained, and its parameters updated accordingly.
+ :Parameters:
+ data : ndarray
+ contains the observed features, one row is one frame, ie one
+ observation of dimension d
+ model : GMM
+ GMM instance.
+ maxiter : int
+ maximum number of iterations
+            thresh : float
+                if the slope of the likelihood falls below this value, the
+                algorithm stops.
- Returns:
- likelihood (one value per iteration).
+ :Returns:
+ likelihood : ndarray
+ one value per iteration.
+
+ Note
+ ----
+ The model is trained, and its parameters updated accordingly, eg the
+ results are put in the GMM instance.
"""
if not isinstance(model, MixtureModel):
raise TypeError("expect a MixtureModel as a model")
@@ -296,62 +317,24 @@
model.update_em(data, g)
if has_em_converged(like[i], like[i-1], thresh):
return like[0:i]
- # # Em computation, with computation of the likelihood
- # g, tgd = model.sufficient_statistics(data)
- # like[0] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
- # model.update_em(data, g)
- # for i in range(1, maxiter):
- # print "=== Iteration %d ===" % i
- # isreg = False
- # for j in range(model.gm.k):
- # va = model.gm.va[j]
- # if va.any() < _MIN_INV_COND:
- # isreg = True
- # print "\tregularization detected"
- # print "\t" + str(va)
- # model.gm.va[j] = regularize_diag(va)
- # print "\t" + str(va) + ", " + str(model.gm.va[j])
- # print "\t" + str(gauss_den(data, model.gm.mu[j], model.gm.va[j]))
- # print "\tend regularization detected"
- # var = va
- #
- # g, tgd = model.sufficient_statistics(data)
- # try:
- # assert not( (N.isnan(tgd)).any() )
- # if isreg:
- # print var
- # except AssertionError:
- # print "tgd is nan..."
- # print model.gm.va[13,:]
- # print 1/model.gm.va[13,:]
- # print densities.gauss_den(data, model.gm.mu[13], model.gm.va[13])
- # print N.isnan((multiple_gauss_den(data, model.gm.mu, model.gm.va))).any()
- # print "Exciting"
- # import sys
- # sys.exit(-1)
- # like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
- # model.update_em(data, g)
- # assert not( model.gm.va.any() < 1e-6)
- # if has_em_converged(like[i], like[i-1], thresh):
- # return like[0:i]
return like
-def regularize_diag(variance, alpha = _DEF_ALPHA):
- delta = N.sum(variance) / variance.size
- if delta > _MIN_DBL_DELTA:
- return variance + alpha * delta
- else:
- return variance + alpha * _MIN_DBL_DELTA
+#def regularize_diag(variance, alpha = _DEF_ALPHA):
+# delta = N.sum(variance) / variance.size
+# if delta > _MIN_DBL_DELTA:
+# return variance + alpha * delta
+# else:
+# return variance + alpha * _MIN_DBL_DELTA
+#
+#def regularize_full(variance):
+# # Trace of a positive definite matrix is always > 0
+# delta = N.trace(variance) / variance.shape[0]
+# if delta > _MIN_DBL_DELTA:
+# return variance + alpha * delta
+# else:
+# return variance + alpha * _MIN_DBL_DELTA
-def regularize_full(variance):
- # Trace of a positive definite matrix is always > 0
- delta = N.trace(variance) / variance.shape[0]
- if delta > _MIN_DBL_DELTA:
- return variance + alpha * delta
- else:
- return variance + alpha * _MIN_DBL_DELTA
-
# Misc functions
def bic(lk, deg, n):
""" Expects lk to be log likelihood """
@@ -369,127 +352,129 @@
return False
if __name__ == "__main__":
- import copy
- #=============================
- # Simple GMM with 5 components
- #=============================
+ pass
+ ## import copy
+ ## #=============================
+ ## # Simple GMM with 5 components
+ ## #=============================
- #+++++++++++++++++++++++++++++
- # Meta parameters of the model
- # - k: Number of components
- # - d: dimension of each Gaussian
- # - mode: Mode of covariance matrix: full or diag
- # - nframes: number of frames (frame = one data point = one
- # row of d elements
- k = 2
- d = 1
- mode = 'full'
- nframes = 1e3
+ ## #+++++++++++++++++++++++++++++
+ ## # Meta parameters of the model
+ ## # - k: Number of components
+ ## # - d: dimension of each Gaussian
+ ## # - mode: Mode of covariance matrix: full or diag
+ ## # - nframes: number of frames (frame = one data point = one
+ ## # row of d elements
+ ## k = 2
+ ## d = 1
+ ## mode = 'full'
+ ## nframes = 1e3
- #+++++++++++++++++++++++++++++++++++++++++++
- # Create an artificial GMM model, samples it
- #+++++++++++++++++++++++++++++++++++++++++++
- print "Generating the mixture"
- # Generate a model with k components, d dimensions
- w, mu, va = GM.gen_param(d, k, mode, spread = 3)
- gm = GM(d, k, mode)
- gm.set_param(w, mu, va)
+ ## #+++++++++++++++++++++++++++++++++++++++++++
+ ## # Create an artificial GMM model, samples it
+ ## #+++++++++++++++++++++++++++++++++++++++++++
+ ## print "Generating the mixture"
+ ## # Generate a model with k components, d dimensions
+ ## w, mu, va = GM.gen_param(d, k, mode, spread = 3)
+ ## gm = GM(d, k, mode)
+ ## gm.set_param(w, mu, va)
- # Sample nframes frames from the model
- data = gm.sample(nframes)
+ ## # Sample nframes frames from the model
+ ## data = gm.sample(nframes)
- #++++++++++++++++++++++++
- # Learn the model with EM
- #++++++++++++++++++++++++
+ ## #++++++++++++++++++++++++
+ ## # Learn the model with EM
+ ## #++++++++++++++++++++++++
- # Init the model
- print "Init a model for learning, with kmean for initialization"
- lgm = GM(d, k, mode)
- gmm = GMM(lgm, 'kmean')
- gmm.init(data)
+ ## # Init the model
+ ## print "Init a model for learning, with kmean for initialization"
+ ## lgm = GM(d, k, mode)
+ ## gmm = GMM(lgm, 'kmean')
+ ## gmm.init(data)
- # Keep the initialized model for drawing
- gm0 = copy.copy(lgm)
+ ## # Keep the initialized model for drawing
+ ## gm0 = copy.copy(lgm)
- # The actual EM, with likelihood computation
- niter = 10
- like = N.zeros(niter)
+ ## # The actual EM, with likelihood computation
+ ## niter = 10
+ ## like = N.zeros(niter)
- print "computing..."
- for i in range(niter):
- g, tgd = gmm.sufficient_statistics(data)
- like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
- gmm.update_em(data, g)
- # # Alternative form, by using EM class: as the EM class
- # # is quite rudimentary now, it is not very useful, just save
- # # a few lines
- # em = EM()
- # like = em.train(data, gmm, niter)
+ ## print "computing..."
+ ## for i in range(niter):
+ ## g, tgd = gmm.sufficient_statistics(data)
+ ## like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
+ ## gmm.update_em(data, g)
+ ## # # Alternative form, by using EM class: as the EM class
+ ## # # is quite rudimentary now, it is not very useful, just save
+ ## # # a few lines
+ ## # em = EM()
+ ## # like = em.train(data, gmm, niter)
- #+++++++++++++++
- # Draw the model
- #+++++++++++++++
- print "drawing..."
- import pylab as P
- P.subplot(2, 1, 1)
+ ## #+++++++++++++++
+ ## # Draw the model
+ ## #+++++++++++++++
+ ## print "drawing..."
+ ## import pylab as P
+ ## P.subplot(2, 1, 1)
- if not d == 1:
- # Draw what is happening
- P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
+ ## if not d == 1:
+ ## # Draw what is happening
+ ## P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
- # Real confidence ellipses
- Xre, Yre = gm.conf_ellipses()
- P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides')
- for i in range(1,k):
- P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_')
+ ## # Real confidence ellipses
+ ## Xre, Yre = gm.conf_ellipses()
+ ## P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides')
+ ## for i in range(1,k):
+ ## P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_')
- # Initial confidence ellipses as found by kmean
- X0e, Y0e = gm0.conf_ellipses()
- P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoides')
- for i in range(1,k):
- P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_')
+ ## # Initial confidence ellipses as found by kmean
+ ## X0e, Y0e = gm0.conf_ellipses()
+ ## P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoides')
+ ## for i in range(1,k):
+ ## P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_')
- # Values found by EM
- Xe, Ye = lgm.conf_ellipses()
- P.plot(Xe[0], Ye[0], 'r', label = 'confidence ellipsoides found by EM')
- for i in range(1,k):
- P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_')
- P.legend(loc = 0)
- else:
- # Real confidence ellipses
- h = gm.plot1d()
- [i.set_color('g') for i in h['pdf']]
- h['pdf'][0].set_label('true pdf')
+ ## # Values found by EM
+ ## Xe, Ye = lgm.conf_ellipses()
+ ## P.plot(Xe[0], Ye[0], 'r', label = "confidence ellipsoides found by"
+ ## "EM")
+ ## for i in range(1,k):
+ ## P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_')
+ ## P.legend(loc = 0)
+ ## else:
+ ## # Real confidence ellipses
+ ## h = gm.plot1d()
+ ## [i.set_color('g') for i in h['pdf']]
+ ## h['pdf'][0].set_label('true pdf')
- # Initial confidence ellipses as found by kmean
- h0 = gm0.plot1d()
- [i.set_color('k') for i in h0['pdf']]
- h0['pdf'][0].set_label('initial pdf')
+ ## # Initial confidence ellipses as found by kmean
+ ## h0 = gm0.plot1d()
+ ## [i.set_color('k') for i in h0['pdf']]
+ ## h0['pdf'][0].set_label('initial pdf')
- # Values found by EM
- hl = lgm.plot1d(fill = 1, level = 0.66)
- [i.set_color('r') for i in hl['pdf']]
- hl['pdf'][0].set_label('pdf found by EM')
+ ## # Values found by EM
+ ## hl = lgm.plot1d(fill = 1, level = 0.66)
+ ## [i.set_color('r') for i in hl['pdf']]
+ ## hl['pdf'][0].set_label('pdf found by EM')
- P.legend(loc = 0)
+ ## P.legend(loc = 0)
- P.subplot(2, 1, 2)
- P.plot(like)
- P.title('log likelihood')
+ ## P.subplot(2, 1, 2)
+ ## P.plot(like)
+ ## P.title('log likelihood')
- # #++++++++++++++++++
- # # Export the figure
- # #++++++++++++++++++
- # F = P.gcf()
- # DPI = F.get_dpi()
- # DefaultSize = F.get_size_inches()
- # # the default is 100dpi for savefig:
- # F.savefig("example1.png")
+ ## # #++++++++++++++++++
+ ## # # Export the figure
+ ## # #++++++++++++++++++
+ ## # F = P.gcf()
+ ## # DPI = F.get_dpi()
+ ## # DefaultSize = F.get_size_inches()
+ ## # # the default is 100dpi for savefig:
+ ## # F.savefig("example1.png")
- # # Now make the image twice as big, while keeping the fonts and all the
- # # same size
- # F.set_figsize_inches( (DefaultSize[0]*2, DefaultSize[1]*2) )
- # Size = F.get_size_inches()
- # print "Size in Inches", Size
- # F.savefig("example2.png")
- P.show()
+ ## # # Now make the image twice as big, while keeping the fonts and all the
+ ## # # same size
+ ## # F.set_figsize_inches( (DefaultSize[0]*2, DefaultSize[1]*2) )
+ ## # Size = F.get_size_inches()
+ ## # print "Size in Inches", Size
+ ## # F.savefig("example2.png")
+ ## P.show()
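The train method documented above boils down to the classic EM alternation; a condensed sketch, where a relative-change test stands in for has_em_converged (whose exact definition is not shown in this diff):

    import numpy as N

    def train_sketch(data, model, maxiter=10, thresh=1e-5):
        like = N.zeros(maxiter)
        for i in range(maxiter):
            # E step: responsibilities g and per-component densities tgd
            g, tgd = model.sufficient_statistics(data)
            like[i] = N.sum(N.log(N.sum(tgd, 1)), axis=0)
            # M step: update w, mu and va from the responsibilities
            model.update_em(data, g)
            if i > 0 and abs((like[i] - like[i-1]) / like[i-1]) < thresh:
                return like[:i]     # likelihood slope fell below threshold
        return like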
Modified: trunk/Lib/sandbox/pyem/info.py
===================================================================
--- trunk/Lib/sandbox/pyem/info.py 2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/info.py 2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,61 +1,63 @@
"""
-Routines for Gaussian Mixture Models
-and learning with Expectation Maximization
-==========================================
+Routines for Gaussian Mixture Models and learning with Expectation Maximization
+===============================================================================
-This module contains classes and function to compute multivariate Gaussian densities
-(diagonal and full covariance matrices), Gaussian mixtures, Gaussian mixtures models
-and an Em trainer.
+This module contains classes and functions to compute multivariate Gaussian
+densities (diagonal and full covariance matrices), Gaussian mixtures, Gaussian
+mixture models and an EM trainer.
More specifically, the module defines the following classes, functions:
- densities.gauss_den: function to compute multivariate Gaussian pdf
-- gauss_mix.GM: defines the GM (Gaussian Mixture) class. A Gaussian Mixture can be
-created from its parameters weights, mean and variances, or from its meta parameters
-d (dimension of the Gaussian) and k (number of components in the mixture). A Gaussian
-Model can then be sampled or plot (if d>1, plot confidence ellipsoids projected on
-2 chosen dimensions, if d == 1, plot the pdf of each component and fill the zone
-of confidence for a given level)
-- gmm_em.GMM: defines a class GMM (Gaussian Mixture Model). This class is constructed
-from a GM model gm, and can be used to train gm. The GMM can be initiated by
-kmean or at random, and can compute sufficient statistics, and update its parameters
-from the sufficient statistics.
-- kmean.kmean: implements a kmean algorithm. We cannot use scipy.cluster.vq kmeans, since
-its does not give membership of observations.
+- gauss_mix.GM: defines the GM (Gaussian Mixture) class. A Gaussian Mixture can
+ be created from its parameters weights, mean and variances, or from its meta
+ parameters d (dimension of the Gaussian) and k (number of components in the
+  mixture). A Gaussian Model can then be sampled or plotted (if d>1, plot
+ confidence ellipsoids projected on 2 chosen dimensions, if d == 1, plot the
+ pdf of each component and fill the zone of confidence for a given level)
+- gmm_em.GMM: defines a class GMM (Gaussian Mixture Model). This class is
+ constructed from a GM model gm, and can be used to train gm. The GMM can be
+ initiated by kmean or at random, and can compute sufficient statistics, and
+ update its parameters from the sufficient statistics.
+- kmean.kmean: implements a kmean algorithm. We cannot use scipy.cluster.vq
+  kmeans, since it does not give membership of observations.
Example of use:
- #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- # Create an artificial 2 dimension, 3 clusters GM model, samples it
- #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- w, mu, va = GM.gen_param(2, 3, 'diag', spread = 1.5)
- gm = GM.fromvalues(w, mu, va)
+---------------
- # Sample 1000 frames from the model
- data = gm.sample(1000)
+>>> #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+>>> # Create an artificial 2 dimension, 3 clusters GM model, samples it
+>>> #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+>>> w, mu, va = GM.gen_param(2, 3, 'diag', spread = 1.5)
+>>> gm = GM.fromvalues(w, mu, va)
+>>>
+>>> # Sample 1000 frames from the model
+>>> data = gm.sample(1000)
+>>>
+>>> #++++++++++++++++++++++++
+>>> # Learn the model with EM
+>>> #++++++++++++++++++++++++
+>>> # Init the model
+>>> lgm = GM(2, 3, 'diag')
+>>> gmm = GMM(lgm, 'kmean')
+>>>
+>>> # The actual EM, with likelihood computation. The threshold
+>>> # is compared to the (linearly approximated) derivative of the likelihood
+>>> em = EM()
+>>> like = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
- #++++++++++++++++++++++++
- # Learn the model with EM
- #++++++++++++++++++++++++
- # Init the model
- lgm = GM(d, k, mode)
- gmm = GMM(lgm, 'kmean')
-
- # The actual EM, with likelihood computation. The threshold
- # is compared to the (linearly appromixated) derivative of the likelihood
- em = EM()
- like = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
-
Files example.py and example2.py show more capabilities of the toolbox, including
plotting capabilities (using matplotlib) and model selection using Bayesian
Information Criterion (BIC).
Bibliography:
- * Maximum likelihood from incomplete data via the EM algorithm in Journal of
- the Royal Statistical Society, Series B, 39(1):1--38, 1977, by A. P. Dempster,
- N. M. Laird, and D. B. Rubin
- * Bayesian Approaches to Gaussian Mixture Modelling (1998) by
- Stephen J. Roberts, Dirk Husmeier, Iead Rezek, William Penny in
- IEEE Transactions on Pattern Analysis and Machine Intelligence
+
+- Maximum likelihood from incomplete data via the EM algorithm in Journal of
+ the Royal Statistical Society, Series B, 39(1):1--38, 1977, by A. P.
+ Dempster, N. M. Laird, and D. B. Rubin
+- Bayesian Approaches to Gaussian Mixture Modelling (1998) by Stephen J.
+ Roberts, Dirk Husmeier, Iead Rezek, William Penny in IEEE Transactions on
+ Pattern Analysis and Machine Intelligence
Copyright: David Cournapeau 2006
License: BSD-style (see LICENSE.txt in main source directory)
Modified: trunk/Lib/sandbox/pyem/online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/online_em.py 2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/online_em.py 2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,28 +1,27 @@
# /usr/bin/python
-# Last Change: Fri Jun 08 08:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
-#---------------------------------------------
-# This is not meant to be used yet !!!! I am
-# not sure how to integrate this stuff inside
-# the package yet. The cases are:
-# - we have a set of data, and we want to test online EM
-# compared to normal EM
-# - we do not have all the data before putting them in online EM:
-# eg current frame depends on previous frame in some way.
+# This is not meant to be used yet !!!! I am not sure how to integrate this
+# stuff inside the package yet. The cases are:
+# - we have a set of data, and we want to test online EM compared to normal
+# EM
+# - we do not have all the data before putting them in online EM: eg current
+# frame depends on previous frame in some way.
# TODO:
# - Add biblio
-# - Look back at articles for discussion for init, regularization and
+# - Look back at articles for discussion for init, regularization and
# convergence rates
-# - the function sufficient_statistics does not really return SS. This is not a
-# big problem, but it would be better to really return them as the name implied.
+# - the function sufficient_statistics does not really return SS. This is not
+# a big problem, but it would be better to really return them as the name
+#   implies.
import numpy as N
from numpy import mean
from numpy.testing import assert_array_almost_equal, assert_array_equal
-from gmm_em import ExpMixtureModel, GMM, EM
-from gauss_mix import GM
+from gmm_em import ExpMixtureModel#, GMM, EM
+#from gauss_mix import GM
from scipy.cluster.vq import kmeans2 as kmean
import densities2 as D
@@ -60,22 +59,24 @@
k = self.gm.k
d = self.gm.d
if self.gm.mode == 'diag':
- w = N.ones(k) / k
+ w = N.ones(k) / k
# Init the internal state of EM
- self.cx = N.outer(w, mean(init_data, 0))
- self.cxx = N.outer(w, mean(init_data ** 2, 0))
+ self.cx = N.outer(w, mean(init_data, 0))
+ self.cxx = N.outer(w, mean(init_data ** 2, 0))
# w, mu and va init is the same as in the standard case
- (code, label) = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
- mu = code.copy()
- va = N.zeros((k, d))
+ (code, label) = kmean(init_data, init_data[0:k, :], iter = 10,
+ minit = 'matrix')
+ mu = code.copy()
+ va = N.zeros((k, d))
for i in range(k):
for j in range(d):
- va [i,j] = N.cov(init_data[N.where(label==i), j], rowvar = 0)
+ va[i, j] = N.cov(init_data[N.where(label==i), j],
+ rowvar = 0)
else:
raise OnGmmParamError("""init_online not implemented for
- mode %s yet""", mode)
+ mode %s yet""", self.gm.mode)
self.gm.set_param(w, mu, va)
# c* are the parameters which are computed at every step (ie
@@ -95,22 +96,24 @@
k = self.gm.k
d = self.gm.d
if self.gm.mode == 'diag':
- w = N.ones(k) / k
+ w = N.ones(k) / k
# Init the internal state of EM
- self.cx = N.outer(w, mean(init_data, 0))
- self.cxx = N.outer(w, mean(init_data ** 2, 0))
+ self.cx = N.outer(w, mean(init_data, 0))
+ self.cxx = N.outer(w, mean(init_data ** 2, 0))
# w, mu and va init is the same as in the standard case
- (code, label) = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
- mu = code.copy()
- va = N.zeros((k, d))
+ (code, label) = kmean(init_data, init_data[0:k, :],
+ iter = niter, minit = 'matrix')
+ mu = code.copy()
+ va = N.zeros((k, d))
for i in range(k):
for j in range(d):
- va [i,j] = N.cov(init_data[N.where(label==i), j], rowvar = 0)
+ va[i, j] = N.cov(init_data[N.where(label==i), j],
+ rowvar = 0)
else:
raise OnGmmParamError("""init_online not implemented for
- mode %s yet""", mode)
+ mode %s yet""", self.gm.mode)
self.gm.set_param(w, mu, va)
# c* are the parameters which are computed at every step (ie
@@ -278,132 +281,133 @@
if __name__ == '__main__':
- d = 1
- k = 2
- mode = 'diag'
- nframes = int(5e3)
- emiter = 4
- seed(5)
+ pass
+ #d = 1
+ #k = 2
+ #mode = 'diag'
+ #nframes = int(5e3)
+ #emiter = 4
+ #seed(5)
- #+++++++++++++++++++++++++++++++++++++++++++++++++
- # Generate a model with k components, d dimensions
- #+++++++++++++++++++++++++++++++++++++++++++++++++
- w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
- gm = GM.fromvalues(w, mu, va)
- # Sample nframes frames from the model
- data = gm.sample(nframes)
+ ##+++++++++++++++++++++++++++++++++++++++++++++++++
+ ## Generate a model with k components, d dimensions
+ ##+++++++++++++++++++++++++++++++++++++++++++++++++
+ #w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
+ #gm = GM.fromvalues(w, mu, va)
+ ## Sample nframes frames from the model
+ #data = gm.sample(nframes)
- #++++++++++++++++++++++++++++++++++++++++++
- # Approximate the models with classical EM
- #++++++++++++++++++++++++++++++++++++++++++
- # Init the model
- lgm = GM(d, k, mode)
- gmm = GMM(lgm, 'kmean')
- gmm.init(data)
+ ##++++++++++++++++++++++++++++++++++++++++++
+ ## Approximate the models with classical EM
+ ##++++++++++++++++++++++++++++++++++++++++++
+ ## Init the model
+ #lgm = GM(d, k, mode)
+ #gmm = GMM(lgm, 'kmean')
+ #gmm.init(data)
- gm0 = copy.copy(gmm.gm)
- # The actual EM, with likelihood computation
- like = N.zeros(emiter)
- for i in range(emiter):
- g, tgd = gmm.sufficient_statistics(data)
- like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
- gmm.update_em(data, g)
+ #gm0 = copy.copy(gmm.gm)
+ ## The actual EM, with likelihood computation
+ #like = N.zeros(emiter)
+ #for i in range(emiter):
+ # g, tgd = gmm.sufficient_statistics(data)
+ # like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
+ # gmm.update_em(data, g)
- #++++++++++++++++++++++++++++++++++++++++
- # Approximate the models with online EM
- #++++++++++++++++++++++++++++++++++++++++
- ogm = GM(d, k, mode)
- ogmm = OnGMM(ogm, 'kmean')
- init_data = data[0:nframes / 20, :]
- ogmm.init(init_data)
+ ##++++++++++++++++++++++++++++++++++++++++
+ ## Approximate the models with online EM
+ ##++++++++++++++++++++++++++++++++++++++++
+ #ogm = GM(d, k, mode)
+ #ogmm = OnGMM(ogm, 'kmean')
+ #init_data = data[0:nframes / 20, :]
+ #ogmm.init(init_data)
- # Forgetting param
- ku = 0.005
- t0 = 200
- lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0)
- nu0 = 0.2
- nu = N.zeros((len(lamb), 1))
- nu[0] = nu0
- for i in range(1, len(lamb)):
- nu[i] = 1./(1 + lamb[i] / nu[i-1])
+ ## Forgetting param
+ #ku = 0.005
+ #t0 = 200
+ #lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0)
+ #nu0 = 0.2
+ #nu = N.zeros((len(lamb), 1))
+ #nu[0] = nu0
+ #for i in range(1, len(lamb)):
+ # nu[i] = 1./(1 + lamb[i] / nu[i-1])
- print "meth1"
- # object version of online EM
- for t in range(nframes):
- ogmm.compute_sufficient_statistics_frame(data[t], nu[t])
- ogmm.update_em_frame()
+ #print "meth1"
+ ## object version of online EM
+ #for t in range(nframes):
+ # ogmm.compute_sufficient_statistics_frame(data[t], nu[t])
+ # ogmm.update_em_frame()
- ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
+ #ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
- # 1d optimized version
- ogm2 = GM(d, k, mode)
- ogmm2 = OnGMM1d(ogm2, 'kmean')
- ogmm2.init(init_data[:, 0])
+ ## 1d optimized version
+ #ogm2 = GM(d, k, mode)
+ #ogmm2 = OnGMM1d(ogm2, 'kmean')
+ #ogmm2.init(init_data[:, 0])
- print "meth2"
- # object version of online EM
- for t in range(nframes):
- ogmm2.compute_sufficient_statistics_frame(data[t, 0], nu[t])
- ogmm2.update_em_frame()
+ #print "meth2"
+ ## object version of online EM
+ #for t in range(nframes):
+ # ogmm2.compute_sufficient_statistics_frame(data[t, 0], nu[t])
+ # ogmm2.update_em_frame()
- #ogmm2.gm.set_param(ogmm2.cw, ogmm2.cmu, ogmm2.cva)
+ ##ogmm2.gm.set_param(ogmm2.cw, ogmm2.cmu, ogmm2.cva)
- print ogmm.cw
- print ogmm2.cw
- #+++++++++++++++
- # Draw the model
- #+++++++++++++++
- print "drawing..."
- import pylab as P
- P.subplot(2, 1, 1)
+ #print ogmm.cw
+ #print ogmm2.cw
+ ##+++++++++++++++
+ ## Draw the model
+ ##+++++++++++++++
+ #print "drawing..."
+ #import pylab as P
+ #P.subplot(2, 1, 1)
- if not d == 1:
- # Draw what is happening
- P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
+ #if not d == 1:
+ # # Draw what is happening
+ # P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
- h = gm.plot()
- [i.set_color('g') for i in h]
- h[0].set_label('true confidence ellipsoides')
+ # h = gm.plot()
+ # [i.set_color('g') for i in h]
+ # h[0].set_label('true confidence ellipsoids')
- h = gm0.plot()
- [i.set_color('k') for i in h]
- h[0].set_label('initial confidence ellipsoides')
+ # h = gm0.plot()
+ # [i.set_color('k') for i in h]
+ # h[0].set_label('initial confidence ellipsoids')
- h = lgm.plot()
- [i.set_color('r') for i in h]
- h[0].set_label('confidence ellipsoides found by EM')
+ # h = lgm.plot()
+ # [i.set_color('r') for i in h]
+ # h[0].set_label('confidence ellipsoids found by EM')
- h = ogmm.gm.plot()
- [i.set_color('m') for i in h]
- h[0].set_label('confidence ellipsoides found by Online EM')
+ # h = ogmm.gm.plot()
+ # [i.set_color('m') for i in h]
+ # h[0].set_label('confidence ellipsoids found by Online EM')
- # P.legend(loc = 0)
- else:
- # Real confidence ellipses
- h = gm.plot1d()
- [i.set_color('g') for i in h['pdf']]
- h['pdf'][0].set_label('true pdf')
+ # # P.legend(loc = 0)
+ #else:
+ # # True pdf
+ # h = gm.plot1d()
+ # [i.set_color('g') for i in h['pdf']]
+ # h['pdf'][0].set_label('true pdf')
- # Initial confidence ellipses as found by kmean
- h0 = gm0.plot1d()
- [i.set_color('k') for i in h0['pdf']]
- h0['pdf'][0].set_label('initial pdf')
+ # # Initial pdf as found by kmean
+ # h0 = gm0.plot1d()
+ # [i.set_color('k') for i in h0['pdf']]
+ # h0['pdf'][0].set_label('initial pdf')
- # Values found by EM
- hl = lgm.plot1d(fill = 1, level = 0.66)
- [i.set_color('r') for i in hl['pdf']]
- hl['pdf'][0].set_label('pdf found by EM')
+ # # Values found by EM
+ # hl = lgm.plot1d(fill = 1, level = 0.66)
+ # [i.set_color('r') for i in hl['pdf']]
+ # hl['pdf'][0].set_label('pdf found by EM')
- P.legend(loc = 0)
+ # P.legend(loc = 0)
- # Values found by Online EM
- hl = ogmm.gm.plot1d(fill = 1, level = 0.66)
- [i.set_color('m') for i in hl['pdf']]
- hl['pdf'][0].set_label('pdf found by Online EM')
+ # # Values found by Online EM
+ # hl = ogmm.gm.plot1d(fill = 1, level = 0.66)
+ # [i.set_color('m') for i in hl['pdf']]
+ # hl['pdf'][0].set_label('pdf found by Online EM')
- P.legend(loc = 0)
+ # P.legend(loc = 0)
- P.subplot(2, 1, 2)
- P.plot(nu)
- P.title('Learning rate')
- P.show()
+ #P.subplot(2, 1, 2)
+ #P.plot(nu)
+ #P.title('Learning rate')
+ #P.show()
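The forgetting schedule in the commented-out demo above boils down to the
few lines below (constants taken verbatim from the demo; this is just the
schedule, not the EM updates). Since lamb tends to 1, the recursion makes
1/nu grow by roughly one per frame, so the step size nu decays like 1/t and
later frames produce ever smaller updates:

    import numpy as N

    nframes = int(5e3)
    ku, t0, nu0 = 0.005, 200, 0.2

    # Forgetting factor, one value per frame; approaches 1 as t grows
    lamb = 1 - 1. / (N.arange(-1, nframes - 1) * ku + t0)

    # Induced step sizes: nu[t] = 1/(1 + lamb[t]/nu[t-1])
    nu = N.zeros(nframes)
    nu[0] = nu0
    for t in range(1, nframes):
        nu[t] = 1. / (1 + lamb[t] / nu[t - 1])

    # In the demo, each frame t is then fed to the online model with
    #   ogmm.compute_sufficient_statistics_frame(data[t], nu[t])
    #   ogmm.update_em_frame()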