[Scipy-svn] r3080 - in trunk/Lib/sandbox/pyem: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Sat Jun 9 02:16:29 EDT 2007
Author: cdavid
Date: 2007-06-09 01:16:08 -0500 (Sat, 09 Jun 2007)
New Revision: 3080
Modified:
trunk/Lib/sandbox/pyem/README
trunk/Lib/sandbox/pyem/densities.py
trunk/Lib/sandbox/pyem/gauss_mix.py
trunk/Lib/sandbox/pyem/misc.py
trunk/Lib/sandbox/pyem/tests/test_densities.py
Log:
Polish contour functions, so that choosing the dimension of projection works.
Modified: trunk/Lib/sandbox/pyem/README
===================================================================
--- trunk/Lib/sandbox/pyem/README 2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/README 2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,7 +1,5 @@
-Last Change: Fri Aug 04 07:00 PM 2006 J
+Last Change: Sat Jun 09 12:00 PM 2007 J
-Version 0.4.2
-
pyem is a python module build upon numpy and scipy
(see http://www.scipy.org/) for learning mixtures models
using Expectation Maximization. For now, only Gaussian
@@ -10,16 +8,6 @@
* computation of Gaussian pdf for multi-variate Gaussian
random vectors (spherical, diagonal and full covariance matrices)
* Sampling of Gaussian Mixtures Models
- * Confidence ellipsoides with probability (fixed level of
- 0.39 for now)
+ * Confidence ellipsoides with probability at arbitrary level
* Classic EM for Gaussian Mixture Models
* K-mean based and random initialization for EM available
-
-Has been tested on the following platforms:
-
- * Ubuntu dapper, bi Xeon 3.2 Ghz, 2 Go RAM
- python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2
- * Ubuntu dapper, pentium M 1.2 ghz,. 512 Mo Ram
- python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2
- * Ubuntu dapper, minimac (ppc G4 1.42 Ghz, 1Gb RAM)
- python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2
Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py 2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/densities.py 2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,12 +1,13 @@
#! /usr/bin/python
#
# Copyrighted David Cournapeau
-# Last Change: Fri Jun 08 07:00 PM 2007 J
+# Last Change: Sat Jun 09 02:00 PM 2007 J
import numpy as N
import numpy.linalg as lin
from numpy.random import randn
from scipy.stats import chi2
+import misc
# Error classes
class DenError(Exception):
@@ -164,19 +165,28 @@
return y
-# To plot a confidence ellipse from multi-variate gaussian pdf
-def gauss_ell(mu, va, dim = [0, 1], npoints = 100, level = 0.39):
+# To get coordinatea of a confidence ellipse from multi-variate gaussian pdf
+def gauss_ell(mu, va, dim = misc._DEF_VIS_DIM, \
+ npoints = misc._DEF_ELL_NP, \
+ level = misc._DEF_LEVEL):
""" Given a mean and covariance for multi-variate
gaussian, returns npoints points for the ellipse
of confidence given by level (all points will be inside
the ellipsoides with a probability equal to level)
Returns the coordinate x and y of the ellipse"""
+ if level >= 1 or level <= 0:
+ raise ValueError("level should be a scale strictly between 0 and 1.""")
mu = N.atleast_1d(mu)
va = N.atleast_1d(va)
+ d = mu.shape[0]
c = N.array(dim)
+ print c, d
+ if N.any(c < 0) or N.any(c >= d):
+ raise ValueError("dim elements should be >= 0 and < %d (dimension"\
+ " of the variance)" % d)
if mu.size == va.size:
mode = 'diag'
else:
Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Fri Jun 08 07:00 PM 2007 J
+# Last Change: Sat Jun 09 03:00 PM 2007 J
# Module to implement GaussianMixture class.
@@ -7,7 +7,7 @@
from numpy.random import randn, rand
import numpy.linalg as lin
import densities
-from misc import _MAX_DBL_DEV
+import misc
# Right now, two main usages of a Gaussian Model are possible
# - init a Gaussian Model with meta-parameters, and trains it
@@ -147,7 +147,8 @@
return X
- def conf_ellipses(self, *args, **kargs):
+ def conf_ellipses(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, \
+ level = misc._DEF_LEVEL):
"""Returns a list of confidence ellipsoids describing the Gmm
defined by mu and va. Check densities.gauss_ell for details
@@ -179,14 +180,14 @@
if self.mode == 'diag':
for i in range(self.k):
xe, ye = densities.gauss_ell(self.mu[i,:], self.va[i,:],
- *args, **kargs)
+ dim, npoints, level)
Xe.append(xe)
Ye.append(ye)
elif self.mode == 'full':
for i in range(self.k):
xe, ye = densities.gauss_ell(self.mu[i,:],
self.va[i*self.d:i*self.d+self.d,:],
- *args, **kargs)
+ dim, npoints, level)
Xe.append(xe)
Ye.append(ye)
@@ -253,7 +254,8 @@
#=================
# Plotting methods
#=================
- def plot(self, *args, **kargs):
+ def plot(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP,
+ level = misc._DEF_LEVEL):
"""Plot the ellipsoides directly for the model
Returns a list of lines, so that their style can be modified. By default,
@@ -266,7 +268,7 @@
assert self.d > 1
k = self.k
- Xe, Ye = self.conf_ellipses(*args, **kargs)
+ Xe, Ye = self.conf_ellipses(dim, npoints, level)
try:
import pylab as P
return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in range(k)]
@@ -354,7 +356,8 @@
return retval
- def density_on_grid(self, nx = 50, ny = 50, maxlevel = 0.95):
+ def density_on_grid(self, dim = misc._DEF_VIS_DIM, nx = 50, ny = 50,
+ maxlevel = 0.95):
"""Do all the necessary computation for contour plot of mixture's density.
Returns X, Y, Z and V as expected by mpl contour function."""
@@ -368,33 +371,49 @@
# contribution to the pdf).
# XXX: we need log pdf, not the pdf... this can save some computing
- Xe, Ye = self.conf_ellipses(level = maxlevel)
+ Xe, Ye = self.conf_ellipses(level = maxlevel, dim = dim)
ax = [N.min(Xe), N.max(Xe), N.min(Ye), N.max(Ye)]
w = ax[1] - ax[0]
h = ax[3] - ax[2]
X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \
- N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny))
+ N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim)
lden = N.log(den)
V = [-5, -3, -1, -0.5, ]
V.extend(N.linspace(0, N.max(lden), 4).tolist())
return X, Y, lden, N.array(V)
- def _densityctr(self, xrange, yrange):
+ def _densityctr(self, xrange, yrange, dim = misc._DEF_VIS_DIM):
"""Helper function to compute density contours on a grid."""
gr = N.meshgrid(xrange, yrange)
X = gr[0].flatten()
Y = gr[1].flatten()
xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1)
# XXX refactor computing pdf
- d = densities.multiple_gauss_den(xdata, self.mu, self.va) * self.w
- d = N.sum(d, 1)
- d = d.reshape(len(yrange), len(xrange))
+ dmu = self.mu[:, dim]
+ dva = self._get_va(dim)
+ den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w
+ den = N.sum(den, 1)
+ den = den.reshape(len(yrange), len(xrange))
X = gr[0]
Y = gr[1]
- return X, Y, d
+ return X, Y, den
+ def _get_va(self, dim):
+ """Returns variance limited do dimension in dim."""
+ dim = N.array(dim)
+ if dim.any() < 0 or dim.any() >= self.d:
+ raise ValueError("dim elements should be between 0 and dimension"\
+ " of the mixture.")
+ if self.mode == 'diag':
+ return self.va[:, dim]
+ elif self.mode == 'full':
+ tidx = N.array([N.array(dim) + i * self.d for i in range(self.k)])
+ tidx.flatten()
+ return self.va[tidx, dim]
+ else:
+ raise ValueError("Unkown mode")
# Syntactic sugar
def __repr__(self):
repr = ""
@@ -450,7 +469,7 @@
"""
# Check that w is valid
- if N.fabs(N.sum(w, 0) - 1) > _MAX_DBL_DEV:
+ if N.fabs(N.sum(w, 0) - 1) > misc._MAX_DBL_DEV:
raise GmParamError('weight does not sum to 1')
if not len(w.shape) == 1:
Modified: trunk/Lib/sandbox/pyem/misc.py
===================================================================
--- trunk/Lib/sandbox/pyem/misc.py 2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/misc.py 2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,5 +1,12 @@
-# Last Change: Fri Nov 10 10:00 AM 2006 J
+# Last Change: Sat Jun 09 12:00 PM 2007 J
+#========================================================
+# Constants used throughout the module (def args, etc...)
+#========================================================
+# This is the default dimension for representing confidence ellipses
+_DEF_VIS_DIM = [0, 1]
+_DEF_ELL_NP = 100
+_DEF_LEVEL = 0.39
#=====================================================================
# "magic number", that is number used to control regularization and co
# Change them at your risk !
Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Mon May 28 01:00 PM 2007 J
+# Last Change: Sat Jun 09 02:00 PM 2007 J
# TODO:
# - having "fake tests" to check that all mode (scalar, diag and full) are
@@ -13,6 +13,7 @@
set_package_path()
from pyem.densities import gauss_den
+import pyem.densities
restore_path()
#Optional:
@@ -105,89 +106,15 @@
self._generate_test_data_2d_full()
self._check(level)
+class test_gauss_ell(NumpyTestCase):
+ def test_dim(self):
+ pyem.densities.gauss_ell([0, 1], [1, 2.], [0, 1])
+ try:
+ pyem.densities.gauss_ell([0, 1], [1, 2.], [0, 2])
+ raise AssertionError("this call should not succeed, bogus dim.")
+ except ValueError, e:
+ print "Call with bogus dim did not succeed, OK"
+
+
if __name__ == "__main__":
NumpyTest().run()
-
-# def generate_test_data(n, d, mode = 'diag', file='test.dat'):
-# """Generate a set of data of dimension d, with n frames,
-# that is input data, mean, var and output of gden, so that
-# other implementations can be tested against"""
-# mu = randn(1, d)
-# if mode == 'diag':
-# va = abs(randn(1, d))
-# elif mode == 'full':
-# va = randn(d, d)
-# va = dot(va, va.transpose())
-#
-# input = randn(n, d)
-# output = gauss_den(input, mu, va)
-#
-# import tables
-# h5file = tables.openFile(file, "w")
-#
-# h5file.createArray(h5file.root, 'input', input)
-# h5file.createArray(h5file.root, 'mu', mu)
-# h5file.createArray(h5file.root, 'va', va)
-# h5file.createArray(h5file.root, 'output', output)
-#
-# h5file.close()
-#
-# def test_gauss_den():
-# """"""
-# # import tables
-# # import numpy as N
-# #
-# # filename = 'dendata.h5'
-#
-# # # # Dimension 1
-# # # d = 1
-# # # mu = 1.0
-# # # va = 2.0
-#
-# # # X = randn(1e3, 1)
-#
-# # # Y = gauss_den(X, mu, va)
-#
-# # # h5file = tables.openFile(filename, "w")
-#
-# # # h5file.createArray(h5file.root, 'X', X)
-# # # h5file.createArray(h5file.root, 'mu', mu)
-# # # h5file.createArray(h5file.root, 'va', va)
-# # # h5file.createArray(h5file.root, 'Y', Y)
-#
-# # # h5file.close()
-#
-# # # # Dimension 2, diag
-# # # d = 2
-# # # mu = N.array([1.0, -2.0])
-# # # va = N.array([1.0, 2.0])
-#
-# # # X = randn(1e3, 2)
-#
-# # # Y = gauss_den(X, mu, va)
-#
-# # # h5file = tables.openFile(filename, "w")
-#
-# # # h5file.createArray(h5file.root, 'X', X)
-# # # h5file.createArray(h5file.root, 'mu', mu)
-# # # h5file.createArray(h5file.root, 'va', va)
-# # # h5file.createArray(h5file.root, 'Y', Y)
-#
-# # # Dimension 2, full
-# # d = 2
-# # mu = N.array([[0.2, -1.0]])
-# # va = N.array([[1.2, 0.1], [0.1, 0.5]])
-#
-# # X = randn(1e3, 2)
-#
-# # Y = gauss_den(X, mu, va)
-#
-# # h5file = tables.openFile(filename, "w")
-#
-# # h5file.createArray(h5file.root, 'X', X)
-# # h5file.createArray(h5file.root, 'mu', mu)
-# # h5file.createArray(h5file.root, 'va', va)
-# # h5file.createArray(h5file.root, 'Y', Y)
-#
-# # h5file.close()
-#
More information about the Scipy-svn
mailing list