[Scipy-svn] r3080 - in trunk/Lib/sandbox/pyem: . tests

Sat Jun 9 02:16:29 EDT 2007

Author: cdavid
Date: 2007-06-09 01:16:08 -0500 (Sat, 09 Jun 2007)
New Revision: 3080

Modified:
   trunk/Lib/sandbox/pyem/README
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/misc.py
   trunk/Lib/sandbox/pyem/tests/test_densities.py
Log:
Polish contour functions, so that choosing the dimension of projection works.

Modified: trunk/Lib/sandbox/pyem/README
===================================================================

--- trunk/Lib/sandbox/pyem/README	2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/README	2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,7 +1,5 @@
-Last Change: Fri Aug 04 07:00 PM 2006 J
+Last Change: Sat Jun 09 12:00 PM 2007 J
 
-Version 0.4.2
-
 pyem is a python module build upon numpy and scipy
 (see http://www.scipy.org/) for learning mixtures models
 using Expectation Maximization. For now, only Gaussian
@@ -10,16 +8,6 @@
  * computation of Gaussian pdf for multi-variate Gaussian
  random vectors (spherical, diagonal and full covariance matrices)
  * Sampling of Gaussian Mixtures Models
- * Confidence ellipsoides with probability (fixed level of 
- 0.39 for now)
+ * Confidence ellipsoides with probability at arbitrary level
  * Classic EM for Gaussian Mixture Models
  * K-mean based and random initialization for EM available
-
-Has been tested on the following platforms:
-
- * Ubuntu dapper, bi Xeon 3.2 Ghz, 2 Go RAM
- python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2
- * Ubuntu dapper, pentium M 1.2 ghz,. 512 Mo Ram
- python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2
- * Ubuntu dapper, minimac (ppc G4 1.42 Ghz, 1Gb RAM)
- python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,12 +1,13 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Fri Jun 08 07:00 PM 2007 J
+# Last Change: Sat Jun 09 02:00 PM 2007 J
 
 import numpy as N
 import numpy.linalg as lin
 from numpy.random import randn
 from scipy.stats import chi2
+import misc
 
 # Error classes
 class DenError(Exception):
@@ -164,19 +165,28 @@
  
     return y
 
-# To plot a confidence ellipse from multi-variate gaussian pdf
-def gauss_ell(mu, va, dim = [0, 1], npoints = 100, level = 0.39):
+# To get coordinatea of a confidence ellipse from multi-variate gaussian pdf
+def gauss_ell(mu, va, dim = misc._DEF_VIS_DIM, \
+        npoints = misc._DEF_ELL_NP, \
+        level = misc._DEF_LEVEL):
     """ Given a mean and covariance for multi-variate
     gaussian, returns npoints points for the ellipse
     of confidence given by level (all points will be inside
     the ellipsoides with a probability equal to level)
     
     Returns the coordinate x and y of the ellipse"""
+    if level >= 1 or level <= 0:
+        raise ValueError("level should be a scale strictly between 0 and 1.""")
     
     mu      = N.atleast_1d(mu)
     va      = N.atleast_1d(va)
+    d       = mu.shape[0]
     c       = N.array(dim)
 
+    print c, d
+    if N.any(c < 0) or N.any(c >= d):
+        raise ValueError("dim elements should be >= 0 and < %d (dimension"\
+                " of the variance)" % d)
     if mu.size == va.size:
         mode    = 'diag'
     else:

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Fri Jun 08 07:00 PM 2007 J
+# Last Change: Sat Jun 09 03:00 PM 2007 J
 
 # Module to implement GaussianMixture class.
 
@@ -7,7 +7,7 @@
 from numpy.random import randn, rand
 import numpy.linalg as lin
 import densities
-from misc import _MAX_DBL_DEV
+import misc
 
 # Right now, two main usages of a Gaussian Model are possible
 #   - init a Gaussian Model with meta-parameters, and trains it
@@ -147,7 +147,8 @@
 
         return X
 
-    def conf_ellipses(self, *args, **kargs):
+    def conf_ellipses(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, \
+        level = misc._DEF_LEVEL):
         """Returns a list of confidence ellipsoids describing the Gmm
         defined by mu and va. Check densities.gauss_ell for details
 
@@ -179,14 +180,14 @@
         if self.mode == 'diag':
             for i in range(self.k):
                 xe, ye  = densities.gauss_ell(self.mu[i,:], self.va[i,:], 
-                        *args, **kargs)
+                        dim, npoints, level)
                 Xe.append(xe)
                 Ye.append(ye)
         elif self.mode == 'full':
             for i in range(self.k):
                 xe, ye  = densities.gauss_ell(self.mu[i,:], 
                         self.va[i*self.d:i*self.d+self.d,:], 
-                        *args, **kargs)
+                        dim, npoints, level)
                 Xe.append(xe)
                 Ye.append(ye)
 
@@ -253,7 +254,8 @@
     #=================
     # Plotting methods
     #=================
-    def plot(self, *args, **kargs):
+    def plot(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, 
+            level = misc._DEF_LEVEL):
         """Plot the ellipsoides directly for the model
         
         Returns a list of lines, so that their style can be modified. By default,
@@ -266,7 +268,7 @@
 
         assert self.d > 1
         k       = self.k
-        Xe, Ye  = self.conf_ellipses(*args, **kargs)
+        Xe, Ye  = self.conf_ellipses(dim, npoints, level)
         try:
             import pylab as P
             return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in range(k)]
@@ -354,7 +356,8 @@
 
         return retval
 
-    def density_on_grid(self, nx = 50, ny = 50, maxlevel = 0.95):
+    def density_on_grid(self, dim = misc._DEF_VIS_DIM, nx = 50, ny = 50,
+            maxlevel = 0.95):
         """Do all the necessary computation for contour plot of mixture's density.
         
         Returns X, Y, Z and V as expected by mpl contour function."""
@@ -368,33 +371,49 @@
         # contribution to the pdf).
 
         # XXX: we need log pdf, not the pdf... this can save some computing
-        Xe, Ye = self.conf_ellipses(level = maxlevel)
+        Xe, Ye = self.conf_ellipses(level = maxlevel, dim = dim)
         ax = [N.min(Xe), N.max(Xe), N.min(Ye), N.max(Ye)]
 
         w = ax[1] - ax[0]
         h = ax[3] - ax[2]
         X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \
-                N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny))
+                N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim)
         lden = N.log(den)
         V = [-5, -3, -1, -0.5, ]
         V.extend(N.linspace(0, N.max(lden), 4).tolist())
         return X, Y, lden, N.array(V)
 
-    def _densityctr(self, xrange, yrange):
+    def _densityctr(self, xrange, yrange, dim = misc._DEF_VIS_DIM):
         """Helper function to compute density contours on a grid."""
         gr = N.meshgrid(xrange, yrange)
         X = gr[0].flatten()
         Y = gr[1].flatten()
         xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1)
         # XXX refactor computing pdf
-        d = densities.multiple_gauss_den(xdata, self.mu, self.va) * self.w
-        d = N.sum(d, 1)
-        d = d.reshape(len(yrange), len(xrange))
+        dmu = self.mu[:, dim]
+        dva = self._get_va(dim)
+        den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w
+        den = N.sum(den, 1)
+        den = den.reshape(len(yrange), len(xrange))
 
         X = gr[0]
         Y = gr[1]
-        return X, Y, d
+        return X, Y, den
 
+    def _get_va(self, dim):
+        """Returns variance limited do dimension in dim."""
+        dim = N.array(dim)
+        if dim.any() < 0 or dim.any() >= self.d:
+            raise ValueError("dim elements should be between 0 and dimension"\
+                    " of the mixture.")
+        if self.mode == 'diag':
+            return self.va[:, dim]
+        elif self.mode == 'full':
+            tidx = N.array([N.array(dim) + i * self.d for i in range(self.k)])
+            tidx.flatten()
+            return self.va[tidx, dim]
+        else:
+            raise ValueError("Unkown mode")
     # Syntactic sugar
     def __repr__(self):
         repr    = ""
@@ -450,7 +469,7 @@
     """
         
     # Check that w is valid
-    if N.fabs(N.sum(w, 0)  - 1) > _MAX_DBL_DEV:
+    if N.fabs(N.sum(w, 0)  - 1) > misc._MAX_DBL_DEV:
         raise GmParamError('weight does not sum to 1')
     
     if not len(w.shape) == 1:

Modified: trunk/Lib/sandbox/pyem/misc.py
===================================================================
--- trunk/Lib/sandbox/pyem/misc.py	2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/misc.py	2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,5 +1,12 @@
-# Last Change: Fri Nov 10 10:00 AM 2006 J
+# Last Change: Sat Jun 09 12:00 PM 2007 J
 
+#========================================================
+# Constants used throughout the module (def args, etc...)
+#========================================================
+# This is the default dimension for representing confidence ellipses
+_DEF_VIS_DIM = [0, 1]
+_DEF_ELL_NP = 100
+_DEF_LEVEL = 0.39
 #=====================================================================
 # "magic number", that is number used to control regularization and co
 # Change them at your risk !

Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Mon May 28 01:00 PM 2007 J
+# Last Change: Sat Jun 09 02:00 PM 2007 J
 
 # TODO:
 #   - having "fake tests" to check that all mode (scalar, diag and full) are
@@ -13,6 +13,7 @@
 
 set_package_path()
 from pyem.densities import gauss_den
+import pyem.densities
 restore_path()
 
 #Optional:
@@ -105,89 +106,15 @@
         self._generate_test_data_2d_full()
         self._check(level)
 
+class test_gauss_ell(NumpyTestCase):
+    def test_dim(self):
+        pyem.densities.gauss_ell([0, 1], [1, 2.], [0, 1])
+        try:
+            pyem.densities.gauss_ell([0, 1], [1, 2.], [0, 2])
+            raise AssertionError("this call should not succeed, bogus dim.")
+        except ValueError, e:
+            print "Call with bogus dim did not succeed, OK"
+
+
 if __name__ == "__main__":
     NumpyTest().run()
-
-# def generate_test_data(n, d, mode = 'diag', file='test.dat'):
-#     """Generate a set of data of dimension d, with n frames,
-#     that is input data, mean, var and output of gden, so that
-#     other implementations can be tested against"""
-#     mu  = randn(1, d)
-#     if mode == 'diag':
-#         va  = abs(randn(1, d))
-#     elif mode == 'full':
-#         va  = randn(d, d)
-#         va  = dot(va, va.transpose())
-# 
-#     input   = randn(n, d)
-#     output  = gauss_den(input, mu, va)
-# 
-#     import tables
-#     h5file  = tables.openFile(file, "w")
-# 
-#     h5file.createArray(h5file.root, 'input', input)
-#     h5file.createArray(h5file.root, 'mu', mu)
-#     h5file.createArray(h5file.root, 'va', va)
-#     h5file.createArray(h5file.root, 'output', output)
-# 
-#     h5file.close()
-# 
-# def test_gauss_den():
-#     """"""
-#     # import tables
-#     # import numpy as N
-#     # 
-#     # filename    = 'dendata.h5'
-# 
-#     # # # Dimension 1
-#     # # d   = 1
-#     # # mu  = 1.0
-#     # # va  = 2.0
-# 
-#     # # X   = randn(1e3, 1)
-# 
-#     # # Y   = gauss_den(X, mu, va)
-# 
-#     # # h5file      = tables.openFile(filename, "w")
-# 
-#     # # h5file.createArray(h5file.root, 'X', X)
-#     # # h5file.createArray(h5file.root, 'mu', mu)
-#     # # h5file.createArray(h5file.root, 'va', va)
-#     # # h5file.createArray(h5file.root, 'Y', Y)
-# 
-#     # # h5file.close()
-# 
-#     # # # Dimension 2, diag
-#     # # d   = 2
-#     # # mu  = N.array([1.0, -2.0])
-#     # # va  = N.array([1.0, 2.0])
-# 
-#     # # X   = randn(1e3, 2)
-# 
-#     # # Y   = gauss_den(X, mu, va)
-# 
-#     # # h5file      = tables.openFile(filename, "w")
-# 
-#     # # h5file.createArray(h5file.root, 'X', X)
-#     # # h5file.createArray(h5file.root, 'mu', mu)
-#     # # h5file.createArray(h5file.root, 'va', va)
-#     # # h5file.createArray(h5file.root, 'Y', Y)
-# 
-#     # # Dimension 2, full
-#     # d   = 2
-#     # mu  = N.array([[0.2, -1.0]])
-#     # va  = N.array([[1.2, 0.1], [0.1, 0.5]])
-# 
-#     # X   = randn(1e3, 2)
-# 
-#     # Y   = gauss_den(X, mu, va)
-# 
-#     # h5file      = tables.openFile(filename, "w")
-# 
-#     # h5file.createArray(h5file.root, 'X', X)
-#     # h5file.createArray(h5file.root, 'mu', mu)
-#     # h5file.createArray(h5file.root, 'va', va)
-#     # h5file.createArray(h5file.root, 'Y', Y)
-# 
-#     # h5file.close()
-#