[Scipy-svn] r3099 - in trunk/Lib/sandbox/pyem: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Tue Jun 12 08:21:14 EDT 2007
Author: cdavid
Date: 2007-06-12 07:21:04 -0500 (Tue, 12 Jun 2007)
New Revision: 3099
Modified:
trunk/Lib/sandbox/pyem/densities.py
trunk/Lib/sandbox/pyem/gauss_mix.py
trunk/Lib/sandbox/pyem/gmm_em.py
trunk/Lib/sandbox/pyem/tests/test_densities.py
trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
Log:
Add function to compute log responsabilities with logsumexp.
Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py 2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/densities.py 2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,7 +1,7 @@
#! /usr/bin/python
#
# Copyrighted David Cournapeau
-# Last Change: Tue Jun 12 12:00 PM 2007 J
+# Last Change: Tue Jun 12 03:00 PM 2007 J
"""This module implements various basic functions related to multivariate
gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
@@ -167,14 +167,6 @@
inva = lin.inv(va)
fac = 1 / N.sqrt( (2*N.pi) ** d * N.fabs(lin.det(va)))
- # # Slow version
- # n = N.size(x, 0)
- # y = N.zeros(n)
- # for i in range(n):
- # y[i] = N.dot(x[i,:],
- # N.dot(inva, N.transpose(x[i,:])))
- # y *= -0.5
-
# we are using a trick with sum to "emulate"
# the matrix multiplication inva * x without any explicit loop
y = N.dot((x-mu), inva)
Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Mon Jun 11 06:00 PM 2007 J
+# Last Change: Tue Jun 12 03:00 PM 2007 J
"""Module implementing GM, a class which represents Gaussian mixtures.
@@ -12,7 +12,7 @@
import numpy as N
from numpy.random import randn, rand
import numpy.linalg as lin
-import densities
+import densities as D
import misc
# Right now, two main usages of a Gaussian Model are possible
@@ -276,13 +276,13 @@
Ye = []
if self.mode == 'diag':
for i in range(self.k):
- xe, ye = densities.gauss_ell(self.mu[i, :], self.va[i, :],
+ xe, ye = D.gauss_ell(self.mu[i, :], self.va[i, :],
dim, npoints, level)
Xe.append(xe)
Ye.append(ye)
elif self.mode == 'full':
for i in range(self.k):
- xe, ye = densities.gauss_ell(self.mu[i, :],
+ xe, ye = D.gauss_ell(self.mu[i, :],
self.va[i*self.d:i*self.d+self.d, :],
dim, npoints, level)
Xe.append(xe)
@@ -317,6 +317,7 @@
raise GmParamError("Unknown mode")
return True
+
@classmethod
def gen_param(cls, d, nc, varmode = 'diag', spread = 1):
"""Generate random, valid parameters for a gaussian mixture model.
@@ -366,6 +367,27 @@
# def _regularize(self):
# raise NotImplemented("No regularization")
+ def pdf(self, x, log = False):
+ """Computes the pdf of the model at given points.
+
+ :Parameters:
+ x : ndarray
+ points where to estimate the pdf. One row for one
+ multi-dimensional sample (eg to estimate the pdf at 100
+ different points in 10 dimension, data's shape should be (100,
+ 20)).
+ log : bool
+ If true, returns the log pdf instead of the pdf.
+
+ :Returns:
+ y : ndarray
+ the pdf at points x."""
+ if log:
+ return D.logsumexp(N.sum(
+ D.multiple_gauss_den(x, self.mu, self.va, log = True) * self.w, 1))
+ else:
+ return N.sum(D.multiple_gauss_den(x, self.mu, self.va) * self.w, 1)
+
#=================
# Plotting methods
#=================
@@ -572,7 +594,7 @@
# XXX refactor computing pdf
dmu = self.mu[:, dim]
dva = self._get_va(dim)
- den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w
+ den = D.multiple_gauss_den(xdata, dmu, dva) * self.w
den = N.sum(den, 1)
den = den.reshape(len(rangey), len(rangex))
Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Tue Jun 12 11:00 AM 2007 J
+# Last Change: Tue Jun 12 08:00 PM 2007 J
"""Module implementing GMM, a class to estimate Gaussian mixture models using
EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -159,7 +159,7 @@
knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
i) = P[state = i | observation = data(t); w, mu, va]
- This is basically the E step of EM for GMM."""
+ This is basically the E step of EM for finite mixtures."""
# compute the gaussian pdf
tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
# multiply by the weight
@@ -169,6 +169,28 @@
return gd, tgd
+ def compute_log_responsabilities(self, data):
+ """Compute log responsabilities.
+
+ Return normalized and non-normalized responsabilities for the model (in
+ the log domain)
+
+ Note
+ ----
+ Computes the latent variable distribution (a posteriori probability)
+ knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
+ i) = P[state = i | observation = data(t); w, mu, va]
+
+ This is basically the E step of EM for finite mixtures."""
+ # compute the gaussian pdf
+ tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va, log = True)
+ # multiply by the weight
+ tgd += N.log(self.gm.w)
+ # Normalize to get a pdf
+ gd = tgd - densities.logsumexp(tgd)[:, N.newaxis]
+
+ return gd, tgd
+
def update_em(self, data, gamma):
"""Computes update of the Gaussian Mixture Model (M step)
from the a posteriori pdf, computed by gmm_posterior
Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Tue Jun 12 12:00 PM 2007 J
+# Last Change: Tue Jun 12 08:00 PM 2007 J
# TODO:
# - having "fake tests" to check that all mode (scalar, diag and full) are
@@ -21,7 +21,7 @@
# import modules that are located in the same directory as this file.
restore_path()
-DEF_DEC = 12
+from testcommon import DEF_DEC
class TestDensities(NumpyTestCase):
def _generate_test_data_1d(self):
Modified: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py 2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py 2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Mon Jun 11 03:00 PM 2007 J
+# Last Change: Tue Jun 12 03:00 PM 2007 J
# For now, just test that all mode/dim execute correctly
@@ -10,6 +10,7 @@
set_package_path()
from pyem import GM
+from pyem.densities import multiple_gauss_den
restore_path()
class test_BasicFunc(NumpyTestCase):
@@ -58,6 +59,16 @@
sva = gm._get_va(dim)
assert N.all(sva == tva)
+ def test_2d_diag_pdf(self):
+ d = 2
+ w = N.array([0.4, 0.6])
+ mu = N.array([[0., 2], [-1, -2]])
+ va = N.array([[1, 0.5], [0.5, 1]])
+ x = N.random.randn(100, 2)
+ gm = GM.fromvalues(w, mu, va)
+ y1 = N.sum(multiple_gauss_den(x, mu, va) * w, 1)
+ y2 = gm.pdf(x)
+ assert_array_almost_equal(y1, y2)
if __name__ == "__main__":
NumpyTest().run()
Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Tue Jun 12 11:00 AM 2007 J
+# Last Change: Tue Jun 12 09:00 PM 2007 J
# For now, just test that all mode/dim execute correctly
@@ -12,6 +12,8 @@
from pyem import GMM, GM, EM
restore_path()
+from testcommon import DEF_DEC
+
def load_dataset(filename):
from scipy.io import loadmat
dic = loadmat(filename, squeeze_me = False)
@@ -162,5 +164,46 @@
assert_array_equal(gmm.gm.mu, dic['mu'])
assert_array_equal(gmm.gm.va, dic['va'])
+class test_log_domain(EmTest):
+ """This class tests whether the GMM works in log domain."""
+ def _test_common(self, d, k, mode):
+ dic = load_dataset('%s_%dd_%dk.mat' % (mode, d, k))
+
+ gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
+ gmm = GMM(gm, 'test')
+
+ a, na = gmm.compute_responsabilities(dic['data'])
+ la, nla = gmm.compute_log_responsabilities(dic['data'])
+
+ ta = N.log(a)
+ tna = N.log(na)
+ if not N.all(N.isfinite(ta)):
+ print "precision problem for %s, %dd, %dk, need fixing" % (mode, d, k)
+ else:
+ assert_array_almost_equal(ta, la, DEF_DEC)
+
+ if not N.all(N.isfinite(tna)):
+ print "precision problem for %s, %dd, %dk, need fixing" % (mode, d, k)
+ else:
+ assert_array_almost_equal(tna, nla, DEF_DEC)
+
+ def test_2d_diag(self, level = 1):
+ d = 2
+ k = 3
+ mode = 'diag'
+ self._test_common(d, k, mode)
+
+ def test_1d_full(self, level = 1):
+ d = 1
+ k = 4
+ mode = 'diag'
+ self._test_common(d, k, mode)
+
+ def test_2d_full(self, level = 1):
+ d = 2
+ k = 3
+ mode = 'full'
+ self._test_common(d, k, mode)
+
if __name__ == "__main__":
NumpyTest().run()
More information about the Scipy-svn
mailing list