[Scipy-svn] r2357 - in trunk/Lib/sandbox/pyem: . src tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Wed Dec 6 07:34:37 EST 2006
Author: cdavid
Date: 2006-12-06 06:27:51 -0600 (Wed, 06 Dec 2006)
New Revision: 2357
Added:
trunk/Lib/sandbox/pyem/densities2.py
trunk/Lib/sandbox/pyem/src/pure_den.c
Modified:
trunk/Lib/sandbox/pyem/
trunk/Lib/sandbox/pyem/online_em.py
trunk/Lib/sandbox/pyem/setup.py
trunk/Lib/sandbox/pyem/src/Makefile
trunk/Lib/sandbox/pyem/tests/test_online_em.py
Log:
Add densities2.py for preliminary axis support and add specialized class for online EM in 1d for a 10-100x speed incread
Property changes on: trunk/Lib/sandbox/pyem
___________________________________________________________________
Name: svn:ignore
- *.pyc
*.swp
*.pyd
*.so
+ *.pyc
*.swp
*.pyd
*.so
*.prof
Added: trunk/Lib/sandbox/pyem/densities2.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities2.py 2006-12-06 09:22:34 UTC (rev 2356)
+++ trunk/Lib/sandbox/pyem/densities2.py 2006-12-06 12:27:51 UTC (rev 2357)
@@ -0,0 +1,267 @@
+#! /usr/bin/python
+#
+# Copyrighted David Cournapeau
+# Last Change: Wed Dec 06 09:00 PM 2006 J
+
+# New version, with default numpy ordering.
+
+import numpy as N
+import numpy.linalg as lin
+from numpy.random import randn
+from scipy.stats import chi2
+
+# Error classes
+class DenError(Exception):
+ """Base class for exceptions in this module.
+
+ Attributes:
+ expression -- input expression in which the error occurred
+ message -- explanation of the error"""
+ def __init__(self, message):
+ self.message = message
+
+ def __str__(self):
+ return self.message
+
+#============
+# Public API
+#============
+# The following function do all the fancy stuff to check that parameters
+# are Ok, and call the right implementation if args are OK.
+def gauss_den(x, mu, va, log = False, axis = -1):
+ """ Compute multivariate Gaussian density at points x for
+ mean mu and variance va along specified axis:
+
+ requirements:
+ * mean must be rank 0 (1d) or rank 1 (multi variate gaussian)
+ * va must be rank 0 (1d), rank 1(multi variate, diag covariance) or rank 2
+ (multivariate, full covariance).
+ * in 1 dimension case, any rank for mean and va is ok, as long as their size
+ is 1 (eg they contain only 1 element)
+
+ Caution: if x is rank 1, it is assumed you have a 1d problem. You cannot compute
+ the gaussian densities of only one sample of dimension d; for this, you have
+ to use a rank 2 !
+
+ If log is True, than the log density is returned
+ (useful for underflow ?)"""
+
+ # If data is rank 1, then we have 1 dimension problem.
+ if x.ndim == 1:
+ d = 1
+ n = x.size
+ if not N.size(mu) == 1:
+ raise DenError("for 1 dimension problem, mean must have only one element")
+
+ if not N.size(va) == 1:
+ raise DenError("for 1 dimension problem, mean must have only one element")
+
+ return _scalar_gauss_den(x, mu, va, log)
+ # If data is rank 2, then we may have 1 dimension or multi-variate problem
+ elif x.ndim == 2:
+ oaxis = (axis + 1) % 2
+ n = x.shape[axis]
+ d = x.shape[oaxis]
+
+ # Get away with 1d case now
+ if d == 1:
+ return _scalar_gauss_den(x, mu, va, log)
+
+ # Now, d > 1 (numpy attributes should be valid on mean and va now)
+ if not N.size(mu) == d or not mu.ndim == 1:
+ raise DenError("data is %d dimension, but mean's shape is %s" \
+ % (d, N.shape(mu)) + " (should be (%d,))" % d)
+
+ isfull = (va.ndim == 2)
+ if not (N.size(va) == d or (isfull and va.shape[0] == va.shape[1] == d)):
+ raise DenError("va has an invalid shape or number of elements")
+
+ if isfull:
+ # Compute along rows
+ if oaxis == 0:
+ return _full_gauss_den(x, mu[:, N.newaxis], va, log, axis)
+ else:
+ return _full_gauss_den(x, mu, va, log, axis)
+ else:
+ return _diag_gauss_den(x, mu, va, log, axis)
+ else:
+ raise RuntimeError("Sorry, only rank up to 2 supported")
+
+# To plot a confidence ellipse from multi-variate gaussian pdf
+def gauss_ell(mu, va, dim = [0, 1], npoints = 100, level = 0.39):
+ """ Given a mean and covariance for multi-variate
+ gaussian, returns npoints points for the ellipse
+ of confidence given by level (all points will be inside
+ the ellipsoides with a probability equal to level)
+
+ Returns the coordinate x and y of the ellipse"""
+
+ c = N.array(dim)
+
+ if mu.size < 2:
+ raise RuntimeError("this function only make sense for dimension 2 and more")
+
+ if mu.size == va.size:
+ mode = 'diag'
+ else:
+ if va.ndim == 2:
+ if va.shape[0] == va.shape[1]:
+ mode = 'full'
+ else:
+ raise DenError("variance not square")
+ else:
+ raise DenError("mean and variance are not dim conformant")
+
+ # If X ~ N(mu, va), then [X` * va^(-1/2) * X] ~ Chi2
+ chi22d = chi2(2)
+ mahal = N.sqrt(chi22d.ppf(level))
+
+ # Generates a circle of npoints
+ theta = N.linspace(0, 2 * N.pi, npoints)
+ circle = mahal * N.array([N.cos(theta), N.sin(theta)])
+
+ # Get the dimension which we are interested in:
+ mu = mu[dim]
+ if mode == 'diag':
+ va = va[dim]
+ elps = N.outer(mu, N.ones(npoints))
+ elps += N.dot(N.diag(N.sqrt(va)), circle)
+ elif mode == 'full':
+ va = va[c,:][:,c]
+ # Method: compute the cholesky decomp of each cov matrix, that is
+ # compute cova such as va = cova * cova'
+ # WARN: scipy is different than matlab here, as scipy computes a lower
+ # triangular cholesky decomp:
+ # - va = cova * cova' (scipy)
+ # - va = cova' * cova (matlab)
+ # So take care when comparing results with matlab !
+ cova = lin.cholesky(va)
+ elps = N.outer(mu, N.ones(npoints))
+ elps += N.dot(cova, circle)
+ else:
+ raise DenParam("var mode not recognized")
+
+ return elps[0, :], elps[1, :]
+
+#=============
+# Private Api
+#=============
+# Those 3 functions do almost all the actual computation
+def _scalar_gauss_den(x, mu, va, log):
+ """ This function is the actual implementation
+ of gaussian pdf in scalar case. It assumes all args
+ are conformant, so it should not be used directly
+
+ Call gauss_den instead"""
+ inva = 1/va
+ fac = (2*N.pi) ** (-1/2.0) * N.sqrt(inva)
+ y = ((x-mu) ** 2) * -0.5 * inva
+ if not log:
+ y = fac * N.exp(y.ravel())
+ else:
+ y = y + log(fac)
+
+ return y
+
+def _diag_gauss_den(x, mu, va, log, axis):
+ """ This function is the actual implementation
+ of gaussian pdf in scalar case. It assumes all args
+ are conformant, so it should not be used directly
+
+ Call gauss_den instead"""
+ # Diagonal matrix case
+ d = mu.size
+ if axis % 2 == 0:
+ x = N.swapaxes(x, 0, 1)
+
+ if not log:
+ inva = 1/va[0]
+ fac = (2*N.pi) ** (-d/2.0) * N.sqrt(inva)
+ y = (x[0] - mu[0]) ** 2 * inva * -0.5
+ for i in range(1, d):
+ inva = 1/va[i]
+ fac *= N.sqrt(inva)
+ y += (x[i] - mu[i]) ** 2 * inva * -0.5
+ y = fac * N.exp(y)
+ else:
+ y = _scalar_gauss_den(x[0], mu[0], va[0], log)
+ for i in range(1, d):
+ y += _scalar_gauss_den(x[i], mu[i], va[i], log)
+
+ return y
+
+def _full_gauss_den(x, mu, va, log, axis):
+ """ This function is the actual implementation
+ of gaussian pdf in full matrix case.
+
+ It assumes all args are conformant, so it should
+ not be used directly Call gauss_den instead
+
+ Does not check if va is definite positive (on inversible
+ for that matter), so the inverse computation and/or determinant
+ would throw an exception."""
+ d = mu.size
+ inva = lin.inv(va)
+ fac = 1 / N.sqrt( (2*N.pi) ** d * N.fabs(lin.det(va)))
+
+ # # Slow version (does not work since version 0.6)
+ # n = N.size(x, 0)
+ # y = N.zeros(n)
+ # for i in range(n):
+ # y[i] = N.dot(x[i,:],
+ # N.dot(inva, N.transpose(x[i,:])))
+ # y *= -0.5
+
+ # we are using a trick with sum to "emulate"
+ # the matrix multiplication inva * x without any explicit loop
+ if axis % 2 == 1:
+ y = N.dot(inva, (x-mu))
+ y = -0.5 * N.sum(y * (x-mu), 0)
+ else:
+ y = N.dot((x-mu), inva)
+ y = -0.5 * N.sum(y * (x-mu), 1)
+
+ if not log:
+ y = fac * N.exp(y)
+ else:
+ y = y + N.log(fac)
+
+ return y
+
+if __name__ == "__main__":
+ import pylab
+
+ #=========================================
+ # Test plotting a simple diag 2d variance:
+ #=========================================
+ va = N.array([5, 3])
+ mu = N.array([2, 3])
+
+ # Generate a multivariate gaussian of mean mu and covariance va
+ X = randn(2, 1e3)
+ Yc = N.dot(N.diag(N.sqrt(va)), X)
+ Yc = Yc.transpose() + mu
+
+ # Plotting
+ Xe, Ye = gauss_ell(mu, va, npoints = 100)
+ pylab.figure()
+ pylab.plot(Yc[:, 0], Yc[:, 1], '.')
+ pylab.plot(Xe, Ye, 'r')
+
+ #=========================================
+ # Test plotting a simple full 2d variance:
+ #=========================================
+ va = N.array([[0.2, 0.1],[0.1, 0.5]])
+ mu = N.array([0, 3])
+
+ # Generate a multivariate gaussian of mean mu and covariance va
+ X = randn(1e3, 2)
+ Yc = N.dot(lin.cholesky(va), X.transpose())
+ Yc = Yc.transpose() + mu
+
+ # Plotting
+ Xe, Ye = gauss_ell(mu, va, npoints = 100, level=0.95)
+ pylab.figure()
+ pylab.plot(Yc[:, 0], Yc[:, 1], '.')
+ pylab.plot(Xe, Ye, 'r')
+ pylab.show()
Modified: trunk/Lib/sandbox/pyem/online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/online_em.py 2006-12-06 09:22:34 UTC (rev 2356)
+++ trunk/Lib/sandbox/pyem/online_em.py 2006-12-06 12:27:51 UTC (rev 2357)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Mon Oct 23 07:00 PM 2006 J
+# Last Change: Wed Dec 06 09:00 PM 2006 J
#---------------------------------------------
# This is not meant to be used yet !!!! I am
@@ -21,9 +21,10 @@
from numpy import mean
from numpy.testing import assert_array_almost_equal, assert_array_equal
-from gmm_em import ExpMixtureModel, GMM, EM, multiple_gauss_den
+from gmm_em import ExpMixtureModel, GMM, EM
from gauss_mix import GM
from kmean import kmean
+import densities2 as D
import copy
from numpy.random import seed
@@ -50,7 +51,7 @@
return self.message
class OnGMM(ExpMixtureModel):
- """A Class for 'online' (ie recursive) EM. Insteand
+ """A Class for 'online' (ie recursive) EM. Instead
of running the E step on the whole data, the sufficient statistics
are updated for each new frame of data, and used in the (unchanged)
M step"""
@@ -135,14 +136,14 @@
self.init = init_methods[init]
- def compute_sufficient_statistics(self, frame, nu):
- """ sufficient_statistics(frame, nu)
+ def compute_sufficient_statistics_frame(self, frame, nu):
+ """ sufficient_statistics(frame, nu) for one frame of data
- frame has to be rank 2 !"""
- gamma = multiple_gauss_den(frame, self.pmu, self.pva)[0]
+ frame has to be rank 1 !"""
+ gamma = multiple_gauss_den_frame(frame, self.pmu, self.pva)
gamma *= self.pw
gamma /= N.sum(gamma)
- # <1>(t) = cw(t), each column is one component cw = (cw1, ..., cwK);
+ # <1>(t) = cw(t), self.cw = cw(t), each element is one component running weight
#self.cw = (1 - nu) * self.cw + nu * gamma
self.cw *= (1 - nu)
self.cw += nu * gamma
@@ -151,16 +152,136 @@
self.cx[k] = (1 - nu) * self.cx[k] + nu * frame * gamma[k]
self.cxx[k] = (1 - nu) * self.cxx[k] + nu * frame ** 2 * gamma[k]
- def update_em(self):
+ def update_em_frame(self):
for k in range(self.gm.k):
self.cmu[k] = self.cx[k] / self.cw[k]
self.cva[k] = self.cxx[k] / self.cw[k] - self.cmu[k] ** 2
+import _rawden
+
+class OnGMM1d(ExpMixtureModel):
+ """Special purpose case optimized for 1d dimensional cases.
+
+ Require each frame to be a float, which means the API is a bit
+ different than OnGMM. You are trading elegance for speed here !"""
+ def init_kmean(self, init_data, niter = 5):
+ """ Init the model using kmean."""
+ assert init_data.ndim == 1
+ k = self.gm.k
+ w = N.ones(k) / k
+
+ # Init the internal state of EM
+ self.cx = w * mean(init_data)
+ self.cxx = w * mean(init_data ** 2)
+
+ # w, mu and va init is the same that in the standard case
+ (code, label) = kmean(init_data[:, N.newaxis], \
+ init_data[0:k, N.newaxis], niter)
+ mu = code.copy()
+ va = N.zeros((k, 1))
+ for i in range(k):
+ va[i] = N.cov(init_data[N.where(label==i)], rowvar = 0)
+
+ self.gm.set_param(w, mu, va)
+ # c* are the parameters which are computed at every step (ie
+ # when a new frame is taken into account
+ self.cw = self.gm.w
+ self.cmu = self.gm.mu[:, 0]
+ self.cva = self.gm.va[:, 0]
+
+ # p* are the parameters used when computing gaussian densities
+ # they are the same than c* in the online case
+ # self.pw = self.cw.copy()
+ # self.pmu = self.cmu.copy()
+ # self.pva = self.cva.copy()
+ self.pw = self.cw
+ self.pmu = self.cmu
+ self.pva = self.cva
+
+ def __init__(self, gm, init_data, init = 'kmean'):
+ self.gm = gm
+ if self.gm.d is not 1:
+ raise RuntimeError("expects 1d gm only !")
+
+ # Possible init methods
+ init_methods = {'kmean' : self.init_kmean}
+ self.init = init_methods[init]
+
+ def compute_sufficient_statistics_frame(self, frame, nu):
+ """expects frame and nu to be float. Returns
+ cw, cxx and cxx, eg the sufficient statistics."""
+ _rawden.compute_ss_frame_1d(frame, self.cw, self.cmu, self.cva,
+ self.cx, self.cxx, nu)
+ return self.cw, self.cx, self.cxx
+
+ def update_em_frame(self, cw, cx, cxx):
+ """Update EM state using SS as returned by
+ compute_sufficient_statistics_frame. """
+ self.cmu = cx / cw
+ self.cva = cxx / cw - self.cmu ** 2
+
+ def compute_em_frame(self, frame, nu):
+ """Run a whole em step for one frame. frame and nu should be float;
+ if you don't need to split E and M steps, this is faster than calling
+ compute_sufficient_statistics_frame and update_em_frame"""
+ _rawden.compute_em_frame_1d(frame, self.cw, self.cmu, self.cva, \
+ self.cx, self.cxx, nu)
+#class OnlineEM:
+# def __init__(self, ogm):
+# """Init Online Em algorithm with ogm, an instance of OnGMM."""
+# if not isinstance(ogm, OnGMM):
+# raise TypeError("expect a OnGMM instance for the model")
+#
+# def init_em(self):
+# pass
+#
+# def train(self, data, nu):
+# pass
+#
+# def train_frame(self, frame, nu):
+# pass
+
+def multiple_gauss_den_frame(data, mu, va):
+ """Helper function to generate several Gaussian
+ pdf (different parameters) from one frame of data.
+
+ Semantics depending on data's rank
+ - rank 0: mu and va expected to have rank 0 or 1
+ - rank 1: mu and va expected to have rank 2."""
+ if N.ndim(data) == 0:
+ # scalar case
+ k = mu.size
+ inva = 1/va
+ fac = (2*N.pi) ** (-1/2.0) * N.sqrt(inva)
+ y = ((data-mu) ** 2) * -0.5 * inva
+ return fac * N.exp(y.ravel())
+ elif N.ndim(data) == 1:
+ # multi variate case (general case)
+ k = mu.shape[0]
+ y = N.zeros(k, data.dtype)
+ if mu.size == va.size:
+ # diag case
+ for i in range(k):
+ #y[i] = D.gauss_den(data, mu[i], va[i])
+ # This is a bit hackish: _diag_gauss_den implementation's
+ # changes can break this, but I don't see how to easily fix this
+ y[i] = D._diag_gauss_den(data, mu[i], va[i], False, -1)
+ return y
+ else:
+ raise RuntimeError("full not implemented yet")
+ #for i in range(K):
+ # y[i] = D.gauss_den(data, mu[i, :],
+ # va[d*i:d*i+d, :])
+ #return y.T
+ else:
+ raise RuntimeError("frame should be rank 0 or 1 only")
+
+
if __name__ == '__main__':
d = 1
k = 2
mode = 'diag'
- nframes = int(1e3)
+ nframes = int(5e3)
emiter = 4
seed(5)
@@ -206,13 +327,29 @@
for i in range(1, len(lamb)):
nu[i] = 1./(1 + lamb[i] / nu[i-1])
+ print "meth1"
# object version of online EM
for t in range(nframes):
- ogmm.compute_sufficient_statistics(data[t:t+1, :], nu[t])
- ogmm.update_em()
+ ogmm.compute_sufficient_statistics_frame(data[t], nu[t])
+ ogmm.update_em_frame()
ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
+ # 1d optimized version
+ ogm2 = GM(d, k, mode)
+ ogmm2 = OnGMM1d(ogm2, 'kmean')
+ ogmm2.init(init_data[:, 0])
+
+ print "meth2"
+ # object version of online EM
+ for t in range(nframes):
+ ogmm2.compute_sufficient_statistics_frame(data[t, 0], nu[t])
+ ogmm2.update_em_frame()
+
+ #ogmm2.gm.set_param(ogmm2.cw, ogmm2.cmu, ogmm2.cva)
+
+ print ogmm.cw
+ print ogmm2.cw
#+++++++++++++++
# Draw the model
#+++++++++++++++
Modified: trunk/Lib/sandbox/pyem/setup.py
===================================================================
--- trunk/Lib/sandbox/pyem/setup.py 2006-12-06 09:22:34 UTC (rev 2356)
+++ trunk/Lib/sandbox/pyem/setup.py 2006-12-06 12:27:51 UTC (rev 2357)
@@ -1,12 +1,19 @@
#! /usr/bin/env python
-# Last Change: Thu Nov 09 06:00 PM 2006 J
+# Last Change: Wed Dec 06 08:00 PM 2006 J
# TODO:
# - check how to handle cmd line build options with distutils and use
# it in the building process
""" pyem is a small python package to estimate Gaussian Mixtures Models
-from data, using Expectation Maximization"""
+from data, using Expectation Maximization.
+Maximum likelihood EM for mixture of Gaussian is implemented, with BIC computation
+for number of cluster assessment.
+
+There is also an experimental online EM version (the EM is updated for each new
+sample), and I plan to add Variational Bayes and/or MCMC support for Bayesian approach
+for estimating meta parameters of mixtures. """
+
from os.path import join
# This import from __init__ looks strange, should check whether there is no other way
from info import version as pyem_version
@@ -28,6 +35,10 @@
#define_macros=[('LIBSVM_EXPORTS', None),
# ('LIBSVM_DLL', None)],
sources=[join('src', 'c_gden.c')])
+ config.add_extension('_rawden',
+ #define_macros=[('LIBSVM_EXPORTS', None),
+ # ('LIBSVM_DLL', None)],
+ sources=[join('src', 'pure_den.c')])
return config
Modified: trunk/Lib/sandbox/pyem/src/Makefile
===================================================================
--- trunk/Lib/sandbox/pyem/src/Makefile 2006-12-06 09:22:34 UTC (rev 2356)
+++ trunk/Lib/sandbox/pyem/src/Makefile 2006-12-06 12:27:51 UTC (rev 2357)
@@ -4,26 +4,36 @@
PYREX = python2.4-pyrexc
PYTHONINC = -I/usr/include/python2.4
-NUMPYINC = -I/usr/lib/python2.4/site-packages/numpy/core/include
+NUMPYINC = -I/home/david/local/lib/python2.4/site-packages/numpy/core/include
OPTIMS = -O3 -funroll-all-loops -march=pentium4 -msse2
-WARN = -W -Wall
+WARN = -Wall -W -Winline -Wstrict-prototypes -Wmissing-prototypes \
+ -Waggregate-return -Wcast-align -Wcast-qual -Wnested-externs \
+ -Wshadow -Wbad-function-cast -Wwrite-strings
CFLAGS = $(PYTHONINC) $(NUMPYINC) $(OPTIMS) $(WARN)
+targets: c_gden.so _rawden.so
+
c_gden.so: c_gden.o
$(LD) -shared -o $@ $< -Wl,-soname,$@
+_rawden.so: pure_den.o
+ $(LD) -shared -o $@ $< -Wl,-soname,$@
+
c_gden.o: c_gden.c
$(CC) -c $(CFLAGS) -fPIC $<
-c_gmm.so: c_gmm.o
- $(LD) -shared -o $@ $< -Wl,-soname,$@
-
-c_gmm.o: c_gmm.c
+pure_den.o: pure_den.c
$(CC) -c $(CFLAGS) -fPIC $<
-c_gmm.c: c_gmm.pyx c_numpy.pxd c_python.pxd
- $(PYREX) $<
+#c_gmm.so: c_gmm.o
+# $(LD) -shared -o $@ $< -Wl,-soname,$@
+#
+#c_gmm.o: c_gmm.c
+# $(CC) -c $(CFLAGS) -fPIC $<
+#
+#c_gmm.c: c_gmm.pyx c_numpy.pxd c_python.pxd
+# $(PYREX) $<
clean:
rm -f c_gmm.c
Added: trunk/Lib/sandbox/pyem/src/pure_den.c
===================================================================
--- trunk/Lib/sandbox/pyem/src/pure_den.c 2006-12-06 09:22:34 UTC (rev 2356)
+++ trunk/Lib/sandbox/pyem/src/pure_den.c 2006-12-06 12:27:51 UTC (rev 2357)
@@ -0,0 +1,458 @@
+/*
+ * Last Change: Wed Dec 06 08:00 PM 2006 J
+ */
+#include <Python.h>
+#include <numpy/arrayobject.h>
+
+#include <math.h>
+
+PyObject* compute_ss_frame_1d_py(PyObject* dum, PyObject *arg);
+PyObject* compute_em_frame_1d_py(PyObject* dum, PyObject *arg);
+
+/*
+ * Pure C methods
+ */
+static int compute_ss_frame_1d(double sample, int nc, double* w, double* mu, double *va,
+ double *cx, double *cxx, double nu);
+static int update_em_frame_1d(const double* w, const double* cx, const double *cxx,
+ int nc, double *cmu, double *cva);
+
+static PyMethodDef mymethods[] = {
+ {"compute_ss_frame_1d", compute_ss_frame_1d_py, METH_VARARGS, ""},
+ {"compute_em_frame_1d", compute_em_frame_1d_py, METH_VARARGS, ""},
+ {NULL, NULL, 0, NULL} /* Sentinel */
+};
+
+/*
+ * function table
+ */
+PyMODINIT_FUNC init_rawden(void)
+{
+ (void)Py_InitModule("_rawden", mymethods);
+ import_array();
+}
+
+PyObject* compute_ss_frame_1d_py(PyObject* self, PyObject *args)
+{
+ PyObject *w, *mu, *va, *cx, *cxx;
+ PyObject *w_a, *mu_a, *va_a, *cx_a, *cxx_a;
+ double nu, sample;
+ npy_intp ndim, *dims, len;
+ double *mu_ca, *va_ca, *w_ca, *cx_ca, *cxx_ca;
+
+ const npy_intp mrank = 1;
+
+ int st;
+
+ /*
+ * Init python object holder to NULL so that we can use Py_XDECREF on all
+ * the objets when something is woring
+ */
+ w = NULL;
+ mu = NULL;
+ va = NULL;
+ cx = NULL;
+ cxx = NULL;
+
+ w_a = NULL;
+ mu_a = NULL;
+ va_a = NULL;
+ cx_a = NULL;
+ cxx_a = NULL;
+ /*
+ * Parsing of args: w, cx and cxx are inout
+ */
+ if (!PyArg_ParseTuple(args, "dOOOOOd", &sample, &w, &mu, &va, &cx, &cxx, &nu)){
+ return NULL;
+ }
+ if ( (nu > 1) | (nu <= 0) ) {
+ PyErr_SetString(PyExc_TypeError, "nu should be between 0 and 1");
+ return NULL;
+ }
+
+ /* inout entries */
+ w_a = PyArray_FROM_OTF(w, NPY_DOUBLE, NPY_INOUT_ARRAY);
+ if (w_a == NULL) {
+ PyErr_SetString(PyExc_TypeError, "w array not convertible");
+ return NULL;
+ }
+
+ cx_a = PyArray_FROM_OTF(cx, NPY_DOUBLE, NPY_INOUT_ARRAY);
+ if (cx_a == NULL) {
+ PyErr_SetString(PyExc_TypeError, "cx array not convertible");
+ goto fail;
+ }
+
+ cxx_a = PyArray_FROM_OTF(cxx, NPY_DOUBLE, NPY_INOUT_ARRAY);
+ if (cxx_a == NULL) {
+ PyErr_SetString(PyExc_TypeError, "cxx array not convertible");
+ goto fail;
+ }
+
+ /* in only entries */
+ mu_a = PyArray_FROM_OTF(mu, NPY_DOUBLE, NPY_IN_ARRAY);
+ if (mu_a == NULL) {
+ PyErr_SetString(PyExc_TypeError, "mu array not convertible");
+ goto fail;
+ }
+
+ va_a = PyArray_FROM_OTF(va, NPY_DOUBLE, NPY_IN_ARRAY);
+ if (va_a == NULL) {
+ PyErr_SetString(PyExc_TypeError, "va array not convertible");
+ goto fail;
+ }
+
+ /*
+ * Check that in and out have same size and same rank
+ */
+ ndim = PyArray_NDIM(w_a);
+ if(ndim != mrank) {
+ PyErr_SetString(PyExc_TypeError, "w rank should be 1");
+ goto fail;
+ }
+ ndim = PyArray_NDIM(cx_a);
+ if(ndim != mrank) {
+ PyErr_SetString(PyExc_TypeError, "cx rank should be 1");
+ goto fail;
+ }
+ ndim = PyArray_NDIM(cxx_a);
+ if(ndim != mrank) {
+ PyErr_SetString(PyExc_TypeError, "cxx rank should be 1");
+ goto fail;
+ }
+ ndim = PyArray_NDIM(mu_a);
+ if(ndim != mrank) {
+ PyErr_SetString(PyExc_TypeError, "mu rank should be 1");
+ goto fail;
+ }
+ ndim = PyArray_NDIM(va_a);
+ if(ndim != mrank) {
+ PyErr_SetString(PyExc_TypeError, "va rank should be 1");
+ goto fail;
+ }
+
+ dims = PyArray_DIMS(w_a);
+ len = dims[0];
+ //fprintf(stderr, "%s:%s, len is %d\n", __FILE__, __func__, len);
+ dims = PyArray_DIMS(cx_a);
+ if(dims[0] != len) {
+ PyErr_SetString(PyExc_TypeError, "cx shape should match !");
+ goto fail;
+ }
+ dims = PyArray_DIMS(cxx_a);
+ if(dims[0] != len) {
+ PyErr_SetString(PyExc_TypeError, "cxx shape should match !");
+ goto fail;
+ }
+ dims = PyArray_DIMS(mu_a);
+ if(dims[0] != len) {
+ PyErr_SetString(PyExc_TypeError, "mu_a shape should match !");
+ goto fail;
+ }
+ dims = PyArray_DIMS(va_a);
+ if(dims[0] != len) {
+ PyErr_SetString(PyExc_TypeError, "va_a shape should match !");
+ goto fail;
+ }
+
+ /*
+ * Get pointer to the data
+ */
+ w_ca = PyArray_DATA(w_a);
+ if (w_ca == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Unknown Error for w_ca");
+ goto fail;
+ }
+ cx_ca = PyArray_DATA(cx_a);
+ if (cx_ca == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Unknown Error for cx_ca");
+ goto fail;
+ }
+ cxx_ca = PyArray_DATA(cxx_a);
+ if (w_ca == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Unknown Error for cxx_ca");
+ goto fail;
+ }
+ mu_ca = PyArray_DATA(mu_a);
+ if (mu_ca == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Unknown Error for mu_ca");
+ goto fail;
+ }
+ va_ca = PyArray_DATA(va_a);
+ if (va_ca == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Unknown Error for va_ca");
+ goto fail;
+ }
+ /*
+ * Call actual implementation
+ */
+ st = compute_ss_frame_1d(sample, len, w_ca, mu_ca, va_ca, cx_ca, cxx_ca, nu);
+ if (st) {
+ PyErr_SetString(PyExc_TypeError, "Error while calling multi_gauss....");
+ goto fail;
+ }
+
+ Py_DECREF(w_a);
+ Py_DECREF(cx_a);
+ Py_DECREF(cxx_a);
+ Py_DECREF(mu_a);
+ Py_DECREF(va_a);
+
+ Py_INCREF(Py_None);
+ return Py_None;
+
+fail:
+ Py_XDECREF(w_a);
+ Py_XDECREF(cx_a);
+ Py_XDECREF(cxx_a);
+ Py_XDECREF(mu_a);
+ Py_XDECREF(va_a);
+ return NULL;
+}
+
+PyObject* compute_em_frame_1d_py(PyObject* self, PyObject *args)
+{
+ PyObject *w, *mu, *va, *cx, *cxx;
+ PyObject *w_a, *mu_a, *va_a, *cx_a, *cxx_a;
+ double nu, sample;
+ npy_intp ndim, *dims, len;
+ double *mu_ca, *va_ca, *w_ca, *cx_ca, *cxx_ca;
+
+ const npy_intp mrank = 1;
+
+ int st;
+
+ /*
+ * Init python object holder to NULL so that we can use Py_XDECREF on all
+ * the objets when something is woring
+ */
+ w = NULL;
+ mu = NULL;
+ va = NULL;
+ cx = NULL;
+ cxx = NULL;
+
+ w_a = NULL;
+ mu_a = NULL;
+ va_a = NULL;
+ cx_a = NULL;
+ cxx_a = NULL;
+ /*
+ * Parsing of args: w, cx and cxx are inout
+ */
+ if (!PyArg_ParseTuple(args, "dOOOOOd", &sample, &w, &mu, &va, &cx, &cxx, &nu)){
+ return NULL;
+ }
+ if ( (nu > 1) | (nu <= 0) ) {
+ PyErr_SetString(PyExc_TypeError, "nu should be between 0 and 1");
+ return NULL;
+ }
+
+ /* inout entries */
+ w_a = PyArray_FROM_OTF(w, NPY_DOUBLE, NPY_INOUT_ARRAY);
+ if (w_a == NULL) {
+ PyErr_SetString(PyExc_TypeError, "w array not convertible");
+ return NULL;
+ }
+
+ cx_a = PyArray_FROM_OTF(cx, NPY_DOUBLE, NPY_INOUT_ARRAY);
+ if (cx_a == NULL) {
+ PyErr_SetString(PyExc_TypeError, "cx array not convertible");
+ goto fail;
+ }
+
+ cxx_a = PyArray_FROM_OTF(cxx, NPY_DOUBLE, NPY_INOUT_ARRAY);
+ if (cxx_a == NULL) {
+ PyErr_SetString(PyExc_TypeError, "cxx array not convertible");
+ goto fail;
+ }
+
+ /* in only entries */
+ mu_a = PyArray_FROM_OTF(mu, NPY_DOUBLE, NPY_IN_ARRAY);
+ if (mu_a == NULL) {
+ PyErr_SetString(PyExc_TypeError, "mu array not convertible");
+ goto fail;
+ }
+
+ va_a = PyArray_FROM_OTF(va, NPY_DOUBLE, NPY_IN_ARRAY);
+ if (va_a == NULL) {
+ PyErr_SetString(PyExc_TypeError, "va array not convertible");
+ goto fail;
+ }
+
+ /*
+ * Check that in and out have same size and same rank
+ */
+ ndim = PyArray_NDIM(w_a);
+ if(ndim != mrank) {
+ PyErr_SetString(PyExc_TypeError, "w rank should be 1");
+ goto fail;
+ }
+ ndim = PyArray_NDIM(cx_a);
+ if(ndim != mrank) {
+ PyErr_SetString(PyExc_TypeError, "cx rank should be 1");
+ goto fail;
+ }
+ ndim = PyArray_NDIM(cxx_a);
+ if(ndim != mrank) {
+ PyErr_SetString(PyExc_TypeError, "cxx rank should be 1");
+ goto fail;
+ }
+ ndim = PyArray_NDIM(mu_a);
+ if(ndim != mrank) {
+ PyErr_SetString(PyExc_TypeError, "mu rank should be 1");
+ goto fail;
+ }
+ ndim = PyArray_NDIM(va_a);
+ if(ndim != mrank) {
+ PyErr_SetString(PyExc_TypeError, "va rank should be 1");
+ goto fail;
+ }
+
+ dims = PyArray_DIMS(w_a);
+ len = dims[0];
+ //fprintf(stderr, "%s:%s, len is %d\n", __FILE__, __func__, len);
+ dims = PyArray_DIMS(cx_a);
+ if(dims[0] != len) {
+ PyErr_SetString(PyExc_TypeError, "cx shape should match !");
+ goto fail;
+ }
+ dims = PyArray_DIMS(cxx_a);
+ if(dims[0] != len) {
+ PyErr_SetString(PyExc_TypeError, "cxx shape should match !");
+ goto fail;
+ }
+ dims = PyArray_DIMS(mu_a);
+ if(dims[0] != len) {
+ PyErr_SetString(PyExc_TypeError, "mu_a shape should match !");
+ goto fail;
+ }
+ dims = PyArray_DIMS(va_a);
+ if(dims[0] != len) {
+ PyErr_SetString(PyExc_TypeError, "va_a shape should match !");
+ goto fail;
+ }
+
+ /*
+ * Get pointer to the data
+ */
+ w_ca = PyArray_DATA(w_a);
+ if (w_ca == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Unknown Error for w_ca");
+ goto fail;
+ }
+ cx_ca = PyArray_DATA(cx_a);
+ if (cx_ca == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Unknown Error for cx_ca");
+ goto fail;
+ }
+ cxx_ca = PyArray_DATA(cxx_a);
+ if (w_ca == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Unknown Error for cxx_ca");
+ goto fail;
+ }
+ mu_ca = PyArray_DATA(mu_a);
+ if (mu_ca == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Unknown Error for mu_ca");
+ goto fail;
+ }
+ va_ca = PyArray_DATA(va_a);
+ if (va_ca == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Unknown Error for va_ca");
+ goto fail;
+ }
+ /*
+ * Call actual implementation
+ */
+ st = compute_ss_frame_1d(sample, len, w_ca, mu_ca, va_ca, cx_ca, cxx_ca, nu);
+ if (st) {
+ PyErr_SetString(PyExc_TypeError, "Error while calling multi_gauss....");
+ goto fail;
+ }
+ st = update_em_frame_1d(w_ca, cx_ca, cxx_ca, len, mu_ca, va_ca);
+ if (st) {
+ PyErr_SetString(PyExc_TypeError, "Error while calling update_em_frame_1d....");
+ goto fail;
+ }
+
+ Py_DECREF(w_a);
+ Py_DECREF(cx_a);
+ Py_DECREF(cxx_a);
+ Py_DECREF(mu_a);
+ Py_DECREF(va_a);
+
+ Py_INCREF(Py_None);
+ return Py_None;
+
+fail:
+ Py_XDECREF(w_a);
+ Py_XDECREF(cx_a);
+ Py_XDECREF(cxx_a);
+ Py_XDECREF(mu_a);
+ Py_XDECREF(va_a);
+ return NULL;
+}
+
+int compute_ss_frame_1d(double sample, int nc, double* w, double* mu, double *va,
+ double *cx, double *cxx, double nu)
+{
+ /*
+ * TODO: check va division
+ */
+ int i;
+ double inva, fac, *gam, acc;
+
+ gam = malloc(sizeof(*gam) * nc);
+ if (gam == NULL) {
+ return -1;
+ }
+
+ /* Compute gamma */
+ acc = 0;
+ for (i = 0; i < nc; ++i) {
+ inva = 1/va[i];
+ fac = 1 / sqrt(2 * M_PI * va[i]);
+ gam[i] = fac * exp( -0.5 * inva * (sample - mu[i]) * (sample - mu[i]));
+ gam[i] *= w[i];
+ acc += gam[i];
+ }
+ /* Normalize gamma */
+ for (i = 0; i < nc; ++i) {
+ gam[i] /= acc;
+ }
+
+ /* Compute new SS from EM (cx and cxx) */
+ for (i = 0; i < nc; ++i) {
+ w[i] *= (1 - nu);
+ w[i] += nu * gam[i];
+ cx[i] = (1 - nu) * cx[i] + nu * sample * gam[i];
+ cxx[i] = (1 - nu) * cxx[i] + nu * sample * sample * gam[i];
+ }
+
+ free(gam);
+
+ return 0;
+}
+
+/*
+ * update mu and va from SS w, cx and cxx. Only mu and va are modified
+ * all arrays have same length nc
+ */
+int update_em_frame_1d(const double* cw, const double* cx, const double *cxx,
+ int nc, double *cmu, double *cva)
+{
+ /*
+ * TODO: check va division
+ */
+ int i;
+ double invw;
+
+ /* Compute new SS from EM (cx and cxx) */
+ for (i = 0; i < nc; ++i) {
+ invw = 1/cw[i];
+ cmu[i] = cx[i] * invw;
+ cva[i] = cxx[i] * invw - cmu[i] * cmu[i];
+ }
+
+ return 0;
+}
Modified: trunk/Lib/sandbox/pyem/tests/test_online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_online_em.py 2006-12-06 09:22:34 UTC (rev 2356)
+++ trunk/Lib/sandbox/pyem/tests/test_online_em.py 2006-12-06 12:27:51 UTC (rev 2357)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Thu Nov 16 09:00 PM 2006 J
+# Last Change: Wed Dec 06 09:00 PM 2006 J
import copy
@@ -11,7 +11,7 @@
set_package_path()
from pyem import GM, GMM
-from pyem.online_em import OnGMM
+from pyem.online_em import OnGMM, OnGMM1d
restore_path()
# #Optional:
@@ -21,6 +21,7 @@
# Error precision allowed (nb of decimals)
AR_AS_PREC = 12
+KM_ITER = 5
class OnlineEmTest(NumpyTestCase):
def _create_model(self, d, k, mode, nframes, emiter):
@@ -38,7 +39,7 @@
# Init the model
lgm = GM(d, k, mode)
gmm = GMM(lgm, 'kmean')
- gmm.init(data)
+ gmm.init(data, niter = KM_ITER)
self.gm0 = copy.copy(gmm.gm)
# The actual EM, with likelihood computation
@@ -91,7 +92,7 @@
ogm = GM(d, k, mode)
ogmm = OnGMM(ogm, 'kmean')
init_data = self.data
- ogmm.init(init_data)
+ ogmm.init(init_data, niter = KM_ITER)
# Check that online kmean init is the same than kmean offline init
ogm0 = copy.copy(ogm)
@@ -116,8 +117,8 @@
ogmm.pva = ogmm.cva.copy()
for e in range(emiter):
for t in range(nframes):
- ogmm.compute_sufficient_statistics(self.data[t:t+1, :], nu[t])
- ogmm.update_em()
+ ogmm.compute_sufficient_statistics_frame(self.data[t], nu[t])
+ ogmm.update_em_frame()
# Change pw args only a each epoch
ogmm.pw = ogmm.cw.copy()
@@ -147,12 +148,56 @@
d = 1
k = 2
mode = 'diag'
- nframes = int(1e3)
+ nframes = int(5e2)
emiter = 4
self._create_model(d, k, mode, nframes, emiter)
self._run_pure_online(d, k, mode, nframes)
+ def check_1d_imp(self):
+ d = 1
+ k = 2
+ mode = 'diag'
+ nframes = int(5e2)
+ emiter = 4
+
+ self._create_model(d, k, mode, nframes, emiter)
+ gmref = self._run_pure_online(d, k, mode, nframes)
+ gmtest = self._run_pure_online_1d(d, k, mode, nframes)
+
+ assert_array_almost_equal(gmref.w, gmtest.w, AR_AS_PREC)
+ assert_array_almost_equal(gmref.mu, gmtest.mu, AR_AS_PREC)
+ assert_array_almost_equal(gmref.va, gmtest.va, AR_AS_PREC)
+
+ def _run_pure_online_1d(self, d, k, mode, nframes):
+ #++++++++++++++++++++++++++++++++++++++++
+ # Approximate the models with online EM
+ #++++++++++++++++++++++++++++++++++++++++
+ ogm = GM(d, k, mode)
+ ogmm = OnGMM1d(ogm, 'kmean')
+ init_data = self.data[0:nframes / 20, :]
+ ogmm.init(init_data[:, 0])
+
+ # Forgetting param
+ ku = 0.005
+ t0 = 200
+ lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0)
+ nu0 = 0.2
+ nu = N.zeros((len(lamb), 1))
+ nu[0] = nu0
+ for i in range(1, len(lamb)):
+ nu[i] = 1./(1 + lamb[i] / nu[i-1])
+
+ # object version of online EM
+ for t in range(nframes):
+ # the assert are here to check we do not create copies
+ # unvoluntary for parameters
+ a, b, c = ogmm.compute_sufficient_statistics_frame(self.data[t, 0], nu[t])
+ ogmm.update_em_frame(a, b, c)
+
+ ogmm.gm.set_param(ogmm.cw, ogmm.cmu[:, N.newaxis], ogmm.cva[:, N.newaxis])
+
+ return ogmm.gm
def _run_pure_online(self, d, k, mode, nframes):
#++++++++++++++++++++++++++++++++++++++++
# Approximate the models with online EM
@@ -179,10 +224,11 @@
assert ogmm.pw is ogmm.cw
assert ogmm.pmu is ogmm.cmu
assert ogmm.pva is ogmm.cva
- ogmm.compute_sufficient_statistics(self.data[t:t+1, :], nu[t])
- ogmm.update_em()
+ ogmm.compute_sufficient_statistics_frame(self.data[t], nu[t])
+ ogmm.update_em_frame()
ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
+ return ogmm.gm
if __name__ == "__main__":
NumpyTest().run()
More information about the Scipy-svn
mailing list