[Scipy-svn] r2291 - in trunk/Lib/sandbox/pyem: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Fri Oct 20 00:01:19 EDT 2006
Author: cdavid
Date: 2006-10-19 23:01:05 -0500 (Thu, 19 Oct 2006)
New Revision: 2291
Added:
trunk/Lib/sandbox/pyem/tests/generate_test_data.py
trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
trunk/Lib/sandbox/pyem/tests/test_online_em.py
Modified:
trunk/Lib/sandbox/pyem/Changelog
trunk/Lib/sandbox/pyem/TODO
trunk/Lib/sandbox/pyem/__init__.py
trunk/Lib/sandbox/pyem/_c_densities.py
trunk/Lib/sandbox/pyem/example.py
trunk/Lib/sandbox/pyem/info.py
trunk/Lib/sandbox/pyem/online_em.py
trunk/Lib/sandbox/pyem/setup.py
trunk/Lib/sandbox/pyem/tests/test_densities.py
Log:
* Convert the online_em.py script to a class, OnGMM
* Add tests for OnGMM
* Small bugfixes in _c_densitites (convert general exception
to ImportException when wrong ctypes version found), setup
(remove the name attribute to avoid warning when installing)
Modified: trunk/Lib/sandbox/pyem/Changelog
===================================================================
--- trunk/Lib/sandbox/pyem/Changelog 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/Changelog 2006-10-20 04:01:05 UTC (rev 2291)
@@ -1,3 +1,12 @@
+pyem (0.5.4) Fri, 20 Oct 2006 12:52:01 +0900
+
+ * Bump to 0.5.4
+ * Online EM is basically implemented, with tests. The API
+ should be fixed to be choherent (lacks the Trainer Class, too).
+ The class is not imported by default, still (available as _OnGMM)
+
+-- David Cournapeau <david at ar.media.kyoto-u.ac.jp>
+
pyem (0.5.3) Thu, 12 Oct 2006 21:08:21 +0900
* Change the layout and setup.py for inclusion to scipy.
Modified: trunk/Lib/sandbox/pyem/TODO
===================================================================
--- trunk/Lib/sandbox/pyem/TODO 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/TODO 2006-10-20 04:01:05 UTC (rev 2291)
@@ -1,19 +1,12 @@
-# Last Change: Thu Oct 05 03:00 PM 2006 J
+# Last Change: Fri Oct 20 12:00 PM 2006 J
Things which must be implemented for a 1.0 version (in importante order)
- - test for various length and model size
+ - A class for learning + a classifier
+ - test for various length and model size of gaussian densities/GM and GMM
- a small help/tutorial
- be complient with scipy module dev guidelines (DEVELOPERS.TXT)
- - On-line EM
- - use scipy.cluster.vq instead of custom kmeans algo (find
- a way to get label from vq.kmeans: depends on scipy changes)
- - C implementation of Gaussian densities at least (partially done,
- but need integration into distutils + portable way of detecting/
- loading shared library with ctypes)
- - numpy computations make huge difference between C and F storage:
- should review the code to see if the
-Things which would be nice:
+Things which would be nice (after 1.0 version):
- Integrate libem (libem should be modified so
that it would be easy to package and distribute)
- Other initialization schemes
Modified: trunk/Lib/sandbox/pyem/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/__init__.py 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/__init__.py 2006-10-20 04:01:05 UTC (rev 2291)
@@ -1,10 +1,14 @@
#! /usr/bin/env python
-# Last Change: Thu Oct 12 11:00 PM 2006 J
+# Last Change: Fri Oct 20 11:00 AM 2006 J
from info import __doc__
+from gauss_mix import GmParamError, GM
+from gmm_em import GmmParamError, GMM, EM
+from online_em import OnGMM as _OnGMM
+
+__all__ = filter(lambda s:not s.startswith('_'),dir())
+
from numpy.testing import NumpyTest
test = NumpyTest().test
-from gauss_mix import GmParamError, GM
-from gmm_em import GmmParamError, GMM, EM
Modified: trunk/Lib/sandbox/pyem/_c_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/_c_densities.py 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/_c_densities.py 2006-10-20 04:01:05 UTC (rev 2291)
@@ -1,7 +1,7 @@
#! /usr/bin/python
#
# Copyrighted David Cournapeau
-# Last Change: Tue Oct 03 05:00 PM 2006 J
+# Last Change: Thu Oct 19 06:00 PM 2006 J
# This module uses a C implementation through ctypes, for diagonal cases
# TODO:
@@ -22,7 +22,7 @@
if ctypes_major < 1:
msg = "version of ctypes is %s, expected at least %s" \
% (ctypes.__version__, '1.0.0')
- raise Exception(msg)
+ raise ImportError(msg)
# Requirements for diag gden
_gden = load_library('c_gden.so', __file__)
Modified: trunk/Lib/sandbox/pyem/example.py
===================================================================
--- trunk/Lib/sandbox/pyem/example.py 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/example.py 2006-10-20 04:01:05 UTC (rev 2291)
@@ -49,7 +49,7 @@
# Keep a copy for drawing later
gm0 = copy.copy(lgm)
-# The actual EM, with likelihood computation. The treshold
+# The actual EM, with likelihood computation. The threshold
# is compared to the (linearly appromixated) derivative of the likelihood
em = EM()
like = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
@@ -60,6 +60,8 @@
import pylab as P
P.subplot(2, 1, 1)
+# Level is the confidence level for confidence ellipsoids: 1.0 means that
+# all points will be (almost surely) inside the ellipsoid
level = 0.8
if not d == 1:
P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
Modified: trunk/Lib/sandbox/pyem/info.py
===================================================================
--- trunk/Lib/sandbox/pyem/info.py 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/info.py 2006-10-20 04:01:05 UTC (rev 2291)
@@ -30,7 +30,7 @@
Copyright: David Cournapeau 2006
License: BSD-style (see LICENSE.txt in main source directory)
"""
-version = '0.5.3'
+version = '0.5.4'
depends = ['linalg', 'stats']
ignore = False
Modified: trunk/Lib/sandbox/pyem/online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/online_em.py 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/online_em.py 2006-10-20 04:01:05 UTC (rev 2291)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Mon Oct 16 03:00 PM 2006 J
+# Last Change: Fri Oct 20 12:00 PM 2006 J
#---------------------------------------------
# This is not meant to be used yet !!!! I am
@@ -10,6 +10,13 @@
# - we do not have all the data before putting them in online EM:
# eg current frame depends on previous frame in some way.
+# TODO:
+# - Add biblio
+# - Look back at articles for discussion for init, regularization and
+# convergence rates
+# - the function sufficient_statistics does not really return SS. This is not a
+# big problem, but it would be better to really return them as the name implied.
+
import numpy as N
from numpy import mean
from numpy.testing import assert_array_almost_equal, assert_array_equal
@@ -18,12 +25,12 @@
from gauss_mix import GM
from kmean import kmean
-print "======================================================"
-import traceback
-f = traceback.extract_stack()
-print f[0][0] + " This is beta code, don't use it ! "
-print "======================================================"
+import copy
+from numpy.random import seed
+# Clamp
+clamp = 1e-8
+
# Error classes
class OnGmmError(Exception):
"""Base class for exceptions in this module."""
@@ -43,11 +50,15 @@
return self.message
class OnGMM(ExpMixtureModel):
- def init_kmean(self, init_data, niter = 5):
+ """A Class for 'online' (ie recursive) EM. Insteand
+ of running the E step on the whole data, the sufficient statistics
+ are updated for each new frame of data, and used in the (unchanged)
+ M step"""
+ def init_random(self, init_data):
""" Init the model at random."""
k = self.gm.k
d = self.gm.d
- if mode == 'diag':
+ if self.gm.mode == 'diag':
w = N.ones(k) / k
# Init the internal state of EM
@@ -66,14 +77,56 @@
mode %s yet""", mode)
self.gm.set_param(w, mu, va)
- self.cw = w
- self.cmu = mu
- self.cva = va
+ # c* are the parameters which are computed at every step (ie
+ # when a new frame is taken into account
+ self.cw = self.gm.w
+ self.cmu = self.gm.mu
+ self.cva = self.gm.va
- self.pw = self.cw.copy()
- self.pmu = self.cmu.copy()
- self.pva = self.cva.copy()
+ # p* are the parameters used when computing gaussian densities
+ # they are always the same than c* in the online case
+ self.pw = self.cw
+ self.pmu = self.cmu
+ self.pva = self.cva
+ def init_kmean(self, init_data, niter = 5):
+ """ Init the model using kmean."""
+ k = self.gm.k
+ d = self.gm.d
+ if self.gm.mode == 'diag':
+ w = N.ones(k) / k
+
+ # Init the internal state of EM
+ self.cx = N.outer(w, mean(init_data, 0))
+ self.cxx = N.outer(w, mean(init_data ** 2, 0))
+
+ # w, mu and va init is the same that in the standard case
+ (code, label) = kmean(init_data, init_data[0:k, :], niter)
+ mu = code.copy()
+ va = N.zeros((k, d))
+ for i in range(k):
+ for j in range(d):
+ va [i,j] = N.cov(init_data[N.where(label==i), j], rowvar = 0)
+ else:
+ raise OnGmmParamError("""init_online not implemented for
+ mode %s yet""", mode)
+
+ self.gm.set_param(w, mu, va)
+ # c* are the parameters which are computed at every step (ie
+ # when a new frame is taken into account
+ self.cw = self.gm.w
+ self.cmu = self.gm.mu
+ self.cva = self.gm.va
+
+ # p* are the parameters used when computing gaussian densities
+ # they are the same than c* in the online case
+ # self.pw = self.cw.copy()
+ # self.pmu = self.cmu.copy()
+ # self.pva = self.cva.copy()
+ self.pw = self.cw
+ self.pmu = self.cmu
+ self.pva = self.cva
+
def __init__(self, gm, init_data, init = 'kmean'):
self.gm = gm
@@ -90,7 +143,9 @@
gamma *= self.pw
gamma /= N.sum(gamma)
# <1>(t) = cw(t), each column is one component cw = (cw1, ..., cwK);
- self.cw = (1 - nu) * self.cw + nu * gamma
+ #self.cw = (1 - nu) * self.cw + nu * gamma
+ self.cw *= (1 - nu)
+ self.cw += nu * gamma
return gamma
@@ -102,264 +157,117 @@
self.cmu[k] = self.cx[k] / self.cw[k]
self.cva[k] = self.cxx[k] / self.cw[k] - self.cmu[k] ** 2
-if __name__ == "__main__":
- import copy
- #=============================
- # Simple GMM with 2 components
- #=============================
-
- #+++++++++++++++++++++++++++++
- # Meta parameters of the model
- # - k: Number of components
- # - d: dimension of each Gaussian
- # - mode: Mode of covariance matrix: full or diag
- # - nframes: number of frames (frame = one data point = one
- # row of d elements
- k = 2
+if __name__ == '__main__':
d = 1
+ k = 2
mode = 'diag'
nframes = int(1e3)
- emiter = 10
+ emiter = 4
+ seed(5)
- #+++++++++++++++++++++++++++++++++++++++++++
- # Create an artificial GMM model, samples it
- #+++++++++++++++++++++++++++++++++++++++++++
- print "Generating the mixture"
+ #+++++++++++++++++++++++++++++++++++++++++++++++++
# Generate a model with k components, d dimensions
+ #+++++++++++++++++++++++++++++++++++++++++++++++++
w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
gm = GM.fromvalues(w, mu, va)
-
# Sample nframes frames from the model
- data = gm.sample(nframes, )
+ data = gm.sample(nframes)
- #++++++++++++++++++++++++
- # Learn the model with EM
- #++++++++++++++++++++++++
-
+ #++++++++++++++++++++++++++++++++++++++++++
+ # Approximate the models with classical EM
+ #++++++++++++++++++++++++++++++++++++++++++
# Init the model
- print "Init a model for learning, with kmean for initialization"
lgm = GM(d, k, mode)
gmm = GMM(lgm, 'kmean')
gmm.init(data)
- # Keep the initialized model for drawing
- gm0 = copy.copy(lgm)
-
+ gm0 = copy.copy(gmm.gm)
# The actual EM, with likelihood computation
like = N.zeros(emiter)
- print "computing..."
for i in range(emiter):
g, tgd = gmm.sufficient_statistics(data)
like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
gmm.update_em(data, g)
- #+++++++++++++++++++++++++++++++
- # Learn the model with Online EM
- #+++++++++++++++++++++++++++++++
+ #++++++++++++++++++++++++++++++++++++++++
+ # Approximate the models with online EM
+ #++++++++++++++++++++++++++++++++++++++++
ogm = GM(d, k, mode)
ogmm = OnGMM(ogm, 'kmean')
- #init_data = data[0:nframes / 20, :]
- init_data = data
+ init_data = data[0:nframes / 20, :]
ogmm.init(init_data)
- ogm2 = GM(d, k, mode)
- ogmm2 = OnGMM(ogm2, 'kmean')
- #init_data = data[0:nframes / 20, :]
- init_data = data
- ogmm2.init(init_data)
-
- ogm0 = copy.copy(ogm)
- assert_array_equal(ogm0.w, gm0.w)
- assert_array_equal(ogm0.mu, gm0.mu)
- assert_array_equal(ogm0.va, gm0.va)
-
- ogm02 = copy.copy(ogm2)
- assert_array_equal(ogm02.w, gm0.w)
- assert_array_equal(ogm02.mu, gm0.mu)
- assert_array_equal(ogm02.va, gm0.va)
-
- w0 = gm0.w.copy()
- mu0 = gm0.mu.copy()
- va0 = gm0.va.copy()
-
- cx = ogmm.cx
- cxx = ogmm.cxx
-
- cw = w0.copy()
- cmu = mu0.copy()
- cva = va0.copy()
-
- pw = cw.copy()
- pmu = cmu.copy()
- pva = cva.copy()
-
# Forgetting param
ku = 0.005
t0 = 200
- lamb = N.ones((nframes, 1))
- lamb[0] = 0
- nu0 = 1.0
- #lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0)
- #nu0 = 0.2
+ lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0)
+ nu0 = 0.2
nu = N.zeros((len(lamb), 1))
nu[0] = nu0
for i in range(1, len(lamb)):
nu[i] = 1./(1 + lamb[i] / nu[i-1])
- gamma = N.zeros((nframes, k))
- agamma = []
- apw = []
- apmu = []
- apva = []
- print "========== Online Manual ==========="
- for e in range(emiter):
- print "online manual: estep %d, printing p* state " % e
- apw.append(pw.copy())
- apmu.append(pmu.copy())
- apva.append(pva.copy())
- for t in range(nframes):
- gamma[t] = multiple_gauss_den(data[t:t+1, :], pmu, pva)[0]
- gamma[t] *= pw
- gamma[t] /= N.sum(gamma[t])
- # <1>(t) = cw(t), each column is one component cw = (cw1, ..., cwK);
- # <x>(t) = cx(t)
- cw = (1 - nu[t]) * cw + nu[t] * gamma[t]
- # loop through each component
- for i in range(k):
- cx[i] = (1 - nu[t]) * cx[i] + nu[t] * data[t, :] * gamma[t, i]
- cxx[i] = (1 - nu[t]) * cxx[i] + nu[t] * data[t, :] ** 2 * gamma[t, i]
+ # object version of online EM
+ for t in range(nframes):
+ gamma = ogmm.sufficient_statistics(data[t:t+1, :], nu[t])
+ ogmm.update_em(data[t, :], gamma, nu[t])
- cmu[i] = cx[i] / cw[i]
- cva[i] = cxx[i] / cw[i] - cmu[i] ** 2
+ ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
- #pw = cw.copy()
- #pmu = cmu.copy()
- #pva = cva.copy()
- print "gamma[end]: " + str(gamma[-1])
- pw = cw.copy()
- pmu = cmu.copy()
- pva = cva.copy()
- agamma.append(gamma.copy())
+ #+++++++++++++++
+ # Draw the model
+ #+++++++++++++++
+ print "drawing..."
+ import pylab as P
+ P.subplot(2, 1, 1)
- gamma2 = N.zeros((nframes, k))
- agamma2 = []
- apw2 = []
- apmu2 = []
- apva2 = []
- print "========== Online Automatic ==========="
- for e in range(emiter):
- print "online automatic: estep %d, printing p* state " % e
- apw2.append(ogmm2.pw.copy())
- apmu2.append(ogmm2.pmu.copy())
- apva2.append(ogmm2.pva.copy())
- for t in range(nframes):
- gamma2[t] = ogmm2.sufficient_statistics(data[t:t+1, :], nu[t])
- #gamma2[t] = multiple_gauss_den(data[t:t+1, :], ogmm2.pmu, ogmm2.pva)[0]
- #gamma2[t] *= ogmm2.pw
- #gamma2[t] /= N.sum(gamma2[t])
- #try:
- # assert_array_equal(agamma, gamma2[t])
- #except AssertionError:
- # print "error for estep %d, step %d" % (e, t)
- # print ogmm2.pw
- # print ogmm2.pmu
- # print ogmm2.pva
- # raise
- ogmm2.update_em(data[t, :], gamma2[t], nu[t])
- #ogmm2.cw = (1 - nu[t]) * ogmm2.cw + nu[t] * agamma
- ## loop through each component
- #for i in range(k):
- # ogmm2.cx[i] = (1 - nu[t]) * ogmm2.cx[i] + nu[t] * data[t, :] * agamma[i]
- # ogmm2.cxx[i] = (1 - nu[t]) * ogmm2.cxx[i] + nu[t] * data[t, :] ** 2 * agamma[i]
+ if not d == 1:
+ # Draw what is happening
+ P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
- # ogmm2.cmu[i] = ogmm2.cx[i] / ogmm2.cw[i]
- # ogmm2.cva[i] = ogmm2.cxx[i] / ogmm2.cw[i] - ogmm2.cmu[i] ** 2
+ h = gm.plot()
+ [i.set_color('g') for i in h]
+ h[0].set_label('true confidence ellipsoides')
- print "gamma[end]: " + str(gamma2[-1])
- agamma2.append(gamma2.copy())
- ogmm2.pw = ogmm2.cw.copy()
- ogmm2.pmu = ogmm2.cmu.copy()
- ogmm2.pva = ogmm2.cva.copy()
+ h = gm0.plot()
+ [i.set_color('k') for i in h]
+ h[0].set_label('initial confidence ellipsoides')
- # #ogm.set_param(pw, pmu, pva)
- # print "weights off vs on: \n" + str(lgm.w) + "\n " + str(cw)
- # print "mean off vs on: \n" + str(lgm.mu) + "\n " + str(cmu)
- # print "variances off vs on: \n" + str(lgm.va) + "\n " + str(cva)
- # print "weights off vs on2: \n" + str(lgm.w) + "\n " + str(ogmm2.cw)
- # print "mean off vs on2: \n" + str(lgm.mu) + "\n " + str(ogmm2.cmu)
- # print "variances off vs on2: \n" + str(lgm.va) + "\n " + str(ogmm2.cva)
- # assert_array_almost_equal(cw, lgm.w)
- # assert_array_almost_equal(cmu, lgm.mu)
- # assert_array_almost_equal(cva, lgm.va)
- assert_array_equal(ogmm2.pw, pw)
- assert_array_equal(ogmm2.pmu, pmu)
- assert_array_equal(ogmm2.pva, pva)
- assert_array_equal(agamma[0], agamma2[0])
- #assert_array_almost_equal(ogmm2.cw, lgm.w)
- #assert_array_almost_equal(ogmm2.cmu, lgm.mu)
- #assert_array_almost_equal(ogmm2.cva, lgm.va)
- # #+++++++++++++++
- # # Draw the model
- # #+++++++++++++++
- # print "drawing..."
- # import pylab as P
- # P.subplot(2, 1, 1)
+ h = lgm.plot()
+ [i.set_color('r') for i in h]
+ h[0].set_label('confidence ellipsoides found by EM')
- # if d == 1:
- # # Real confidence ellipses
- # h = gm.plot1d()
- # [i.set_color('g') for i in h['pdf']]
- # h['pdf'][0].set_label('true pdf')
+ h = ogmm.gm.plot()
+ [i.set_color('m') for i in h]
+ h[0].set_label('confidence ellipsoides found by Online EM')
- # # Initial confidence ellipses as found by kmean
- # h0 = gm0.plot1d()
- # [i.set_color('k') for i in h0['pdf']]
- # h0['pdf'][0].set_label('initial pdf')
+ # P.legend(loc = 0)
+ else:
+ # Real confidence ellipses
+ h = gm.plot1d()
+ [i.set_color('g') for i in h['pdf']]
+ h['pdf'][0].set_label('true pdf')
- # # Values found by EM
- # hl = lgm.plot1d(fill = 1, level = 0.66)
- # [i.set_color('r') for i in hl['pdf']]
- # hl['pdf'][0].set_label('pdf found by EM')
+ # Initial confidence ellipses as found by kmean
+ h0 = gm0.plot1d()
+ [i.set_color('k') for i in h0['pdf']]
+ h0['pdf'][0].set_label('initial pdf')
- # # Values found by Online EM
- # ho = ogm.plot1d(fill = 1, level = 0.66)
- # [i.set_color('b') for i in ho['pdf']]
- # ho['pdf'][0].set_label('pdf found by online EM')
+ # Values found by EM
+ hl = lgm.plot1d(fill = 1, level = 0.66)
+ [i.set_color('r') for i in hl['pdf']]
+ hl['pdf'][0].set_label('pdf found by EM')
- # P.legend(loc = 0)
- # else:
- # # Draw what is happening
- # P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
+ P.legend(loc = 0)
- # # Real confidence ellipses
- # Xre, Yre = gm.conf_ellipses()
- # P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides')
- # for i in range(1,k):
- # P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_')
+ # Values found by Online EM
+ hl = ogmm.gm.plot1d(fill = 1, level = 0.66)
+ [i.set_color('m') for i in hl['pdf']]
+ hl['pdf'][0].set_label('pdf found by Online EM')
- # # Initial confidence ellipses as found by kmean
- # X0e, Y0e = gm0.conf_ellipses()
- # P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoides')
- # for i in range(1,k):
- # P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_')
+ P.legend(loc = 0)
- # # Values found by EM
- # Xe, Ye = lgm.conf_ellipses()
- # P.plot(Xe[0], Ye[0], 'r', label = 'confidence ellipsoides found by EM')
- # for i in range(1,k):
- # P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_')
- # P.legend(loc = 0)
-
- # # Values found by Online EM
- # Xe, Ye = ogm.conf_ellipses()
- # P.plot(Xe[0], Ye[0], 'm', label = 'confidence ellipsoides found by Online EM')
- # for i in range(1,k):
- # P.plot(Xe[i], Ye[i], 'm', label = '_nolegend_')
- # P.legend(loc = 0)
-
-
- # P.subplot(2, 1, 2)
- # P.plot(like)
- # P.title('log likelihood')
-
- # P.show()
+ P.subplot(2, 1, 2)
+ P.plot(nu)
+ P.title('Learning rate')
+ P.show()
Modified: trunk/Lib/sandbox/pyem/setup.py
===================================================================
--- trunk/Lib/sandbox/pyem/setup.py 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/setup.py 2006-10-20 04:01:05 UTC (rev 2291)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Thu Oct 12 11:00 PM 2006 J
+# Last Change: Thu Oct 19 07:00 PM 2006 J
# TODO:
# - check how to handle cmd line build options with distutils and use
# it in the building process
@@ -34,8 +34,7 @@
if __name__ == "__main__":
from numpy.distutils.core import setup
#setup(**configuration(top_path='').todict())
- setup(**configuration(top_path='',
- package_name='scipy.sandbox.pyem').todict())
+ setup(**configuration(top_path='',))
# from distutils.core import setup, Extension
# from pyem import version as pyem_version
#
Added: trunk/Lib/sandbox/pyem/tests/generate_test_data.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/generate_test_data.py 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/tests/generate_test_data.py 2006-10-20 04:01:05 UTC (rev 2291)
@@ -0,0 +1,53 @@
+# Last Change: Wed Oct 18 06:00 PM 2006 J
+
+import numpy as N
+import tables as T
+
+from numpy.random import seed
+
+from gmm_em import multiple_gauss_den
+from gauss_mix import GM
+from _c_densities import gauss_den
+
+filename = 'test_mgden.h5';
+h5file = T.openFile(filename, 'w')
+h5file.createGroup(h5file.root, 'hyperparams')
+h5file.createGroup(h5file.root, 'params')
+h5file.createGroup(h5file.root, 'data')
+
+d = 1
+k = 2
+type = 'diag'
+nframes = int(1e3)
+
+h5file.createArray(h5file.root.hyperparams, 'dimension', d)
+h5file.createArray(h5file.root.hyperparams, 'type', type)
+h5file.createArray(h5file.root.hyperparams, 'nclusters', k)
+
+w, mu, va = GM.gen_param(d, k, type)
+
+h5file.createArray(h5file.root.params, 'weights', w)
+h5file.createArray(h5file.root.params, 'means', mu)
+h5file.createArray(h5file.root.params, 'variances', va)
+
+gm = GM.fromvalues(w, mu, va)
+# Sample nframes frames from the model
+data = gm.sample(nframes)
+
+h5file.createArray(h5file.root.data, 'data', data)
+
+w1, mu1, va1 = GM.gen_param(d, k, type)
+
+out = multiple_gauss_den(data, mu1, va1)
+out1 = gauss_den(data, mu1[0, :], va1[0, :])
+
+h5file.createArray(h5file.root.params, 'w', w1)
+h5file.createArray(h5file.root.params, 'mu', mu1)
+h5file.createArray(h5file.root.params, 'va', va1)
+h5file.createArray(h5file.root.data, 'out', out)
+
+h5file.createArray(h5file.root.params, 'mu1', mu1[0,:])
+h5file.createArray(h5file.root.params, 'va1', va1[0,:])
+h5file.createArray(h5file.root.data, 'out1', out1)
+
+h5file.close()
Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py 2006-10-20 04:01:05 UTC (rev 2291)
@@ -1,6 +1,11 @@
#! /usr/bin/env python
-# Last Change: Fri Sep 29 06:00 PM 2006 J
+# Last Change: Thu Oct 19 07:00 PM 2006 J
+# TODO:
+# - having "fake tests" to check that all mode (scalar, diag and full) are
+# executables
+# - having a dataset to check against
+
import sys
from numpy.testing import *
@@ -8,8 +13,6 @@
set_package_path()
from pyem.densities import gauss_den
-from pyem._c_densities import gauss_den as c_gauss_den
-
restore_path()
#Optional:
@@ -17,7 +20,9 @@
# import modules that are located in the same directory as this file.
restore_path()
-class test_densities(NumpyTestCase):
+DEF_DEC = 12
+
+class TestDensities(NumpyTestCase):
def _generate_test_data_1d(self):
self.va = 2.0
self.mu = 1.0
@@ -61,37 +66,44 @@
0.00378789836599, 0.00015915297541,
0.00000253261067, 0.00000001526368])
- def _check_py(self, level, decimal = 12):
+class test_py_implementation(TestDensities):
+ def _check(self, level, decimal = DEF_DEC):
Y = gauss_den(self.X, self.mu, self.va)
assert_array_almost_equal(Y, self.Yt, decimal)
- def _check_c(self, level, decimal = 12):
- Y = c_gauss_den(self.X, self.mu, self.va)
- assert_array_almost_equal(Y, self.Yt, decimal)
+ def check_2d_diag(self, level = 0):
+ self._generate_test_data_2d_diag()
+ self._check(level)
- def check_py_1d(self, level = 1):
+ def check_2d_full(self, level = 0):
+ self._generate_test_data_2d_full()
+ self._check(level)
+
+ def check_py_1d(self, level = 0):
self._generate_test_data_1d()
- self._check_py(level)
+ self._check(level)
- def check_py_2d_diag(self, level = 1):
- self._generate_test_data_2d_diag()
- self._check_py(level)
+class test_c_implementation(TestDensities):
+ def _check(self, level, decimal = DEF_DEC):
+ try:
+ from pyem._c_densities import gauss_den as c_gauss_den
+ Y = c_gauss_den(self.X, self.mu, self.va)
+ assert_array_almost_equal(Y, self.Yt, decimal)
+ except ImportError, inst:
+ print "Error while importing C implementation, not tested"
+ print " -> (Import error was %s)" % inst
- def check_py_2d_full(self, level = 1):
- self._generate_test_data_2d_full()
- self._check_py(level)
-
- def check_c_1d(self, level = 1):
+ def check_1d(self, level = 0):
self._generate_test_data_1d()
- self._check_c(level)
+ self._check(level)
- def check_c_2d_diag(self, level = 1):
+ def check_2d_diag(self, level = 0):
self._generate_test_data_2d_diag()
- self._check_c(level)
+ self._check(level)
- def check_c_2d_full(self, level = 1):
+ def check_2d_full(self, level = 0):
self._generate_test_data_2d_full()
- self._check_c(level)
+ self._check(level)
if __name__ == "__main__":
NumpyTest().run()
Added: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py 2006-10-20 04:01:05 UTC (rev 2291)
@@ -0,0 +1,11 @@
+#! /usr/bin/env python
+# Last Change: Fri Oct 20 11:00 AM 2006 J
+
+import sys
+from numpy.testing import *
+
+import numpy as N
+
+set_package_path()
+from pyem.densities import gauss_den
+restore_path()
Added: trunk/Lib/sandbox/pyem/tests/test_online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_online_em.py 2006-10-18 19:27:50 UTC (rev 2290)
+++ trunk/Lib/sandbox/pyem/tests/test_online_em.py 2006-10-20 04:01:05 UTC (rev 2291)
@@ -0,0 +1,188 @@
+#! /usr/bin/env python
+# Last Change: Fri Oct 20 12:00 PM 2006 J
+
+import copy
+
+import sys
+from numpy.testing import *
+
+import numpy as N
+from numpy.random import seed
+
+set_package_path()
+from pyem import GM, GMM
+from pyem.online_em import OnGMM
+restore_path()
+
+# #Optional:
+# set_local_path()
+# # import modules that are located in the same directory as this file.
+# restore_path()
+
+# Error precision allowed (nb of decimals)
+AR_AS_PREC = 12
+
+class OnlineEmTest(NumpyTestCase):
+ def _create_model(self, d, k, mode, nframes, emiter):
+ #+++++++++++++++++++++++++++++++++++++++++++++++++
+ # Generate a model with k components, d dimensions
+ #+++++++++++++++++++++++++++++++++++++++++++++++++
+ w, mu, va = GM.gen_param(d, k, mode, spread = 1.5)
+ gm = GM.fromvalues(w, mu, va)
+ # Sample nframes frames from the model
+ data = gm.sample(nframes)
+
+ #++++++++++++++++++++++++++++++++++++++++++
+ # Approximate the models with classical EM
+ #++++++++++++++++++++++++++++++++++++++++++
+ # Init the model
+ lgm = GM(d, k, mode)
+ gmm = GMM(lgm, 'kmean')
+ gmm.init(data)
+
+ self.gm0 = copy.copy(gmm.gm)
+ # The actual EM, with likelihood computation
+ for i in range(emiter):
+ g, tgd = gmm.sufficient_statistics(data)
+ gmm.update_em(data, g)
+
+ self.data = data
+ self.gm = lgm
+
+class test_on_off_eq(OnlineEmTest):
+ def check_1d(self, level = 1):
+ d = 1
+ k = 2
+ mode = 'diag'
+ nframes = int(1e2)
+ emiter = 3
+
+ seed(1)
+ self._create_model(d, k, mode, nframes, emiter)
+ self._check(d, k, mode, nframes, emiter)
+
+ def check_2d(self, level = 2):
+ d = 2
+ k = 2
+ mode = 'diag'
+ nframes = int(1e2)
+ emiter = 3
+
+ seed(1)
+ self._create_model(d, k, mode, nframes, emiter)
+ self._check(d, k, mode, nframes, emiter)
+
+ def check_5d(self, level = 2):
+ d = 5
+ k = 2
+ mode = 'diag'
+ nframes = int(1e2)
+ emiter = 3
+
+ seed(1)
+ self._create_model(d, k, mode, nframes, emiter)
+ self._check(d, k, mode, nframes, emiter)
+
+ def _check(self, d, k, mode, nframes, emiter):
+ #++++++++++++++++++++++++++++++++++++++++
+ # Approximate the models with online EM
+ #++++++++++++++++++++++++++++++++++++++++
+ # Learn the model with Online EM
+ ogm = GM(d, k, mode)
+ ogmm = OnGMM(ogm, 'kmean')
+ init_data = self.data
+ ogmm.init(init_data)
+
+ # Check that online kmean init is the same than kmean offline init
+ ogm0 = copy.copy(ogm)
+ assert_array_equal(ogm0.w, self.gm0.w)
+ assert_array_equal(ogm0.mu, self.gm0.mu)
+ assert_array_equal(ogm0.va, self.gm0.va)
+
+ # Forgetting param
+ lamb = N.ones((nframes, 1))
+ lamb[0] = 0
+ nu0 = 1.0
+ nu = N.zeros((len(lamb), 1))
+ nu[0] = nu0
+ for i in range(1, len(lamb)):
+ nu[i] = 1./(1 + lamb[i] / nu[i-1])
+
+ # object version of online EM: the p* arguments are updated only at each
+ # epoch, which is equivalent to on full EM iteration on the
+ # classic EM algorithm
+ ogmm.pw = ogmm.cw.copy()
+ ogmm.pmu = ogmm.cmu.copy()
+ ogmm.pva = ogmm.cva.copy()
+ for e in range(emiter):
+ for t in range(nframes):
+ gamma = ogmm.sufficient_statistics(self.data[t:t+1, :], nu[t])
+ ogmm.update_em(self.data[t, :], gamma, nu[t])
+
+ # Change pw args only a each epoch
+ ogmm.pw = ogmm.cw.copy()
+ ogmm.pmu = ogmm.cmu.copy()
+ ogmm.pva = ogmm.cva.copy()
+
+ # For equivalence between off and on, we allow a margin of error,
+ # because of round-off errors.
+ print " Checking precision of equivalence with offline EM trainer "
+ maxtestprec = 18
+ try :
+ for i in range(maxtestprec):
+ assert_array_almost_equal(self.gm.w, ogmm.pw, decimal = i)
+ assert_array_almost_equal(self.gm.mu, ogmm.pmu, decimal = i)
+ assert_array_almost_equal(self.gm.va, ogmm.pva, decimal = i)
+ print "\t !! Precision up to %d decimals !! " % i
+ except AssertionError:
+ if i < AR_AS_PREC:
+ print """\t !!NOT OK: Precision up to %d decimals only,
+ outside the allowed range (%d) !! """ % (i, AR_AS_PREC)
+ raise AssertionError
+ else:
+ print "\t !!OK: Precision up to %d decimals !! " % i
+
+class test_on(OnlineEmTest):
+ def check_consistency(self):
+ d = 1
+ k = 2
+ mode = 'diag'
+ nframes = int(1e3)
+ emiter = 4
+
+ self._create_model(d, k, mode, nframes, emiter)
+ self._run_pure_online(d, k, mode, nframes)
+
+ def _run_pure_online(self, d, k, mode, nframes):
+ #++++++++++++++++++++++++++++++++++++++++
+ # Approximate the models with online EM
+ #++++++++++++++++++++++++++++++++++++++++
+ ogm = GM(d, k, mode)
+ ogmm = OnGMM(ogm, 'kmean')
+ init_data = self.data[0:nframes / 20, :]
+ ogmm.init(init_data)
+
+ # Forgetting param
+ ku = 0.005
+ t0 = 200
+ lamb = 1 - 1/(N.arange(-1, nframes-1) * ku + t0)
+ nu0 = 0.2
+ nu = N.zeros((len(lamb), 1))
+ nu[0] = nu0
+ for i in range(1, len(lamb)):
+ nu[i] = 1./(1 + lamb[i] / nu[i-1])
+
+ # object version of online EM
+ for t in range(nframes):
+ # the assert are here to check we do not create copies
+ # unvoluntary for parameters
+ assert ogmm.pw is ogmm.cw
+ assert ogmm.pmu is ogmm.cmu
+ assert ogmm.pva is ogmm.cva
+ gamma = ogmm.sufficient_statistics(self.data[t:t+1, :], nu[t])
+ ogmm.update_em(self.data[t, :], gamma, nu[t])
+
+ ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
+
+if __name__ == "__main__":
+ NumpyTest().run()
More information about the Scipy-svn
mailing list