From scipy-svn at scipy.org  Fri Jun  1 04:22:13 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri,  1 Jun 2007 03:22:13 -0500 (CDT)
Subject: [Scipy-svn] r3066 - in trunk/Lib/sandbox/pyem: . tests
Message-ID: <20070601082213.7FB4B39C02F@new.scipy.org>

Author: cdavid
Date: 2007-06-01 03:21:52 -0500 (Fri, 01 Jun 2007)
New Revision: 3066

Removed:
   trunk/Lib/sandbox/pyem/kmean.py
   trunk/Lib/sandbox/pyem/tests/test_kmean.py
Modified:
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/online_em.py
   trunk/Lib/sandbox/pyem/setup.py
Log:
Remove kmean as scipy.cluster.vq.kmeans2 does everything we need now

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-05-31 15:25:26 UTC (rev 3065)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-01 08:21:52 UTC (rev 3066)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Thu Nov 16 02:00 PM 2006 J
+# Last Change: Fri Jun 01 05:00 PM 2007 J
 
 # TODO:
 #   - which methods to avoid va shrinking to 0 ? There are several options, 
@@ -12,7 +12,8 @@
 from numpy.random import randn
 #import _c_densities as densities
 import densities
-from kmean import kmean
+#from kmean import kmean
+from scipy.cluster.vq import kmeans2 as kmean
 from gauss_mix import GM
 
 from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND

Deleted: trunk/Lib/sandbox/pyem/kmean.py
===================================================================
--- trunk/Lib/sandbox/pyem/kmean.py	2007-05-31 15:25:26 UTC (rev 3065)
+++ trunk/Lib/sandbox/pyem/kmean.py	2007-06-01 08:21:52 UTC (rev 3066)
@@ -1,76 +0,0 @@
-# /usr/bin/python
-# Last Change: Thu Sep 28 01:00 PM 2006 J
-
-#TODO:
-#   - a demo for kmeans
-
-import numpy as N
-
-def _py_vq(data, code):
-    """ Please do not use directly. Use kmean instead"""
-    # No attempt to be efficient has been made...
-    (n, d)  = data.shape
-    (k, d)  = code.shape
-
-    label   = N.zeros(n, int)
-    for i in range(n):
-        d           = N.sum((data[i, :] - code) ** 2, 1)
-        label[i]    = N.argmin(d)
-
-    return label
-    
-# Try to import pyrex function for vector quantization. If not available,
-# falls back on pure python implementation.
-#%KMEANIMPORT%
-#try:
-#    from scipy.cluster.vq import kmeans as kmean
-#except ImportError:
-#    try:
-#        from c_gmm import _vq
-#    except:
-#        print """c_gmm._vq not found, using pure python implementation instead. 
-#        Kmean will be REALLY slow"""
-#        _vq = _py_vq
-try:
-    from scipy.cluster.vq import vq
-    print "using scipy.cluster.vq"
-    def _vq(*args, **kw): return vq(*args, **kw)[0]
-except ImportError:
-    try:
-        from c_gmm import _vq
-        print "using pyrex vq"
-    except ImportError:
-        print """c_gmm._vq not found, using pure python implementation instead. 
-        Kmean will be REALLY slow"""
-        _vq = _py_vq
-
-def kmean(data, init, iter = 10):
-    """Simple kmean implementation for EM. Runs iter iterations.
-    
-    returns a tuple (code, label), where code are the final
-    centroids, and label are the class label indec for each
-    frame (ie row) of data"""
-
-    data    = N.atleast_2d(data)
-    init    = N.atleast_2d(init)
-
-    (n, d)  = data.shape
-    (k, d1) = init.shape
-
-    if not d == d1:
-        msg = "data and init centers do not have same dimensions..."
-        raise GmmParamError(msg)
-    
-    code    = N.asarray(init.copy())
-    for i in range(iter):
-        # Compute the nearest neighbour for each obs
-        # using the current code book
-        label   = _vq(data, code)
-        # Update the code by computing centroids using the new code book
-        for j in range(k):
-            code[j,:] = N.mean(data[N.where(label==j)], axis=0) 
-
-    return code, label
-
-if __name__ == "__main__":
-    pass

Modified: trunk/Lib/sandbox/pyem/online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/online_em.py	2007-05-31 15:25:26 UTC (rev 3065)
+++ trunk/Lib/sandbox/pyem/online_em.py	2007-06-01 08:21:52 UTC (rev 3066)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Wed Dec 06 09:00 PM 2006 J
+# Last Change: Fri Jun 01 05:00 PM 2007 J
 
 #---------------------------------------------
 # This is not meant to be used yet !!!! I am 
@@ -23,7 +23,7 @@
 
 from gmm_em import ExpMixtureModel, GMM, EM
 from gauss_mix import GM
-from kmean import kmean
+from scipy.cluster.vq import kmeans2 as kmean
 import densities2 as D
 
 import copy

Modified: trunk/Lib/sandbox/pyem/setup.py
===================================================================
--- trunk/Lib/sandbox/pyem/setup.py	2007-05-31 15:25:26 UTC (rev 3065)
+++ trunk/Lib/sandbox/pyem/setup.py	2007-06-01 08:21:52 UTC (rev 3066)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Wed Dec 06 08:00 PM 2006 J
+# Last Change: Fri Jun 01 05:00 PM 2007 J
 # TODO:
 #   - check how to handle cmd line build options with distutils and use
 #   it in the building process
@@ -15,7 +15,6 @@
 for estimating meta parameters of mixtures. """
 
 from os.path import join
-# This import from __init__ looks strange, should check whether there is no other way
 from info import version as pyem_version
 
 DISTNAME    = 'pyem' 
@@ -32,12 +31,8 @@
     config.add_data_dir('tests')
     config.add_data_dir('profile_data')
     config.add_extension('c_gden',
-                         #define_macros=[('LIBSVM_EXPORTS', None),
-                         #               ('LIBSVM_DLL', None)],
                          sources=[join('src', 'c_gden.c')])
     config.add_extension('_rawden',
-                         #define_macros=[('LIBSVM_EXPORTS', None),
-                         #               ('LIBSVM_DLL', None)],
                          sources=[join('src', 'pure_den.c')])
 
     return config
@@ -47,108 +42,3 @@
     #setup(**configuration(top_path='').todict())
     #setup(**configuration(top_path=''))
     setup(configuration=configuration)
-# from distutils.core import setup, Extension
-# from pyem import version as pyem_version
-# 
-# # distutils does not update MANIFEST correctly, removes it
-# import os
-# if os.path.exists('MANIFEST'): os.remove('MANIFEST')
-# from os.path import join
-# 
-# import re
-# 
-# from numpy.distutils.misc_util import get_numpy_include_dirs
-# NUMPYINC    = get_numpy_include_dirs()[0]
-# 
-# # General variables:
-# #   - DISTNAME: name of the distributed package
-# #   - VERSION: the version reference is in pyem/__init__.py file
-# #   - other upper cased variables are the same than the corresponding 
-# #   keywords in setup call
-# DISTNAME    = 'pyem' 
-# VERSION     = pyem_version
-# DESCRIPTION ='A python module for Expectation Maximization learning of mixtures pdf',
-# AUTHOR      ='David Cournapeau',
-# AUTHOR_EMAIL='david at ar.media.kyoto-u.ac.jp',
-# URL         ='http://ar.media.kyoto-u.ac.jp/members/david',
-# 
-# # Source files for extensions
-# 
-# # Functions used to substitute values in File.
-# # Mainly use to replace config.h capabilities
-# def do_subst_in_file(sourcefile, targetfile, dict):
-#     """Replace all instances of the keys of dict with their values.
-#     For example, if dict is {'%VERSION%': '1.2345', '%BASE%': 'MyProg'},
-#     then all instances of %VERSION% in the file will be replaced with 1.2345 etc.
-#     """
-#     try:
-#         f = open(sourcefile, 'rb')
-#         contents = f.read()
-#         f.close()
-#     except:
-#         raise IOError, "Can't read source file %s"%sourcefile
-# 
-#     for (k,v) in dict.items():
-#         contents = re.sub(k, v, contents)
-#     try:
-#         f = open(targetfile, 'wb')
-#         f.write(contents)
-#         f.close()
-#     except:
-#         raise IOError, "Can't read source file %s"%sourcefile
-#     return 0 # success
-#  
-# class SetupOption:
-#     def __init__(self):
-#         self.kmean      = 'py'
-#         self.ext_modules= [Extension(join('pyem', 'c_gden'),
-#                               sources=[join('pyem', 'src', 'c_gden.c')]) ]
-#         self.cmdclass   = {}
-#         self.subsdic     = {'%KMEANIMPORT%': []}
-# 
-#     def _config_kmean(self):
-#         # Check in this order:
-#         #   - kmean in scipy.cluster,
-#         #   - custom vq with pyrex 
-#         #   - custom pure python vq
-#         #try:
-#         #    from scipy.cluster.vq import kmeans
-#         #    self.kmean  = 'scipy'
-#         #    #self.subsdic['%KMEANIMPORT%']   = scipy_kmean
-#         #except ImportError:
-#         #    try:
-#         #        from Pyrex.Distutils import build_ext
-#         #        self.kmean  = 'pyrex'
-#         #        self.ext_modules.append(Extension('pyem/c_gmm', 
-#         #            ['pyem/src/c_gmm.pyx'], include_dirs=[NUMPYINC]))
-#         #        self.cmdclass['build_ext']  = build_ext
-#         #        #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
-#         #    except ImportError:
-#         #        self.kmean  = 'py'
-#         #        #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
-#         try:
-#             from Pyrex.Distutils import build_ext
-#             self.kmean  = 'pyrex'
-#             self.ext_modules.append(Extension('pyem/c_gmm', 
-#                 ['pyem/src/c_gmm.pyx'], include_dirs=[NUMPYINC]))
-#             self.cmdclass['build_ext']  = build_ext
-#             #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
-#         except ImportError:
-#             self.kmean  = 'py'
-#             #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
-#     def setup(self):
-#         self._config_kmean()
-#         #import time
-#         #do_subst_in_file('pyem/kmean.py.in', 'pyem/kmean.py', self.subsdic)
-#         setup(name      = DISTNAME,
-#             version     = VERSION,
-#             description = DESCRIPTION,
-#             author      = AUTHOR,
-#             author_email= AUTHOR_EMAIL,
-#             url         = URL,
-#             packages    = ['pyem', 'pyem.tests', 'pyem.profile_data'],
-#             ext_modules = self.ext_modules,
-#             cmdclass    = self.cmdclass)
-# 
-# stpobj  = SetupOption()
-# stpobj.setup()

Deleted: trunk/Lib/sandbox/pyem/tests/test_kmean.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_kmean.py	2007-05-31 15:25:26 UTC (rev 3065)
+++ trunk/Lib/sandbox/pyem/tests/test_kmean.py	2007-06-01 08:21:52 UTC (rev 3066)
@@ -1,46 +0,0 @@
-#! /usr/bin/env python
-# Last Change: Thu Sep 28 01:00 PM 2006 J
-
-import sys
-from numpy.testing import *
-
-import numpy as N
-
-set_package_path()
-from pyem.kmean import kmean
-restore_path()
-
-#Optional:
-set_local_path()
-# import modules that are located in the same directory as this file.
-restore_path()
-
-# Global data
-X   = N.array([[3.0, 3], [4, 3], [4, 2],
-        [9, 2], [5, 1], [6, 2], [9, 4], 
-        [5, 2], [5, 4], [7, 4], [6, 5]])
-
-codet1  = N.array([[3.0000, 3.0000],
-        [6.2000, 4.0000], 
-        [5.8000, 1.8000]])
-        
-codet2  = N.array([[11.0/3, 8.0/3], 
-        [6.7500, 4.2500],
-        [6.2500, 1.7500]])
-
-class test_kmean(NumpyTestCase):
-    def check_iter1(self, level=1):
-        initc   = N.concatenate(([[X[0]], [X[1]], [X[2]]])) 
-        code    = initc.copy()
-        code1   = kmean(X, code, 1)[0]
-
-        assert_array_almost_equal(code1, codet1)
-    def check_iter2(self, level=1):
-        initc   = N.concatenate(([[X[0]], [X[1]], [X[2]]])) 
-        code    = initc.copy()
-        code2   = kmean(X, code, 2)[0]
-
-        assert_array_almost_equal(code2, codet2)
-
-if __name__ == "__main__":
-    NumpyTest().run()


From scipy-svn at scipy.org  Fri Jun  1 04:48:02 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri,  1 Jun 2007 03:48:02 -0500 (CDT)
Subject: [Scipy-svn] r3067 - trunk/Lib/sandbox/lobpcg
Message-ID: <20070601084802.8485139C16C@new.scipy.org>

Author: rc
Date: 2007-06-01 03:47:46 -0500 (Fri, 01 Jun 2007)
New Revision: 3067

Modified:
   trunk/Lib/sandbox/lobpcg/lobpcg.py
Log:
fixed returning wrong eigenvectors


Modified: trunk/Lib/sandbox/lobpcg/lobpcg.py
===================================================================
--- trunk/Lib/sandbox/lobpcg/lobpcg.py	2007-06-01 08:21:52 UTC (rev 3066)
+++ trunk/Lib/sandbox/lobpcg/lobpcg.py	2007-06-01 08:47:46 UTC (rev 3067)
@@ -536,7 +536,7 @@
         blockVectorBX = sc.dot( blockVectorBX, eigBlockVectorX ) + bpp
 
         blockVectorP, blockVectorAP, blockVectorBP = pp, app, bpp
-
+        
     aux = blockVectorBX * _lambda[nm.newaxis,:]
     blockVectorR = blockVectorAX - aux
 
@@ -550,14 +550,14 @@
 
     if retLambdaHistory:
         if retResidualNormsHistory:
-            return _lambda, eigBlockVectorX, lambdaHistory, residualNormsHistory
+            return _lambda, blockVectorX, lambdaHistory, residualNormsHistory
         else:
-            return _lambda, eigBlockVectorX, lambdaHistory
+            return _lambda, blockVectorX, lambdaHistory
     else:
         if retResidualNormsHistory:
-            return _lambda, eigBlockVectorX, residualNormsHistory
+            return _lambda, blockVectorX, residualNormsHistory
         else:
-            return _lambda, eigBlockVectorX
+            return _lambda, blockVectorX
 
 ###########################################################################
 if __name__ == '__main__':
@@ -600,3 +600,4 @@
     print 'solution time:', time.clock() - tt
     print eigs
     
+    print vecs


From scipy-svn at scipy.org  Fri Jun  1 08:09:13 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri,  1 Jun 2007 07:09:13 -0500 (CDT)
Subject: [Scipy-svn] r3068 - trunk/Lib/interpolate
Message-ID: <20070601120913.6226539C00F@new.scipy.org>

Author: oliphant
Date: 2007-06-01 07:09:10 -0500 (Fri, 01 Jun 2007)
New Revision: 3068

Modified:
   trunk/Lib/interpolate/__fitpack.h
   trunk/Lib/interpolate/interpolate.py
Log:
Add Lagrange interpolating polynomial

Modified: trunk/Lib/interpolate/__fitpack.h
===================================================================
--- trunk/Lib/interpolate/__fitpack.h	2007-06-01 08:47:46 UTC (rev 3067)
+++ trunk/Lib/interpolate/__fitpack.h	2007-06-01 12:09:10 UTC (rev 3068)
@@ -829,7 +829,7 @@
 "integer-spaced, or cardinal spline matrix a bit faster.";
 static PyObject *_bsplmat(PyObject *dummy, PyObject *args) {
     int k,N,i,numbytes,j, equal;
-    int dims[2];
+    npy_intp dims[2];
     PyObject *x_i_py=NULL;
     PyArrayObject *x_i=NULL, *BB=NULL;
     double *t=NULL, *h=NULL, *ptr;
@@ -970,7 +970,7 @@
 "then it produces the result as if the sample distance were dx";
 static PyObject *_bspldismat(PyObject *dummy, PyObject *args) {
     int k,N,i,j, equal, m;
-    int dims[2];
+    npy_intp dims[2];
     PyObject *x_i_py=NULL;
     PyArrayObject *x_i=NULL, *BB=NULL;
     double *t=NULL, *h=NULL, *ptr, *dptr;

Modified: trunk/Lib/interpolate/interpolate.py
===================================================================
--- trunk/Lib/interpolate/interpolate.py	2007-06-01 08:47:46 UTC (rev 3067)
+++ trunk/Lib/interpolate/interpolate.py	2007-06-01 12:09:10 UTC (rev 3068)
@@ -4,7 +4,7 @@
 """
 
 __all__ = ['interp1d', 'interp2d', 'spline', 'spleval', 'splmake', 'spltopp',
-           'ppform']
+           'ppform', 'lagrange']
 
 from numpy import shape, sometrue, rank, array, transpose, \
      swapaxes, searchsorted, clip, take, ones, putmask, less, greater, \
@@ -23,6 +23,21 @@
         all = sometrue(all,axis=0)
     return all
 
+def lagrange(x, w):
+    """Return the Lagrange interpolating polynomial of the data-points (x,w)
+    """
+    M = len(x)
+    p = poly1d(0.0)
+    for j in xrange(M):
+        pt = poly1d(w[j])
+        for k in xrange(M):
+            if k == j: continue
+            fac = x[j]-x[k]
+            pt *= poly1d([1.0,-x[k]])/fac
+        p += pt
+    return p
+
+
 # !! Need to find argument for keeping initialize.  If it isn't
 # !! found, get rid of it!
 

From scipy-svn at scipy.org  Sat Jun  2 18:12:47 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat,  2 Jun 2007 17:12:47 -0500 (CDT)
Subject: [Scipy-svn] r3069 - trunk/Lib/ndimage/src
Message-ID: <20070602221247.2FDAC39C08D@new.scipy.org>

Author: stefan
Date: 2007-06-02 17:12:32 -0500 (Sat, 02 Jun 2007)
New Revision: 3069

Modified:
   trunk/Lib/ndimage/src/nd_image.c
Log:
Ensure clean memory de-allocation in Py_Histogram.


Modified: trunk/Lib/ndimage/src/nd_image.c
===================================================================
--- trunk/Lib/ndimage/src/nd_image.c	2007-06-01 12:09:10 UTC (rev 3068)
+++ trunk/Lib/ndimage/src/nd_image.c	2007-06-02 22:12:32 UTC (rev 3069)
@@ -1088,7 +1088,9 @@
                       &max_label, &n_results))
     goto exit;
 
-  histograms = (PyArrayObject**)malloc(input->nd * n_results *
+  /* Set all pointers to NULL, so that freeing the memory */
+  /* doesn't cause problems. */
+  histograms = (PyArrayObject**)calloc(input->nd * n_results,
                                        sizeof(PyArrayObject*));
   if (!histograms) {
     PyErr_NoMemory();


From scipy-svn at scipy.org  Mon Jun  4 00:33:04 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sun,  3 Jun 2007 23:33:04 -0500 (CDT)
Subject: [Scipy-svn] r3070 - in trunk/Lib/sandbox/pyem: . profile_data
Message-ID: <20070604043304.57CA539C120@new.scipy.org>

Author: cdavid
Date: 2007-06-03 23:32:56 -0500 (Sun, 03 Jun 2007)
New Revision: 3070

Added:
   trunk/Lib/sandbox/pyem/data/
Modified:
   trunk/Lib/sandbox/pyem/densities2.py
   trunk/Lib/sandbox/pyem/profile_data/profile_densities.py
Log:
More benchmarking for basic operations in row vs col

Modified: trunk/Lib/sandbox/pyem/densities2.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities2.py	2007-06-02 22:12:32 UTC (rev 3069)
+++ trunk/Lib/sandbox/pyem/densities2.py	2007-06-04 04:32:56 UTC (rev 3070)
@@ -1,7 +1,7 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Wed Dec 06 09:00 PM 2006 J
+# Last Change: Sat Jun 02 07:00 PM 2007 J
 
 # New version, with default numpy ordering.
 

Modified: trunk/Lib/sandbox/pyem/profile_data/profile_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/profile_data/profile_densities.py	2007-06-02 22:12:32 UTC (rev 3069)
+++ trunk/Lib/sandbox/pyem/profile_data/profile_densities.py	2007-06-04 04:32:56 UTC (rev 3070)
@@ -1,42 +1,78 @@
 import numpy as N
 from numpy.random import randn
-from scipy.sandbox.pyem import densities as D
-from scipy.sandbox.pyem import _c_densities as DC
-#import tables
 
+from numpy.ctypeslib import load_library, ndpointer
+from ctypes import cdll, c_uint, c_int, c_double, POINTER
+
+lib = load_library("blop.so", "file")
+
+arg1    = ndpointer(dtype=N.float64)
+arg2    = c_uint
+arg3    = c_uint
+arg4    = ndpointer(dtype=N.float64)
+arg5    = ndpointer(dtype=N.float64)
+
+lib.compute.argtypes    = [arg1, arg2, arg3, arg4, arg5]
+lib.compute.restype     = c_int
+# Compare computing per component likelihood for frame per row vs frame per column
+def component_likelihood(x, mu, va, log = False):
+    """expect one frame to be one row (rank 2). mu and var are rank 1 array."""
+    d = mu.size
+
+    return N.exp(N.sum((x - mu) ** 2, 1))
+
+def component_likelihood2(x, mu, va, log = False):
+    """expect one frame to be one column (rank 2). mu and var are rank 1 array."""
+    d = mu.size
+
+    y = (x[0] - mu[0]) ** 2
+    for i in range(1, d):
+        y += (x[i] - mu[i]) ** 2
+
+    return N.exp(y)
+
+def component_likelihood3(x, mu, va, log = False):
+    """expect one frame to be one row (rank 2). mu and var are rank 1 array."""
+    d = mu.size
+
+    y = N.empty(x.shape[0], x.dtype)
+    return lib.compute(x, x.shape[0], d, mu, y)
+
 def bench(func, mode = 'diag'):
-    #===========================================
-    # Diag Gaussian of dimension 20
-    #===========================================
     d       = 30
     n       = 1e5
     niter   = 10
 
     print "Compute %d times densities, %d dimension, %d frames" % (niter, d, n)
-    # Generate a model with k components, d dimensions
-    mu  = randn(1, d)
-    if mode == 'diag':
-        va  = abs(randn(1, d))
-    elif mode == 'full':
-        va  = randn(d, d)
-        va  = N.dot(va, va.transpose())
-
+    mu  = randn(d)
+    va  = abs(randn(d))
+    
     X   = randn(n, d)
     for i in range(niter):
         Y   = func(X, mu, va)
 
+def bench2(func, mode = 'diag'):
+    d       = 30
+    n       = 1e5
+    niter   = 10
+
+    print "Compute %d times densities, %d dimension, %d frames" % (niter, d, n)
+    mu  = randn(d)
+    va  = abs(randn(d))
+    
+    X   = randn(d, n)
+    for i in range(niter):
+        Y   = func(X, mu, va)
+
 def benchpy():
-    bench(D.gauss_den)
+    bench(component_likelihood)
 
-def benchc():
-    bench(DC.gauss_den)
+def benchpy3():
+    bench(component_likelihood3)
 
-def benchpyfull():
-    bench(D.gauss_den, 'full')
+def benchpy2():
+    bench2(component_likelihood2)
 
-def benchcfull():
-    bench(DC.gauss_den, 'full')
-
 if __name__ == "__main__":
     import hotshot, hotshot.stats
     profile_file    = 'gdenpy.prof'
@@ -48,7 +84,14 @@
 
     profile_file    = 'gdenc.prof'
     prof    = hotshot.Profile(profile_file, lineevents=1)
-    prof.runcall(benchc)
+    prof.runcall(benchpy2)
     p = hotshot.stats.load(profile_file)
     print p.sort_stats('cumulative').print_stats(20)
     prof.close()
+
+    profile_file    = 'gdenc.prof'
+    prof    = hotshot.Profile(profile_file, lineevents=1)
+    prof.runcall(benchpy3)
+    p = hotshot.stats.load(profile_file)
+    print p.sort_stats('cumulative').print_stats(20)
+    prof.close()


From scipy-svn at scipy.org  Mon Jun  4 07:32:07 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon,  4 Jun 2007 06:32:07 -0500 (CDT)
Subject: [Scipy-svn] r3071 - trunk/Lib/cluster
Message-ID: <20070604113207.771B239C1B8@new.scipy.org>

Author: cdavid
Date: 2007-06-04 06:32:00 -0500 (Mon, 04 Jun 2007)
New Revision: 3071

Modified:
   trunk/Lib/cluster/vq.py
Log:
Add a TODO for kmeans.

Modified: trunk/Lib/cluster/vq.py
===================================================================
--- trunk/Lib/cluster/vq.py	2007-06-04 04:32:56 UTC (rev 3070)
+++ trunk/Lib/cluster/vq.py	2007-06-04 11:32:00 UTC (rev 3071)
@@ -13,12 +13,20 @@
         Calculate code book membership of obs
     kmeans(obs,k_or_guess,iter=20,thresh=1e-5) --
         Train a codebook for mimimum distortion using the kmeans algorithm
+    kmeans2
+        Similar to kmeans, but with several initialization methods.
 
 """
 __docformat__ = 'restructuredtext'
 
 __all__ = ['whiten', 'vq', 'kmeans', 'kmeans2']
 
+# TODO:
+#   - implements high level method for running several times kmeans with
+#   different initialization
+#   - warning: what happens if different number of clusters ? For now, emit a
+#   warning, but it is not great, because I am not sure it really make sense to
+#   succeed in this case (maybe an exception is better ?)
 import warnings
 
 from numpy.random import randint


From scipy-svn at scipy.org  Thu Jun  7 22:23:33 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu,  7 Jun 2007 21:23:33 -0500 (CDT)
Subject: [Scipy-svn] r3072 - trunk/Lib/sandbox/pyem
Message-ID: <20070608022333.E11F639C0C6@new.scipy.org>

Author: cdavid
Date: 2007-06-07 21:23:29 -0500 (Thu, 07 Jun 2007)
New Revision: 3072

Modified:
   trunk/Lib/sandbox/pyem/gauss_mix.py
Log:
Refactor 1d computation for plotting

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-04 11:32:00 UTC (rev 3071)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-08 02:23:29 UTC (rev 3072)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Thu Nov 16 08:00 PM 2006 J
+# Last Change: Mon Jun 04 07:00 PM 2007 J
 
 # Module to implement GaussianMixture class.
 
@@ -264,6 +264,7 @@
             raise GmParamError("""Parameters of the model has not been 
                 set yet, please set them using self.set_param()""")
 
+        assert self.d > 1
         k       = self.k
         Xe, Ye  = self.conf_ellipses(*args, **kargs)
         try:
@@ -287,13 +288,15 @@
         """
         # This is not optimized at all, may be slow. Should not be
         # difficult to make much faster, but it is late, and I am lazy
+        # XXX separate the computation from the plotting
         if not self.d == 1:
             raise GmParamError("the model is not one dimensional model")
         from scipy.stats import norm
         nrm     = norm(0, 1)
         pval    = N.sqrt(self.va[:,0]) * nrm.ppf((1+level)/2)
 
-        # Compute reasonable min/max for the normal pdf
+        # Compute reasonable min/max for the normal pdf: [-mc * std, mc * std]
+        # gives the range we are taking in account for each gaussian
         mc  = 3
         std = N.sqrt(self.va[:,0])
         m   = N.amin(self.mu[:, 0] - mc * std)
@@ -338,6 +341,17 @@
         except ImportError:
             raise GmParamError("matplotlib not found, cannot plot...")
 
+    def _get_component_pdf(self, x):
+        """Returns a list of pdf, one for each component. Summing them gives
+        the pdf of the mixture."""
+        std = N.sqrt(self.va[:,0])
+        retval = N.empty((x.size, self.k))
+        for c in range(self.k):
+            retval[:, c] = self.w[c]/(N.sqrt(2*N.pi) * std[c]) * \
+                    N.exp(-(x-self.mu[c][0])**2/(2*std[c]**2))
+
+        return retval
+
     # Syntactic sugar
     def __repr__(self):
         repr    = ""


From scipy-svn at scipy.org  Thu Jun  7 22:25:11 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu,  7 Jun 2007 21:25:11 -0500 (CDT)
Subject: [Scipy-svn] r3073 - in trunk/Lib/sandbox/pyem: . profile_data
Message-ID: <20070608022511.75EA239C0C6@new.scipy.org>

Author: cdavid
Date: 2007-06-07 21:25:02 -0500 (Thu, 07 Jun 2007)
New Revision: 3073

Added:
   trunk/Lib/sandbox/pyem/profile_data/blop.c
   trunk/Lib/sandbox/pyem/profile_data/gden.m
   trunk/Lib/sandbox/pyem/profile_data/mat_prof.m
Removed:
   trunk/Lib/sandbox/pyem/.bzrignore
   trunk/Lib/sandbox/pyem/test_reg.py
Modified:
   trunk/Lib/sandbox/pyem/TODO
Log:
Add some profiling scripts to compare likelihood computation  with matlab.

Deleted: trunk/Lib/sandbox/pyem/.bzrignore
===================================================================
--- trunk/Lib/sandbox/pyem/.bzrignore	2007-06-08 02:23:29 UTC (rev 3072)
+++ trunk/Lib/sandbox/pyem/.bzrignore	2007-06-08 02:25:02 UTC (rev 3073)
@@ -1,29 +0,0 @@
-dist
-pyem/src/c_gmm.c
-MANIFEST
-build
-pyem/bench1prof
-pyem/diag.dat
-pyem/gdenprof
-tmp.py
-test.py
-profile_gmm_em.py
-data.h5
-gmmprof
-valgrind-python.supp
-valgrind-python.supp
-pyem/
-pyem/matcode/
-pyem/tmp/
-pyem/tmp/kmean.py
-pyem/tmp/blop.py
-pyem/tmp/
-pyem/tmp
-matcode
-../MSG
-MSG
-exinfo.py
-blop
-*.prog
-*.prof
-test_storage.py

Modified: trunk/Lib/sandbox/pyem/TODO
===================================================================
--- trunk/Lib/sandbox/pyem/TODO	2007-06-08 02:23:29 UTC (rev 3072)
+++ trunk/Lib/sandbox/pyem/TODO	2007-06-08 02:25:02 UTC (rev 3073)
@@ -1,11 +1,12 @@
-# Last Change: Mon May 28 11:00 AM 2007 J
+# Last Change: Mon Jun 04 07:00 PM 2007 J
 
 
 Things which must be implemented for a 1.0 version (in importante order)
     - A classifier
+    - handle rank 1 for 1d data
     - basic regularization
-    - Use scipy.cluster kmeans instead of our own, as it now provides all
-    necessary functionalities.
+    - docstrings
+    - demo for pdf estimation, discriminant analysis and clustering
 
 Things which would be nice (after 1.0 version):
     - Bayes prior (hard, suppose MCMC)

Added: trunk/Lib/sandbox/pyem/profile_data/blop.c
===================================================================
--- trunk/Lib/sandbox/pyem/profile_data/blop.c	2007-06-08 02:23:29 UTC (rev 3072)
+++ trunk/Lib/sandbox/pyem/profile_data/blop.c	2007-06-08 02:25:02 UTC (rev 3073)
@@ -0,0 +1,37 @@
+#include <math.h>
+#include <stddef.h>
+
+int compute(const double *in, size_t n, size_t d, const double* mu, double* out)
+{
+    size_t  i, j;
+    double acc;
+
+    for (i = 0; i < n; ++i) {
+        acc = 0;
+        for (j = 0; j < d; ++j) {
+            acc += (in[i*d+j] - mu[j]) * (in[i*d+j] - mu[j]); 
+        }
+        out[i] = exp(acc);
+    }
+
+    return 0;
+}
+
+#if 0
+int main(void) 
+{
+    const size_t n = 1e5;
+    const size_t d = 30;
+    size_t iter = 10, i;
+
+    double  *in, *out;
+
+    in = malloc(sizeof(*in) * n * d);
+    out = malloc(sizeof(*out) * n * d);
+
+    for (i = 0; i < iter; ++i) {
+    }
+    free(in);
+    out(in);
+}
+#endif

Added: trunk/Lib/sandbox/pyem/profile_data/gden.m
===================================================================
--- trunk/Lib/sandbox/pyem/profile_data/gden.m	2007-06-08 02:23:29 UTC (rev 3072)
+++ trunk/Lib/sandbox/pyem/profile_data/gden.m	2007-06-08 02:25:02 UTC (rev 3073)
@@ -0,0 +1,10 @@
+function out = gden(x, mu)
+
+% Last Change: Mon Jun 04 10:00 AM 2007 J
+[n, d] = size(x);
+[nm, dm] = size(mu);
+if nm ~= n
+    out = sum(x-repmat(mu, n, 1), 1);
+else
+    out = sum(x-mu, 1);
+end;

Added: trunk/Lib/sandbox/pyem/profile_data/mat_prof.m
===================================================================
--- trunk/Lib/sandbox/pyem/profile_data/mat_prof.m	2007-06-08 02:23:29 UTC (rev 3072)
+++ trunk/Lib/sandbox/pyem/profile_data/mat_prof.m	2007-06-08 02:25:02 UTC (rev 3073)
@@ -0,0 +1,11 @@
+% Last Change: Mon Jun 04 10:00 AM 2007 J
+
+n   = 1e5;
+d   = 30;
+
+x   = randn(n, d);
+mu  = randn(n, d);
+
+for i=1:10
+    y = gden(x, mu);
+end;

Deleted: trunk/Lib/sandbox/pyem/test_reg.py
===================================================================
--- trunk/Lib/sandbox/pyem/test_reg.py	2007-06-08 02:23:29 UTC (rev 3072)
+++ trunk/Lib/sandbox/pyem/test_reg.py	2007-06-08 02:25:02 UTC (rev 3073)
@@ -1,44 +0,0 @@
-import numpy as N
-
-from gauss_mix import GM
-from gmm_em import GMM, EM
-
-from numpy.random import seed
-
-def test_reg():
-    seed(0)
-    # Generate data with a few components
-    d   = 2
-    k   = 1
-    n   = 500
-
-    w, mu, va   = GM.gen_param(d, k)
-    gm          = GM.fromvalues(w, mu, va)
-
-    data    = gm.sample(n)
-
-    # Try to learn with an insane number of components
-    gmm = GMM(GM(d, 30), 'random')
-
-    em  = EM()
-    like= em.train(data, gmm, 20, 1e-20)
-
-    # import pylab as P
-    # P.subplot(2, 1, 1)
-    # P.plot(data[:, 0], data[:, 1], '.')
-    # gmm.gm.plot()
-    # P.subplot(2, 1, 2)
-    # P.plot(like)
-    # print like
-    # P.show()
-
-if __name__ == "__main__":
-    # import hotshot, hotshot.stats
-    # profile_file    = 'manyk.prof'
-    # prof    = hotshot.Profile(profile_file, lineevents=1)
-    # prof.runcall(test_reg)
-    # p = hotshot.stats.load(profile_file)
-    # print p.sort_stats('cumulative').print_stats(20)
-    # prof.close()
-    test_reg()
-


From scipy-svn at scipy.org  Thu Jun  7 22:36:20 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu,  7 Jun 2007 21:36:20 -0500 (CDT)
Subject: [Scipy-svn] r3074 - in trunk/Lib/sandbox/pyem/data: . oldfaithful
	oldfaithful/src
Message-ID: <20070608023620.7606339C0C6@new.scipy.org>

Author: cdavid
Date: 2007-06-07 21:36:11 -0500 (Thu, 07 Jun 2007)
New Revision: 3074

Added:
   trunk/Lib/sandbox/pyem/data/oldfaithful/
   trunk/Lib/sandbox/pyem/data/oldfaithful/README
   trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py
   trunk/Lib/sandbox/pyem/data/oldfaithful/data.py
   trunk/Lib/sandbox/pyem/data/oldfaithful/oldfaithful.py
   trunk/Lib/sandbox/pyem/data/oldfaithful/src/
   trunk/Lib/sandbox/pyem/data/oldfaithful/src/Oldfaithful.txt
   trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC1.txt
   trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC2.txt
   trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC3.txt
   trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC4.txt
   trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC5.txt
   trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC6.txt
   trunk/Lib/sandbox/pyem/data/oldfaithful/src/convert.py
Log:
Add faithful data in data. 

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/README
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/README	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/README	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,6 @@
+Each OldfaithfulC*.txt is one column of the datasets as presented in Azzalini
+and Bowman. The Oldfaithful.txt is simply a cat of all those files: this is just
+to make checking easier. The data in the txt are *exactly* the same as the
+ones in Azzalini and Bowman: again, post processing them in python is easy
+(converting the time in seconds, etc...), and having exactly the data of the
+reference makes it easier to check.

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,8 @@
+#! /usr/bin/env python
+# Last Change: Wed Apr 25 06:00 PM 2007 J
+import faith as _faith
+__doc__     = _faith.DESCRSHORT
+copyright   = _faith.COPYRIGHT
+source      = _faith.SOURCE
+
+load        = _faith.load

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/data.py
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/data.py	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/data.py	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,94 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2007 David Cournapeau <cournape at gmail.com>
+
+# The code and descriptive text is copyrighted and offered under the terms of
+# the BSD License from the authors; see below. However, the actual dataset may
+# have a different origin and intellectual property status. See the SOURCE and
+# COPYRIGHT variables for this information.
+
+# Copyright (c) 2007 David Cournapeau <cournape at gmail.com>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the author nor the names of any contributors may be used
+#       to endorse or promote products derived from this software without
+#       specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Last Change: Fri Jun 08 11:00 AM 2007 J
+
+"""Old faithful dataset."""
+
+__docformat__ = 'restructuredtext'
+
+COPYRIGHT   = """See SOURCE. Pr Azzalini has explicitely given his consent for
+the use of those data in scipy."""
+TITLE       = "Old Faithful Geyser Data"
+SOURCE      = """AZZALINI A., BOWMAN A. W. (1990).  A look at some data on the
+Old Faithful Geyser.  Applied Statistics (Journal of the Royal Statistical
+Society series C), vol. 39, pp. 357-365. Data collected by the Yellowstone Park
+geologist, R. A. Hutchinson.
+
+References: 
+    - Härdle, W. (1991) Smoothing Techniques with Implementation in S.  New
+    York: Springer.
+    - Azzalini, A. and Bowman, A. W. (1990).  A look at some data on the Old
+Faithful geyser. Applied Statistics, 39, 357--365.
+
+Those data are exactly the ones from Azzalini and Bowman's article."""
+
+DESCRSHORT  = """Waiting time between eruptions and the duration of the
+eruption for the Old Faithful geyser in Yellowstone National Park, Wyoming,
+USA. Waiting times and duration time are in seconds"""
+
+DESCRLONG   = """According to Azzalini and Bowman's article, those data
+were recorded continuously from 1th August to 15th August 1985.
+
+Some of the durations times are labelled as L, M or S (Large, Small, Medium).
+According to Azzalini and Bowman's paper: "because the unbroken sequence
+required measurements to be taken at night, some duration times are recorded as
+L (long), S (short) and M (medium). Other data sets do not contain a con-
+tinuous stream of data, making it difficult to deal with time series features."
+"""
+
+NOTE        = """Eruptions time in seconds, waiting time to next eruption (in
+seconds)"""
+
+def load():
+    """load the actual data and returns them.
+    
+    :returns:
+        data: recordarray
+            a record array of the data.
+    """
+    import numpy
+    from oldfaithful import waiting, duration
+    assert len(waiting) == len(duration) == 299
+    data    = numpy.empty(len(waiting), \
+            [('duration', '|S5'), ('waiting', 'int')])
+    data['waiting']    = waiting
+    data['duration']   = duration
+    return data

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/oldfaithful.py
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/oldfaithful.py	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/oldfaithful.py	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,54 @@
+duration =  [ '241', '129', 'L', 'L', 'L', 'S', '263', '257', '122', '290',
+        '110', '327', '97', '292', '263', '106', '280', 'S', '284', '253',
+        '114', '298', 'S', 'L', 'S', 'L', '170', '270', '244', '223', '211',
+        '268', '133', '293', '156', '249', '132', '286', '110', '276', '136',
+        '248', 'S', 'L', 'S', 'L', '113', '256', '125', '268', '133', '240',
+        '106', '260', '131', '269', '233', '200', '224', '240', '117', '316',
+        'S', 'L', 'S', 'L', 'S', 'L', '212', '130', '270', '121', '249', '252',
+        '260', '116', '279', '229', '242', '250', '280', '109', 'L', 'M', 'L',
+        'S', '267', '123', '255', '115', '280', '104', '263', '106', '276',
+        '112', '267', '98', '302', '109', '306', '98', '257', 'S', 'L', 'S',
+        '272', '120', 'L', '176', '284', '234', '117', '247', '108', '280',
+        '110', '282', '127', '287', '109', '246', '279', 'L', 'S', 'L', 'L',
+        '253', '248', '236', '225', '265', '148', '250', '228', '259', '232',
+        '281', '102', '298', '256', '275', 'L', 'L', 'L', 'L', '119', '276',
+        '50', '295', '104', '275', '102', '285', '110', '270', '112', '267',
+        '267', '240', '288', 'L', 'L', 'S', 'L', '116', '275', '120', '222',
+        '172', '290', '207', '263', '108', '264', '149', '271', '126', '261',
+        '262', '107', '295', '109', 'L', 'L', 'L', '232', '111', '282', '121',
+        '268', '112', '250', '114', '255', '195', '253', '113', '299', '111',
+        '240', '118', '286', 'L', 'S', 'L', 'L', '143', '265', '253', '262',
+        '120', '267', '105', '270', '97', '282', '154', '222', '254', '116',
+        '261', 'L', 'L', 'L', '253', '240', '248', '113', '268', '117', '253',
+        '103', '267', '255', '238', '263', '118', '267', '256', '115', '265',
+        'M', 'L', 'S', 'L', '197', '110', '277', '110', '277', '276', '255',
+        '116', '299', '118', '258', '252', '272', '264', '277', 'S', 'L', 'L',
+        '235', 'S', '270', '108', '240', '165', '284', '238', '117', '298',
+        '111', '288', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'S', 'L', '116',
+        '260', '100', '286', '117', '281', '116', '265', '128', '245', '124',
+        'L', 'L', 'S']
+waiting =  [ 4800, 4260, 3420, 4800, 4500, 4620, 3600, 5160, 4620, 3360, 4860,
+        3000, 5340, 3240, 5400, 4380, 3600, 4980, 3900, 4920, 5040, 3240, 5100,
+        3480, 4740, 3420, 5280, 4080, 4560, 4680, 4440, 5100, 4500, 3900, 4560,
+        3480, 5460, 3000, 5220, 2880, 5580, 3240, 5160, 3180, 4680, 3120, 4980,
+        3600, 5220, 2940, 4800, 3600, 5520, 2580, 5340, 3600, 5040, 4140, 4440,
+        4260, 6480, 3000, 4620, 3420, 4800, 3660, 4920, 2880, 4860, 4380, 3720,
+        4740, 3240, 4800, 4380, 4860, 3720, 4860, 4260, 4740, 4860, 4440, 3540,
+        4860, 3960, 5220, 3180, 4800, 3000, 5220, 3060, 4920, 3480, 4860, 2940,
+        5520, 3000, 5280, 3720, 5580, 3360, 5340, 3060, 4740, 3480, 4920, 3120,
+        5280, 3120, 4680, 4140, 4500, 4620, 3180, 4800, 3300, 5220, 3180, 5100,
+        3660, 5580, 3240, 4560, 4800, 4860, 3540, 5160, 4680, 4260, 4620, 4560,
+        5640, 4500, 3000, 4980, 4920, 4320, 4620, 4500, 3900, 4740, 4320, 4680,
+        4620, 4740, 4500, 4680, 3840, 4800, 2940, 5280, 3240, 5100, 3060, 5760,
+        3000, 4800, 4680, 4860, 4320, 4500, 4680, 5220, 4140, 3300, 4980, 2940,
+        4920, 3420, 5040, 3420, 5040, 4380, 4680, 3420, 4740, 3420, 5400, 3720,
+        5220, 4680, 3120, 5880, 2880, 4680, 4740, 3900, 5040, 3000, 4980, 3600,
+        4800, 3000, 5280, 3000, 5040, 4440, 4560, 3900, 5340, 2940, 5280, 3060,
+        4680, 5100, 3900, 4500, 4620, 4140, 5520, 4080, 5220, 3660, 4860, 3300,
+        5580, 3180, 5040, 4200, 4380, 5580, 3000, 5220, 4620, 4440, 4320, 4920,
+        4440, 4800, 2940, 5460, 3180, 5160, 2940, 4740, 5340, 5220, 4560, 3540,
+        4800, 5340, 2700, 5580, 4320, 4260, 3240, 4740, 4440, 3900, 4680, 3420,
+        5220, 4320, 5040, 2820, 5040, 3420, 5220, 4080, 5160, 4500, 4380, 3180,
+        4920, 5580, 4620, 3240, 5760, 2880, 5340, 3780, 5040, 4560, 3720, 4980,
+        3000, 5100, 4680, 4680, 4860, 4680, 4560, 4440, 4860, 3960, 5040, 2880,
+        5580, 2820, 5220, 3060, 4680, 3240, 5220, 3120, 5100, 3480, 5280, 4740]

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/Oldfaithful.txt
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/src/Oldfaithful.txt	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/Oldfaithful.txt	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,299 @@
+4:01,80
+2:09 ,71
+L,57
+L,80
+L,75
+S,77
+4:23,60
+4:17,86
+2:02,77
+4:50,56
+1:50,81
+5:27,50
+1:37,89
+4:52,54
+4:23,90
+1:46,73
+4:40,60
+S,83
+4:44,65
+4:13,82
+1:54,84
+4:58,54
+S,85
+L,58
+S,79
+L,57
+2:50,88
+4:30,68
+4:04,76
+3:43,78
+3:31,74
+4:28,85
+2:13,75
+4:53,65
+2:36,76
+4:09,58
+2:12 ,91
+4:46,50
+1:50,87
+4:36,48
+2:16,93
+4:08,54
+S,86
+L,53
+S,78
+L,52
+1:53,83
+4:16,60
+2:05,87
+4:28,49
+2:13,80
+4:00,60
+1:46,92
+4:20,43
+2:11,89
+4:29,60
+3:53,84
+3:20,69
+3:44,74
+4:00,71
+1:57,108
+5:16,50
+S,77
+L,57
+S,80
+L,61
+S,82
+L,48
+3:32,81
+2:10,73
+4:30,62
+2:01,79
+4:09,54
+4:12,80
+4:20,73
+1:56,81
+4:39,62
+3:49,81
+4:02,71
+4:10,79
+4:40,81
+1:49,74
+L,59
+M,81
+L,66
+S,87
+4:27,53
+2:03,80
+4:15,50
+1:55,87
+4:40,51
+1:44,82
+4:23,58
+1:46,81
+4:36,49
+1:52,92
+4:27,50
+1:38,88
+5:02,62
+1:49,93
+5:06,56
+1:38,89
+4:17,51
+S,79
+L,58
+S,82
+4:32,52
+2:00,88
+L,52
+2:56,78
+4:44,69
+3:54,75
+1:57,77
+4:07,53
+1:48,80
+4:40,55
+1:50,87
+4:42,53
+2:07,85
+4:47,61
+1:49,93
+4:06,54
+4:39,76
+L,80
+S,81
+L,59
+L,86
+4:13,78
+4:08,71
+3:56,77
+3:45,76
+4:25,94
+2:28,75
+4:10,50
+3:48,83
+4:19,82
+3:52,72
+4:41,77
+1:42,75
+4:58,65
+4:16,79
+4:35,72
+L,78
+L,77
+L,79
+L,75
+1:59,78
+4:36,64
+0:50,80
+4:55,49
+1:44,88
+4:35,54
+1:42,85
+4:45,51
+1:50,96
+4:30,50
+1:52,80
+4:27,78
+4:27,81
+4:00,72
+4:48,75
+L,78
+L,87
+S,69
+L,55
+1:56,83
+4:35,49
+2:00,82
+3:42,57
+2:52,84
+4:50,57
+3:27,84
+4:23,73
+1:48,78
+4:24,57
+2:29,79
+4:31,57
+2:06,90
+4:21,62
+4:22,87
+1:47,78
+4:55,52
+1:49,98
+L,48
+L,78
+L,79
+3:52,65
+1:51,84
+4:42,50
+2:01,83
+4:28,60
+1:52,80
+4:10,50
+1:54,88
+4:15,50
+3:15,84
+4:13,74
+1:53,76
+4:59,65
+1:51,89
+4:00,49
+1:58,88
+4:46,51
+L,78
+S,85
+L,65
+L,75
+2:23,77
+4:25,69
+4:13,92
+4:22,68
+2:00,87
+4:27,61
+1:45,81
+4:30,55
+1:37,93
+4:42,53
+2:34,84
+3:42,70
+4:14,73
+1:56,93
+4:21,50
+L,87
+L,77
+L,74
+4:13,72
+4:00,82
+4:08,74
+1:53,80
+4:28,49
+1:57,91
+4:13,53
+1:43,86
+4:27,49
+4:15,79
+3:58,89
+4:23,87
+1:58,76
+4:27,59
+4:16,80
+1:55,89
+4:25,45
+M,93
+L,72
+S,71
+L,54
+3:17,79
+1:50,74
+4:37,65
+1:50,78
+4:37,57
+4:36,87
+4:15,72
+1:56,84
+4:59,47
+1:58,84
+4:18,57
+4:12,87
+4:32,68
+4:24,86
+4:37,75
+S,73
+L,53
+L,82
+3:55,93
+S,77
+4:30,54
+1:48,96
+4:00,48
+2:45,89
+4:44,63
+3:58,84
+1:57,76
+4:58,62
+1:51,83
+4:48,50
+L,85
+L,78
+L,78
+L,81
+L,78
+L,76
+L,74
+S,81
+L,66
+1:56,84
+4:20,48
+1:40,93
+4:46,47
+1:57,87
+4:41,51
+1:56,78
+4:25,54
+2:08,87
+4:05,52
+2:04,85
+L,58
+L,88
+S,79

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC1.txt
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC1.txt	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC1.txt	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,50 @@
+4:01,80
+2:09 ,71
+L,57
+L,80
+L,75
+S,77
+4:23,60
+4:17,86
+2:02,77
+4:50,56
+1:50,81
+5:27,50
+1:37,89
+4:52,54
+4:23,90
+1:46,73
+4:40,60
+S,83
+4:44,65
+4:13,82
+1:54,84
+4:58,54
+S,85
+L,58
+S,79
+L,57
+2:50,88
+4:30,68
+4:04,76
+3:43,78
+3:31,74
+4:28,85
+2:13,75
+4:53,65
+2:36,76
+4:09,58
+2:12 ,91
+4:46,50
+1:50,87
+4:36,48
+2:16,93
+4:08,54
+S,86
+L,53
+S,78
+L,52
+1:53,83
+4:16,60
+2:05,87
+4:28,49

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC2.txt
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC2.txt	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC2.txt	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,50 @@
+2:13,80
+4:00,60
+1:46,92
+4:20,43
+2:11,89
+4:29,60
+3:53,84
+3:20,69
+3:44,74
+4:00,71
+1:57,108
+5:16,50
+S,77
+L,57
+S,80
+L,61
+S,82
+L,48
+3:32,81
+2:10,73
+4:30,62
+2:01,79
+4:09,54
+4:12,80
+4:20,73
+1:56,81
+4:39,62
+3:49,81
+4:02,71
+4:10,79
+4:40,81
+1:49,74
+L,59
+M,81
+L,66
+S,87
+4:27,53
+2:03,80
+4:15,50
+1:55,87
+4:40,51
+1:44,82
+4:23,58
+1:46,81
+4:36,49
+1:52,92
+4:27,50
+1:38,88
+5:02,62
+1:49,93

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC3.txt
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC3.txt	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC3.txt	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,50 @@
+5:06,56
+1:38,89
+4:17,51
+S,79
+L,58
+S,82
+4:32,52
+2:00,88
+L,52
+2:56,78
+4:44,69
+3:54,75
+1:57,77
+4:07,53
+1:48,80
+4:40,55
+1:50,87
+4:42,53
+2:07,85
+4:47,61
+1:49,93
+4:06,54
+4:39,76
+L,80
+S,81
+L,59
+L,86
+4:13,78
+4:08,71
+3:56,77
+3:45,76
+4:25,94
+2:28,75
+4:10,50
+3:48,83
+4:19,82
+3:52,72
+4:41,77
+1:42,75
+4:58,65
+4:16,79
+4:35,72
+L,78
+L,77
+L,79
+L,75
+1:59,78
+4:36,64
+0:50,80
+4:55,49

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC4.txt
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC4.txt	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC4.txt	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,50 @@
+1:44,88
+4:35,54
+1:42,85
+4:45,51
+1:50,96
+4:30,50
+1:52,80
+4:27,78
+4:27,81
+4:00,72
+4:48,75
+L,78
+L,87
+S,69
+L,55
+1:56,83
+4:35,49
+2:00,82
+3:42,57
+2:52,84
+4:50,57
+3:27,84
+4:23,73
+1:48,78
+4:24,57
+2:29,79
+4:31,57
+2:06,90
+4:21,62
+4:22,87
+1:47,78
+4:55,52
+1:49,98
+L,48
+L,78
+L,79
+3:52,65
+1:51,84
+4:42,50
+2:01,83
+4:28,60
+1:52,80
+4:10,50
+1:54,88
+4:15,50
+3:15,84
+4:13,74
+1:53,76
+4:59,65
+1:51,89

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC5.txt
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC5.txt	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC5.txt	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,50 @@
+4:00,49
+1:58,88
+4:46,51
+L,78
+S,85
+L,65
+L,75
+2:23,77
+4:25,69
+4:13,92
+4:22,68
+2:00,87
+4:27,61
+1:45,81
+4:30,55
+1:37,93
+4:42,53
+2:34,84
+3:42,70
+4:14,73
+1:56,93
+4:21,50
+L,87
+L,77
+L,74
+4:13,72
+4:00,82
+4:08,74
+1:53,80
+4:28,49
+1:57,91
+4:13,53
+1:43,86
+4:27,49
+4:15,79
+3:58,89
+4:23,87
+1:58,76
+4:27,59
+4:16,80
+1:55,89
+4:25,45
+M,93
+L,72
+S,71
+L,54
+3:17,79
+1:50,74
+4:37,65
+1:50,78

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC6.txt
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC6.txt	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/OldfaithfulC6.txt	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,49 @@
+4:37,57
+4:36,87
+4:15,72
+1:56,84
+4:59,47
+1:58,84
+4:18,57
+4:12,87
+4:32,68
+4:24,86
+4:37,75
+S,73
+L,53
+L,82
+3:55,93
+S,77
+4:30,54
+1:48,96
+4:00,48
+2:45,89
+4:44,63
+3:58,84
+1:57,76
+4:58,62
+1:51,83
+4:48,50
+L,85
+L,78
+L,78
+L,81
+L,78
+L,76
+L,74
+S,81
+L,66
+1:56,84
+4:20,48
+1:40,93
+4:46,47
+1:57,87
+4:41,51
+1:56,78
+4:25,54
+2:08,87
+4:05,52
+2:04,85
+L,58
+L,88
+S,79

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/src/convert.py
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/src/convert.py	2007-06-08 02:25:02 UTC (rev 3073)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/src/convert.py	2007-06-08 02:36:11 UTC (rev 3074)
@@ -0,0 +1,43 @@
+#! /usr/bin/env python
+# Last Change: Fri Jun 08 11:00 AM 2007 J
+
+# This script generates a python file from the txt data
+import csv
+
+dataname = 'Oldfaithful.txt'
+f = open(dataname, 'r')
+a = csv.reader(f)
+el = [i for i in a]
+duration = [i[0] for i in el]
+waiting = [i[1] for i in el]
+
+# Convert duration and waiting times to seconds
+duration2 = []
+for i in range(len(duration)):
+	if duration[i] == 'L':
+		duration2.append('L')
+	elif duration[i] == 'M':
+		duration2.append('M')
+	elif duration[i] == 'S':
+		duration2.append('S')
+	else:
+		m, s = duration[i].split(':')
+		m = int(m)
+		s = int(s)
+		assert s >= 0 and s < 60
+		duration2.append(m * 60 + s)
+waiting2 = [int(i) * 60 for i in waiting]
+
+# Write the data in oldfaithful.py
+a = open("oldfaithful.py", "w")
+
+a.write("duration =  [\n")
+for i in range(len(duration2) - 1):
+	a.write("'%s', " % duration2[i])
+a.write("'%s']\n" % duration2[-1])
+
+a.write("waiting =  [\n")
+for i in range(len(waiting2) - 1):
+	a.write("%s, " % waiting2[i])
+a.write("%s]\n" % waiting2[-1])
+a.close()


Property changes on: trunk/Lib/sandbox/pyem/data/oldfaithful/src/convert.py
___________________________________________________________________
Name: svn:executable
   + *


From scipy-svn at scipy.org  Fri Jun  8 00:09:31 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu,  7 Jun 2007 23:09:31 -0500 (CDT)
Subject: [Scipy-svn] r3075 - in trunk/Lib/cluster: . tests
Message-ID: <20070608040931.881BB39C167@new.scipy.org>

Author: cdavid
Date: 2007-06-07 23:09:24 -0500 (Thu, 07 Jun 2007)
New Revision: 3075

Modified:
   trunk/Lib/cluster/tests/test_vq.py
   trunk/Lib/cluster/vq.py
Log:
Modify kmeans2 arguments order so that they conform to the ones from kmeans

Modified: trunk/Lib/cluster/tests/test_vq.py
===================================================================
--- trunk/Lib/cluster/tests/test_vq.py	2007-06-08 02:36:11 UTC (rev 3074)
+++ trunk/Lib/cluster/tests/test_vq.py	2007-06-08 04:09:24 UTC (rev 3075)
@@ -1,7 +1,7 @@
 #! /usr/bin/env python
 
 # David Cournapeau
-# Last Change: Sat May 05 06:00 PM 2007 J
+# Last Change: Fri Jun 08 12:00 PM 2007 J
 
 # For now, just copy the tests from sandbox.pyem, so we can check that
 # kmeans works OK for trivial examples.
@@ -72,6 +72,7 @@
     #    print _py_vq_1d(data, initc)
 
 class test_kmean(NumpyTestCase):
+    #def check_kmeans
     def check_kmeans_simple(self, level=1):
         initc = N.concatenate(([[X[0]], [X[1]], [X[2]]]))
         code = initc.copy()
@@ -93,8 +94,8 @@
         """Testing simple call to kmeans2 and its results."""
         initc = N.concatenate(([[X[0]], [X[1]], [X[2]]]))
         code = initc.copy()
-        code1 = kmeans2(X, code, niter = 1)[0]
-        code2 = kmeans2(X, code, niter = 2)[0]
+        code1 = kmeans2(X, code, iter = 1)[0]
+        code2 = kmeans2(X, code, iter = 2)[0]
 
         assert_array_almost_equal(code1, CODET1)
         assert_array_almost_equal(code2, CODET2)

Modified: trunk/Lib/cluster/vq.py
===================================================================
--- trunk/Lib/cluster/vq.py	2007-06-08 02:36:11 UTC (rev 3074)
+++ trunk/Lib/cluster/vq.py	2007-06-08 04:09:24 UTC (rev 3075)
@@ -466,7 +466,7 @@
 
 _valid_init_meth = {'random': _krandinit, 'points': _kpoints}
 
-def kmeans2(data, k, minit='random', niter=10):
+def kmeans2(data, k, iter = 10, thresh = 1e-5, minit='random'):
     """Classify a set of points into k clusters using kmean algorithm.
 
     The algorithm works by minimizing the euclidian distance between data points
@@ -481,6 +481,10 @@
         k : int or ndarray
             Number of clusters. If a ndarray is given instead, it is
             interpreted as initial cluster to use instead.
+        niter : int
+            Number of iterations to run.
+        niter : float
+            (not used yet).
         minit : string
             Method for initialization. Available methods are random, points and
             uniform:
@@ -493,9 +497,6 @@
             uniform choses k points from the data such are they form a uniform
             grid od the dataset.
 
-        niter : int
-            Number of iterations to run.
-
     :Returns:
         clusters : ndarray
             the found clusters (one cluster per row).
@@ -535,8 +536,8 @@
             raise ValueError("unknown init method %s" % str(minit))
         clusters = init(data, k)
 
-    assert not niter == 0
-    return _kmeans2(data, clusters, niter, nc)
+    assert not iter == 0
+    return _kmeans2(data, clusters, iter, nc)
 
 def _kmeans2(data, code, niter, nc):
     """ "raw" version of kmeans2. Do not use directly.


From scipy-svn at scipy.org  Fri Jun  8 00:14:20 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu,  7 Jun 2007 23:14:20 -0500 (CDT)
Subject: [Scipy-svn] r3076 - trunk/Lib/cluster
Message-ID: <20070608041420.782BB39C167@new.scipy.org>

Author: cdavid
Date: 2007-06-07 23:14:14 -0500 (Thu, 07 Jun 2007)
New Revision: 3076

Modified:
   trunk/Lib/cluster/vq.py
Log:
Fix typo in kmeans2 docstring.

Modified: trunk/Lib/cluster/vq.py
===================================================================
--- trunk/Lib/cluster/vq.py	2007-06-08 04:09:24 UTC (rev 3075)
+++ trunk/Lib/cluster/vq.py	2007-06-08 04:14:14 UTC (rev 3076)
@@ -483,7 +483,7 @@
             interpreted as initial cluster to use instead.
         niter : int
             Number of iterations to run.
-        niter : float
+        thresh : float
             (not used yet).
         minit : string
             Method for initialization. Available methods are random, points and


From scipy-svn at scipy.org  Fri Jun  8 00:42:30 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu,  7 Jun 2007 23:42:30 -0500 (CDT)
Subject: [Scipy-svn] r3077 - trunk/Lib/cluster
Message-ID: <20070608044230.9020439C0C6@new.scipy.org>

Author: cdavid
Date: 2007-06-07 23:42:23 -0500 (Thu, 07 Jun 2007)
New Revision: 3077

Modified:
   trunk/Lib/cluster/vq.py
Log:
Additional init method for kmeans2, 'matrix', to avoid possible confusion between k number of components and k unique initial cluster. Close #443

Modified: trunk/Lib/cluster/vq.py
===================================================================
--- trunk/Lib/cluster/vq.py	2007-06-08 04:14:14 UTC (rev 3076)
+++ trunk/Lib/cluster/vq.py	2007-06-08 04:42:23 UTC (rev 3077)
@@ -479,8 +479,8 @@
             dimensional data, rank 2 multidimensional data, in which case one
             row is one observation.
         k : int or ndarray
-            Number of clusters. If a ndarray is given instead, it is
-            interpreted as initial cluster to use instead.
+            Number of clusters. If minit arg is 'matrix', or if a ndarray is
+            given instead, it is interpreted as initial cluster to use instead.
         niter : int
             Number of iterations to run.
         thresh : float
@@ -495,8 +495,11 @@
             points choses k points at random from the points in data.
 
             uniform choses k points from the data such are they form a uniform
-            grid od the dataset.
+            grid od the dataset (not supported yet).
 
+            matrix means that k has to be interpreted as initial clusters
+            (format is the same than data).
+
     :Returns:
         clusters : ndarray
             the found clusters (one cluster per row).
@@ -517,7 +520,7 @@
 
     # If k is not a single value, then it should be compatible with data's
     # shape
-    if N.size(k) > 1:
+    if N.size(k) > 1 or minit == 'matrix':
         if not nd == N.ndim(k):
             raise ValueError("k is not an int and has not same rank than data")
         if d == 1:
@@ -529,7 +532,9 @@
                         data")
         clusters = k.copy()
     else:
-        nc = k
+        nc = int(k)
+        if not nc == k:
+            warnings.warn("k was not an integer, was converted.")
         try:
             init = _valid_init_meth[minit]
         except KeyError:


From scipy-svn at scipy.org  Fri Jun  8 07:15:47 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri,  8 Jun 2007 06:15:47 -0500 (CDT)
Subject: [Scipy-svn] r3078 - in trunk/Lib/sandbox/pyem: . data/oldfaithful
Message-ID: <20070608111547.A99DE39C0F3@new.scipy.org>

Author: cdavid
Date: 2007-06-08 06:15:39 -0500 (Fri, 08 Jun 2007)
New Revision: 3078

Modified:
   trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/online_em.py
Log:
Add function to plot density contours in GM.

Modified: trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py	2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py	2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
-# Last Change: Wed Apr 25 06:00 PM 2007 J
-import faith as _faith
+# Last Change: Fri Jun 08 12:00 PM 2007 J
+import data as _faith
 __doc__     = _faith.DESCRSHORT
 copyright   = _faith.COPYRIGHT
 source      = _faith.SOURCE

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,7 +1,7 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Fri Nov 10 10:00 AM 2006 J
+# Last Change: Fri Jun 08 07:00 PM 2007 J
 
 import numpy as N
 import numpy.linalg as lin
@@ -188,6 +188,10 @@
         else:
             raise DenError("mean and variance are not dim conformant")
 
+    # When X is a sample from multivariate N(mu, sigma), (X-mu)Sigma^-1(X-mu)
+    # follows a Chi2(d) law. Here, we only take 2 dimensions, so Chi2 with 2
+    # degrees of freedom (See Wasserman. This is easy to see with characteristic
+    # functions)
     chi22d  = chi2(2)
     mahal   = N.sqrt(chi22d.ppf(level))
     
@@ -218,6 +222,26 @@
 
     return elps[0, :], elps[1, :]
 
+def multiple_gauss_den(data, mu, va):
+    """Helper function to generate several Gaussian
+    pdf (different parameters) from the same data"""
+    mu  = N.atleast_2d(mu)
+    va  = N.atleast_2d(va)
+
+    K   = mu.shape[0]
+    n   = data.shape[0]
+    d   = mu.shape[1]
+    
+    y   = N.zeros((K, n))
+    if mu.size == va.size:
+        for i in range(K):
+            y[i] = gauss_den(data, mu[i, :], va[i, :])
+        return y.T
+    else:
+        for i in range(K):
+            y[i] = gauss_den(data, mu[i, :], va[d*i:d*i+d, :])
+        return y.T
+
 if __name__ == "__main__":
     import pylab
 

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Mon Jun 04 07:00 PM 2007 J
+# Last Change: Fri Jun 08 07:00 PM 2007 J
 
 # Module to implement GaussianMixture class.
 
@@ -344,6 +344,8 @@
     def _get_component_pdf(self, x):
         """Returns a list of pdf, one for each component. Summing them gives
         the pdf of the mixture."""
+        # XXX: have a public function to compute the pdf at given points
+        # instead...
         std = N.sqrt(self.va[:,0])
         retval = N.empty((x.size, self.k))
         for c in range(self.k):
@@ -352,6 +354,47 @@
 
         return retval
 
+    def density_on_grid(self, nx = 50, ny = 50, maxlevel = 0.95):
+        """Do all the necessary computation for contour plot of mixture's density.
+        
+        Returns X, Y, Z and V as expected by mpl contour function."""
+
+        # Ok, it is a bit gory. Basically, we want to compute the size of the
+        # grid. We use conf_ellipse, which will return a couple of points for
+        # each component, and we can find a grid size which then is just big
+        # enough to contain all ellipses. This won't work well if two
+        # ellipsoids are crossing each other a lot (because this assumes that
+        # at a given point, one component is largely dominant for its
+        # contribution to the pdf).
+
+        # XXX: we need log pdf, not the pdf... this can save some computing
+        Xe, Ye = self.conf_ellipses(level = maxlevel)
+        ax = [N.min(Xe), N.max(Xe), N.min(Ye), N.max(Ye)]
+
+        w = ax[1] - ax[0]
+        h = ax[3] - ax[2]
+        X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \
+                N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny))
+        lden = N.log(den)
+        V = [-5, -3, -1, -0.5, ]
+        V.extend(N.linspace(0, N.max(lden), 4).tolist())
+        return X, Y, lden, N.array(V)
+
+    def _densityctr(self, xrange, yrange):
+        """Helper function to compute density contours on a grid."""
+        gr = N.meshgrid(xrange, yrange)
+        X = gr[0].flatten()
+        Y = gr[1].flatten()
+        xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1)
+        # XXX refactor computing pdf
+        d = densities.multiple_gauss_den(xdata, self.mu, self.va) * self.w
+        d = N.sum(d, 1)
+        d = d.reshape(len(yrange), len(xrange))
+
+        X = gr[0]
+        Y = gr[1]
+        return X, Y, d
+
     # Syntactic sugar
     def __repr__(self):
         repr    = ""

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Fri Jun 01 05:00 PM 2007 J
+# Last Change: Fri Jun 08 08:00 PM 2007 J
 
 # TODO:
 #   - which methods to avoid va shrinking to 0 ? There are several options, 
@@ -65,7 +65,8 @@
         d       = self.gm.d
         init    = data[0:k, :]
 
-        (code, label)   = kmean(data, init, niter)
+        # XXX: This is bogus: should do better (in kmean or here, do not know yet)
+        (code, label)   = kmean(data, init, niter, minit = 'matrix')
 
         w   = N.ones(k) / k
         mu  = code.copy()
@@ -135,7 +136,7 @@
         n   = data.shape[0]
 
         # compute the gaussian pdf
-        tgd	= multiple_gauss_den(data, self.gm.mu, self.gm.va)
+        tgd	= densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
         # multiply by the weight
         tgd	*= self.gm.w
         # Normalize to get a pdf
@@ -202,7 +203,7 @@
         the data """
         assert(self.isinit)
         # compute the gaussian pdf
-        tgd	= multiple_gauss_den(data, self.gm.mu, self.gm.va)
+        tgd	= densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
         # multiply by the weight
         tgd	*= self.gm.w
 
@@ -367,27 +368,6 @@
     else:
         return False
 
-def multiple_gauss_den(data, mu, va):
-    """Helper function to generate several Gaussian
-    pdf (different parameters) from the same data"""
-    mu  = N.atleast_2d(mu)
-    va  = N.atleast_2d(va)
-
-    K   = mu.shape[0]
-    n   = data.shape[0]
-    d   = mu.shape[1]
-    
-    y   = N.zeros((K, n))
-    if mu.size == va.size:
-        for i in range(K):
-            y[i] = densities.gauss_den(data, mu[i, :], va[i, :])
-        return y.T
-    else:
-        for i in range(K):
-            y[i] = densities.gauss_den(data, mu[i, :], 
-                        va[d*i:d*i+d, :])
-        return y.T
-
 if __name__ == "__main__":
     import copy
     #=============================

Modified: trunk/Lib/sandbox/pyem/online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/online_em.py	2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/online_em.py	2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Fri Jun 01 05:00 PM 2007 J
+# Last Change: Fri Jun 08 08:00 PM 2007 J
 
 #---------------------------------------------
 # This is not meant to be used yet !!!! I am 
@@ -67,7 +67,7 @@
             self.cxx    = N.outer(w, mean(init_data ** 2, 0))
 
             # w, mu and va init is the same that in the standard case
-            (code, label)   = kmean(init_data, init_data[0:k, :], niter)
+            (code, label)   = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
             mu          = code.copy()
             va          = N.zeros((k, d))
             for i in range(k):
@@ -102,7 +102,7 @@
             self.cxx    = N.outer(w, mean(init_data ** 2, 0))
 
             # w, mu and va init is the same that in the standard case
-            (code, label)   = kmean(init_data, init_data[0:k, :], niter)
+            (code, label)   = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
             mu          = code.copy()
             va          = N.zeros((k, d))
             for i in range(k):
@@ -176,7 +176,7 @@
 
         # w, mu and va init is the same that in the standard case
         (code, label)   = kmean(init_data[:, N.newaxis], \
-                init_data[0:k, N.newaxis], niter)
+                init_data[0:k, N.newaxis], iter = niter)
         mu          = code.copy()
         va          = N.zeros((k, 1))
         for i in range(k):


From scipy-svn at scipy.org  Fri Jun  8 23:51:51 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri,  8 Jun 2007 22:51:51 -0500 (CDT)
Subject: [Scipy-svn] r3079 - in trunk/Lib/sandbox/pyem/data: . oldfaithful
Message-ID: <20070609035151.C21E639C12F@new.scipy.org>

Author: cdavid
Date: 2007-06-08 22:51:44 -0500 (Fri, 08 Jun 2007)
New Revision: 3079

Added:
   trunk/Lib/sandbox/pyem/data/__init__.py
   trunk/Lib/sandbox/pyem/data/oldfaithful/COPYING
Modified:
   trunk/Lib/sandbox/pyem/data/oldfaithful/data.py
Log:
Add proper license in data, correct typo (double copyright)

Added: trunk/Lib/sandbox/pyem/data/__init__.py
===================================================================

Added: trunk/Lib/sandbox/pyem/data/oldfaithful/COPYING
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/COPYING	2007-06-08 11:15:39 UTC (rev 3078)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/COPYING	2007-06-09 03:51:44 UTC (rev 3079)
@@ -0,0 +1,34 @@
+# The code and descriptive text is copyrighted and offered under the terms of
+# the BSD License from the authors; see below. However, the actual dataset may
+# have a different origin and intellectual property status. See the SOURCE and
+# COPYRIGHT variables for this information.
+
+# Copyright (c) 2007 David Cournapeau <cournape at gmail.com>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the author nor the names of any contributors may be used
+#       to endorse or promote products derived from this software without
+#       specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Modified: trunk/Lib/sandbox/pyem/data/oldfaithful/data.py
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/data.py	2007-06-08 11:15:39 UTC (rev 3078)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/data.py	2007-06-09 03:51:44 UTC (rev 3079)
@@ -1,8 +1,6 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
-# Copyright (C) 2007 David Cournapeau <cournape at gmail.com>
-
 # The code and descriptive text is copyrighted and offered under the terms of
 # the BSD License from the authors; see below. However, the actual dataset may
 # have a different origin and intellectual property status. See the SOURCE and


From scipy-svn at scipy.org  Sat Jun  9 02:16:29 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat,  9 Jun 2007 01:16:29 -0500 (CDT)
Subject: [Scipy-svn] r3080 - in trunk/Lib/sandbox/pyem: . tests
Message-ID: <20070609061629.9C48539C191@new.scipy.org>

Author: cdavid
Date: 2007-06-09 01:16:08 -0500 (Sat, 09 Jun 2007)
New Revision: 3080

Modified:
   trunk/Lib/sandbox/pyem/README
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/misc.py
   trunk/Lib/sandbox/pyem/tests/test_densities.py
Log:
Polish contour functions, so that choosing the dimension of projection works.

Modified: trunk/Lib/sandbox/pyem/README
===================================================================
--- trunk/Lib/sandbox/pyem/README	2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/README	2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,7 +1,5 @@
-Last Change: Fri Aug 04 07:00 PM 2006 J
+Last Change: Sat Jun 09 12:00 PM 2007 J
 
-Version 0.4.2
-
 pyem is a python module build upon numpy and scipy
 (see http://www.scipy.org/) for learning mixtures models
 using Expectation Maximization. For now, only Gaussian
@@ -10,16 +8,6 @@
  * computation of Gaussian pdf for multi-variate Gaussian
  random vectors (spherical, diagonal and full covariance matrices)
  * Sampling of Gaussian Mixtures Models
- * Confidence ellipsoides with probability (fixed level of 
- 0.39 for now)
+ * Confidence ellipsoids at an arbitrary probability level
  * Classic EM for Gaussian Mixture Models
  * K-mean based and random initialization for EM available
-
-Has been tested on the following platforms:
-
- * Ubuntu dapper, bi Xeon 3.2 Ghz, 2 Go RAM
- python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2
- * Ubuntu dapper, pentium M 1.2 ghz,. 512 Mo Ram
- python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2
- * Ubuntu dapper, minimac (ppc G4 1.42 Ghz, 1Gb RAM)
- python 2.4 + pyrex, numpy 1.0.b2SVN + scipy 0.5.1SVN, uses atlas3-sse2

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,12 +1,13 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Fri Jun 08 07:00 PM 2007 J
+# Last Change: Sat Jun 09 02:00 PM 2007 J
 
 import numpy as N
 import numpy.linalg as lin
 from numpy.random import randn
 from scipy.stats import chi2
+import misc
 
 # Error classes
 class DenError(Exception):
@@ -164,19 +165,28 @@
  
     return y
 
-# To plot a confidence ellipse from multi-variate gaussian pdf
-def gauss_ell(mu, va, dim = [0, 1], npoints = 100, level = 0.39):
+# To get coordinates of a confidence ellipse from multi-variate gaussian pdf
+def gauss_ell(mu, va, dim = misc._DEF_VIS_DIM, \
+        npoints = misc._DEF_ELL_NP, \
+        level = misc._DEF_LEVEL):
     """ Given a mean and covariance for multi-variate
     gaussian, returns npoints points for the ellipse
     of confidence given by level (all points will be inside
     the ellipsoides with a probability equal to level)
     
     Returns the coordinate x and y of the ellipse"""
+    if level >= 1 or level <= 0:
+        raise ValueError("level should be a scale strictly between 0 and 1.""")
     
     mu      = N.atleast_1d(mu)
     va      = N.atleast_1d(va)
+    d       = mu.shape[0]
     c       = N.array(dim)
 
+    print c, d
+    if N.any(c < 0) or N.any(c >= d):
+        raise ValueError("dim elements should be >= 0 and < %d (dimension"\
+                " of the variance)" % d)
     if mu.size == va.size:
         mode    = 'diag'
     else:

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Fri Jun 08 07:00 PM 2007 J
+# Last Change: Sat Jun 09 03:00 PM 2007 J
 
 # Module to implement GaussianMixture class.
 
@@ -7,7 +7,7 @@
 from numpy.random import randn, rand
 import numpy.linalg as lin
 import densities
-from misc import _MAX_DBL_DEV
+import misc
 
 # Right now, two main usages of a Gaussian Model are possible
 #   - init a Gaussian Model with meta-parameters, and trains it
@@ -147,7 +147,8 @@
 
         return X
 
-    def conf_ellipses(self, *args, **kargs):
+    def conf_ellipses(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, \
+        level = misc._DEF_LEVEL):
         """Returns a list of confidence ellipsoids describing the Gmm
         defined by mu and va. Check densities.gauss_ell for details
 
@@ -179,14 +180,14 @@
         if self.mode == 'diag':
             for i in range(self.k):
                 xe, ye  = densities.gauss_ell(self.mu[i,:], self.va[i,:], 
-                        *args, **kargs)
+                        dim, npoints, level)
                 Xe.append(xe)
                 Ye.append(ye)
         elif self.mode == 'full':
             for i in range(self.k):
                 xe, ye  = densities.gauss_ell(self.mu[i,:], 
                         self.va[i*self.d:i*self.d+self.d,:], 
-                        *args, **kargs)
+                        dim, npoints, level)
                 Xe.append(xe)
                 Ye.append(ye)
 
@@ -253,7 +254,8 @@
     #=================
     # Plotting methods
     #=================
-    def plot(self, *args, **kargs):
+    def plot(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, 
+            level = misc._DEF_LEVEL):
         """Plot the ellipsoides directly for the model
         
         Returns a list of lines, so that their style can be modified. By default,
@@ -266,7 +268,7 @@
 
         assert self.d > 1
         k       = self.k
-        Xe, Ye  = self.conf_ellipses(*args, **kargs)
+        Xe, Ye  = self.conf_ellipses(dim, npoints, level)
         try:
             import pylab as P
             return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in range(k)]
@@ -354,7 +356,8 @@
 
         return retval
 
-    def density_on_grid(self, nx = 50, ny = 50, maxlevel = 0.95):
+    def density_on_grid(self, dim = misc._DEF_VIS_DIM, nx = 50, ny = 50,
+            maxlevel = 0.95):
         """Do all the necessary computation for contour plot of mixture's density.
         
         Returns X, Y, Z and V as expected by mpl contour function."""
@@ -368,33 +371,49 @@
         # contribution to the pdf).
 
         # XXX: we need log pdf, not the pdf... this can save some computing
-        Xe, Ye = self.conf_ellipses(level = maxlevel)
+        Xe, Ye = self.conf_ellipses(level = maxlevel, dim = dim)
         ax = [N.min(Xe), N.max(Xe), N.min(Ye), N.max(Ye)]
 
         w = ax[1] - ax[0]
         h = ax[3] - ax[2]
         X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \
-                N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny))
+                N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim)
         lden = N.log(den)
         V = [-5, -3, -1, -0.5, ]
         V.extend(N.linspace(0, N.max(lden), 4).tolist())
         return X, Y, lden, N.array(V)
 
-    def _densityctr(self, xrange, yrange):
+    def _densityctr(self, xrange, yrange, dim = misc._DEF_VIS_DIM):
         """Helper function to compute density contours on a grid."""
         gr = N.meshgrid(xrange, yrange)
         X = gr[0].flatten()
         Y = gr[1].flatten()
         xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1)
         # XXX refactor computing pdf
-        d = densities.multiple_gauss_den(xdata, self.mu, self.va) * self.w
-        d = N.sum(d, 1)
-        d = d.reshape(len(yrange), len(xrange))
+        dmu = self.mu[:, dim]
+        dva = self._get_va(dim)
+        den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w
+        den = N.sum(den, 1)
+        den = den.reshape(len(yrange), len(xrange))
 
         X = gr[0]
         Y = gr[1]
-        return X, Y, d
+        return X, Y, den
 
+    def _get_va(self, dim):
+        """Returns variance limited to the dimensions in dim."""
+        dim = N.array(dim)
+        if dim.any() < 0 or dim.any() >= self.d:
+            raise ValueError("dim elements should be between 0 and dimension"\
+                    " of the mixture.")
+        if self.mode == 'diag':
+            return self.va[:, dim]
+        elif self.mode == 'full':
+            tidx = N.array([N.array(dim) + i * self.d for i in range(self.k)])
+            tidx.flatten()
+            return self.va[tidx, dim]
+        else:
+            raise ValueError("Unkown mode")
     # Syntactic sugar
     def __repr__(self):
         repr    = ""
@@ -450,7 +469,7 @@
     """
         
     # Check that w is valid
-    if N.fabs(N.sum(w, 0)  - 1) > _MAX_DBL_DEV:
+    if N.fabs(N.sum(w, 0)  - 1) > misc._MAX_DBL_DEV:
         raise GmParamError('weight does not sum to 1')
     
     if not len(w.shape) == 1:

Modified: trunk/Lib/sandbox/pyem/misc.py
===================================================================
--- trunk/Lib/sandbox/pyem/misc.py	2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/misc.py	2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,5 +1,12 @@
-# Last Change: Fri Nov 10 10:00 AM 2006 J
+# Last Change: Sat Jun 09 12:00 PM 2007 J
 
+#========================================================
+# Constants used throughout the module (def args, etc...)
+#========================================================
+# This is the default dimension for representing confidence ellipses
+_DEF_VIS_DIM = [0, 1]
+_DEF_ELL_NP = 100
+_DEF_LEVEL = 0.39
 #=====================================================================
 # "magic number", that is number used to control regularization and co
 # Change them at your risk !

Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-09 03:51:44 UTC (rev 3079)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-09 06:16:08 UTC (rev 3080)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Mon May 28 01:00 PM 2007 J
+# Last Change: Sat Jun 09 02:00 PM 2007 J
 
 # TODO:
 #   - having "fake tests" to check that all mode (scalar, diag and full) are
@@ -13,6 +13,7 @@
 
 set_package_path()
 from pyem.densities import gauss_den
+import pyem.densities
 restore_path()
 
 #Optional:
@@ -105,89 +106,15 @@
         self._generate_test_data_2d_full()
         self._check(level)
 
+class test_gauss_ell(NumpyTestCase):
+    def test_dim(self):
+        pyem.densities.gauss_ell([0, 1], [1, 2.], [0, 1])
+        try:
+            pyem.densities.gauss_ell([0, 1], [1, 2.], [0, 2])
+            raise AssertionError("this call should not succeed, bogus dim.")
+        except ValueError, e:
+            print "Call with bogus dim did not succeed, OK"
+
+
 if __name__ == "__main__":
     NumpyTest().run()
-
-# def generate_test_data(n, d, mode = 'diag', file='test.dat'):
-#     """Generate a set of data of dimension d, with n frames,
-#     that is input data, mean, var and output of gden, so that
-#     other implementations can be tested against"""
-#     mu  = randn(1, d)
-#     if mode == 'diag':
-#         va  = abs(randn(1, d))
-#     elif mode == 'full':
-#         va  = randn(d, d)
-#         va  = dot(va, va.transpose())
-# 
-#     input   = randn(n, d)
-#     output  = gauss_den(input, mu, va)
-# 
-#     import tables
-#     h5file  = tables.openFile(file, "w")
-# 
-#     h5file.createArray(h5file.root, 'input', input)
-#     h5file.createArray(h5file.root, 'mu', mu)
-#     h5file.createArray(h5file.root, 'va', va)
-#     h5file.createArray(h5file.root, 'output', output)
-# 
-#     h5file.close()
-# 
-# def test_gauss_den():
-#     """"""
-#     # import tables
-#     # import numpy as N
-#     # 
-#     # filename    = 'dendata.h5'
-# 
-#     # # # Dimension 1
-#     # # d   = 1
-#     # # mu  = 1.0
-#     # # va  = 2.0
-# 
-#     # # X   = randn(1e3, 1)
-# 
-#     # # Y   = gauss_den(X, mu, va)
-# 
-#     # # h5file      = tables.openFile(filename, "w")
-# 
-#     # # h5file.createArray(h5file.root, 'X', X)
-#     # # h5file.createArray(h5file.root, 'mu', mu)
-#     # # h5file.createArray(h5file.root, 'va', va)
-#     # # h5file.createArray(h5file.root, 'Y', Y)
-# 
-#     # # h5file.close()
-# 
-#     # # # Dimension 2, diag
-#     # # d   = 2
-#     # # mu  = N.array([1.0, -2.0])
-#     # # va  = N.array([1.0, 2.0])
-# 
-#     # # X   = randn(1e3, 2)
-# 
-#     # # Y   = gauss_den(X, mu, va)
-# 
-#     # # h5file      = tables.openFile(filename, "w")
-# 
-#     # # h5file.createArray(h5file.root, 'X', X)
-#     # # h5file.createArray(h5file.root, 'mu', mu)
-#     # # h5file.createArray(h5file.root, 'va', va)
-#     # # h5file.createArray(h5file.root, 'Y', Y)
-# 
-#     # # Dimension 2, full
-#     # d   = 2
-#     # mu  = N.array([[0.2, -1.0]])
-#     # va  = N.array([[1.2, 0.1], [0.1, 0.5]])
-# 
-#     # X   = randn(1e3, 2)
-# 
-#     # Y   = gauss_den(X, mu, va)
-# 
-#     # h5file      = tables.openFile(filename, "w")
-# 
-#     # h5file.createArray(h5file.root, 'X', X)
-#     # h5file.createArray(h5file.root, 'mu', mu)
-#     # h5file.createArray(h5file.root, 'va', va)
-#     # h5file.createArray(h5file.root, 'Y', Y)
-# 
-#     # h5file.close()
-# 


From scipy-svn at scipy.org  Sat Jun  9 02:42:24 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat,  9 Jun 2007 01:42:24 -0500 (CDT)
Subject: [Scipy-svn] r3081 - in trunk/Lib/sandbox/pyem: . tests
Message-ID: <20070609064224.263BF39C0AC@new.scipy.org>

Author: cdavid
Date: 2007-06-09 01:42:03 -0500 (Sat, 09 Jun 2007)
New Revision: 3081

Added:
   trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
Modified:
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
Log:
Fail nicely when calling the wrong plot function (plot1d for multidimensional models, plot for 1d models).

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-09 06:16:08 UTC (rev 3080)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-09 06:42:03 UTC (rev 3081)
@@ -1,7 +1,7 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Sat Jun 09 02:00 PM 2007 J
+# Last Change: Sat Jun 09 03:00 PM 2007 J
 
 import numpy as N
 import numpy.linalg as lin
@@ -183,7 +183,6 @@
     d       = mu.shape[0]
     c       = N.array(dim)
 
-    print c, d
     if N.any(c < 0) or N.any(c >= d):
         raise ValueError("dim elements should be >= 0 and < %d (dimension"\
                 " of the variance)" % d)

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 06:16:08 UTC (rev 3080)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 06:42:03 UTC (rev 3081)
@@ -74,6 +74,10 @@
             self.va  = N.zeros((k * d, d))
 
         self.is_valid   = False
+        if d > 1:
+            self.is1d = False
+        else:
+            self.is1d = True
 
     def set_param(self, weights, mu, sigma):
         """Set parameters of the model. Args should
@@ -171,6 +175,10 @@
             Will plot samples X draw from the mixture model, and
             plot the ellipses of equi-probability from the mean with
             fixed level of confidence 0.39.  """
+        if self.is1d:
+            raise ValueError("This function does not make sense for 1d "
+                "mixtures.")
+
         if not self.is_valid:
             raise GmParamError("""Parameters of the model has not been 
                 set yet, please set them using self.set_param()""")
@@ -262,11 +270,14 @@
         the style is red color, and nolegend for all of them.
         
         Does not work for 1d"""
+        if self.is1d:
+            raise ValueError("This function does not make sense for 1d "
+                "mixtures.")
+
         if not self.is_valid:
             raise GmParamError("""Parameters of the model has not been 
                 set yet, please set them using self.set_param()""")
 
-        assert self.d > 1
         k       = self.k
         Xe, Ye  = self.conf_ellipses(dim, npoints, level)
         try:
@@ -288,6 +299,10 @@
             - h['gpdf'] is the line for the global pdf
             - h['conf'] is a list of filling area
         """
+        if not self.is1d:
+            raise ValueError("This function does not make sense for "
+                "mixtures which are not unidimensional")
+
         # This is not optimized at all, may be slow. Should not be
         # difficult to make much faster, but it is late, and I am lazy
         # XXX separete the computation from the plotting
@@ -361,6 +376,9 @@
         """Do all the necessary computation for contour plot of mixture's density.
         
         Returns X, Y, Z and V as expected by mpl contour function."""
+        if self.is1d:
+            raise ValueError("This function does not make sense for 1d "
+                "mixtures.")
 
         # Ok, it is a bit gory. Basically, we want to compute the size of the
         # grid. We use conf_ellipse, which will return a couple of points for
@@ -414,6 +432,7 @@
             return self.va[tidx, dim]
         else:
             raise ValueError("Unkown mode")
+
     # Syntactic sugar
     def __repr__(self):
         repr    = ""

Added: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py	2007-06-09 06:16:08 UTC (rev 3080)
+++ trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py	2007-06-09 06:42:03 UTC (rev 3081)
@@ -0,0 +1,46 @@
+#! /usr/bin/env python
+# Last Change: Sat Jun 09 03:00 PM 2007 J
+
+# For now, just test that all mode/dim execute correctly
+
+import sys
+from numpy.testing import *
+
+import numpy as N
+
+set_package_path()
+from pyem import GM
+restore_path()
+
+class test_BasicFunc(NumpyTestCase):
+    """Check that basic functionalities work."""
+    def test_conf_ellip(self):
+        """Only test whether the call succeeds. To check whether the result is
+        OK, you have to plot the results."""
+        d = 3
+        k = 3
+        w, mu, va = GM.gen_param(d, k)
+        gm = GM.fromvalues(w, mu, va)
+        gm.conf_ellipses()
+
+    def test_1d_bogus(self):
+        """Check that functions which do not make sense for 1d fail nicely."""
+        d = 1
+        k = 2
+        w, mu, va = GM.gen_param(d, k)
+        gm = GM.fromvalues(w, mu, va)
+        try:
+            gm.conf_ellipses()
+            raise AssertionError("This should not work !")
+        except ValueError, e:
+            print "Ok, conf_ellipses failed as expected (with msg: " + str(e) + ")"
+
+        try:
+            gm.density_on_grid()
+            raise AssertionError("This should not work !")
+        except ValueError, e:
+            print "Ok, density_grid failed as expected (with msg: " + str(e) + ")"
+
+
+if __name__ == "__main__":
+    NumpyTest().run()

Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-09 06:16:08 UTC (rev 3080)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-09 06:42:03 UTC (rev 3081)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Tue Oct 24 06:00 PM 2006 J
+# Last Change: Sat Jun 09 03:00 PM 2007 J
 
 # For now, just test that all mode/dim execute correctly
 

From scipy-svn at scipy.org  Sat Jun  9 04:05:29 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat,  9 Jun 2007 03:05:29 -0500 (CDT)
Subject: [Scipy-svn] r3082 - in trunk/Lib/sandbox/pyem: . examples
Message-ID: <20070609080529.0893F39C19B@new.scipy.org>

Author: cdavid
Date: 2007-06-09 03:05:03 -0500 (Sat, 09 Jun 2007)
New Revision: 3082

Added:
   trunk/Lib/sandbox/pyem/examples/pdfestimation.py
   trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py
   trunk/Lib/sandbox/pyem/examples/utils.py
Modified:
   trunk/Lib/sandbox/pyem/TODO
Log:
Add example of pdf estimation with EM

Modified: trunk/Lib/sandbox/pyem/TODO
===================================================================
--- trunk/Lib/sandbox/pyem/TODO	2007-06-09 06:42:03 UTC (rev 3081)
+++ trunk/Lib/sandbox/pyem/TODO	2007-06-09 08:05:03 UTC (rev 3082)
@@ -1,12 +1,12 @@
-# Last Change: Mon Jun 04 07:00 PM 2007 J
+# Last Change: Sat Jun 09 04:00 PM 2007 J
 
-
 Things which must be implemented for a 1.0 version (in importante order)
     - A classifier
     - handle rank 1 for 1d data
     - basic regularization
     - docstrings
-    - demo for pdf estimtation, discriminant analysis and clustering
+    - demo for pdf estimation, discriminant analysis and clustering
+    - scaling of data: maybe something to handle scaling internally ?
 
 Things which would be nice (after 1.0 version):
     - Bayes prior (hard, suppose MCMC)

Added: trunk/Lib/sandbox/pyem/examples/pdfestimation.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/pdfestimation.py	2007-06-09 06:42:03 UTC (rev 3081)
+++ trunk/Lib/sandbox/pyem/examples/pdfestimation.py	2007-06-09 08:05:03 UTC (rev 3082)
@@ -0,0 +1,50 @@
+#! /usr/bin/env python
+# Last Change: Sat Jun 09 03:00 PM 2007 J
+
+# Example of doing pdf estimation with EM algorithm. Requires matplotlib.
+import numpy as N
+from numpy.testing import set_package_path, restore_path
+
+import pylab as P
+
+set_package_path()
+import pyem
+restore_path()
+import utils
+
+oldfaithful = utils.get_faithful()
+
+# We want the relationship between d(t) and w(t+1), but get_faithful gives
+# d(t), w(t), so we have to shift to get the "usual" faithful data
+waiting = oldfaithful[1:, 1:]
+duration = oldfaithful[:len(waiting), :1]
+dt = N.concatenate((duration, waiting), 1)
+
+# Scale the data so that each component is in [0..1]
+dt = utils.scale(dt)
+
+# This function trains a mixture model with k components, and returns the
+# trained model and the BIC
+def cluster(data, k, mode = 'full'):
+    d = data.shape[1]
+    gm = pyem.GM(d, k, mode)
+    gmm = pyem.GMM(gm)
+    em = pyem.EM()
+    em.train(data, gmm, maxiter = 20)
+    return gm, gmm.bic(data)
+
+# bc will contain a list of BIC values for each model trained
+bc = []
+mode = 'full'
+for k in range(1, 5):
+    # Train a model with k components, and plot isodensity curves
+    P.subplot(2, 2, k)
+    gm, b = cluster(dt, k = k, mode = mode)
+    bc.append(b)
+
+    X, Y, Z, V = gm.density_on_grid()
+    P.contour(X, Y, Z, V)
+    P.plot(dt[:, 0], dt[:, 1], '.')
+
+print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1)
+P.show()

Added: trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py	2007-06-09 06:42:03 UTC (rev 3081)
+++ trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py	2007-06-09 08:05:03 UTC (rev 3082)
@@ -0,0 +1,69 @@
+#! /usr/bin/env python
+# Last Change: Sat Jun 09 04:00 PM 2007 J
+
+# Example of doing pdf estimation with EM algorithm. Requires matplotlib.
+import numpy as N
+from numpy.testing import set_package_path, restore_path
+
+import pylab as P
+import matplotlib as MPL
+
+set_package_path()
+import pyem
+restore_path()
+import utils
+
+oldfaithful = utils.get_faithful()
+
+duration = oldfaithful[:, :1]
+waiting = oldfaithful[:, 1:]
+
+#dt = utils.scale(duration)
+#dt = duration / 60.
+dt = waiting / 60.
+
+# This function trains a mixture model with k components, and returns the
+# trained model and the BIC
+def cluster(data, k):
+    d = data.shape[1]
+    gm = pyem.GM(d, k)
+    gmm = pyem.GMM(gm)
+    em = pyem.EM()
+    em.train(data, gmm, maxiter = 20)
+    return gm, gmm.bic(data)
+
+# bc will contain a list of BIC values for each model trained, gml the
+# corresponding mixture model
+bc = []
+gml = []
+
+for k in range(1, 8):
+    gm, b = cluster(dt, k = k)
+    bc.append(b)
+    gml.append(gm)
+
+mbic = N.argmax(bc)
+
+# Below is code to display a figure with histogram and best model (in the BIC sense)
+# pdf, with the BIC as a function of the number of components on the right.
+P.figure(figsize = [12, 7])
+P.subplot(1, 2, 1)
+h = gml[mbic].plot1d(gpdf=True)
+h['gpdf'][0].set_linestyle('-')
+h['gpdf'][0].set_label('pdf of the mixture')
+h['pdf'][0].set_label('pdf of individual component')
+[l.set_linestyle('-') for l in h['pdf']]
+[l.set_color('g') for l in h['pdf']]
+
+prop = MPL.font_manager.FontProperties(size='smaller')
+P.legend(loc = 'best', prop = prop)
+
+P.hist(dt, 25, normed = 1, fill = False)
+P.xlabel('waiting time between consecutive eruption (in min)')
+
+P.subplot(1, 2, 2)
+P.plot(N.arange(1, 8), bc, 'o:')
+P.xlabel("number of components")
+P.ylabel("BIC")
+print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1)
+P.show()

Added: trunk/Lib/sandbox/pyem/examples/utils.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/utils.py	2007-06-09 06:42:03 UTC (rev 3081)
+++ trunk/Lib/sandbox/pyem/examples/utils.py	2007-06-09 08:05:03 UTC (rev 3082)
@@ -0,0 +1,44 @@
+#! /usr/bin/env python
+# Last Change: Fri Jun 08 04:00 PM 2007 J
+
+# Various utilities for examples 
+
+import numpy as N
+from numpy.testing import set_package_path, restore_path
+
+# XXX: Bouah, hackish... Will go away once scipydata found its way
+set_package_path()
+from pyem.data import oldfaithful
+restore_path()
+
+def get_faithful():
+    """Return faithful data as a nx2 array, first column being duration, second
+    being waiting time."""
+    # Load faithful data, convert waiting into integer, remove L, M and S data
+    data = oldfaithful.load()
+    tmp1 = []
+    tmp2 = []
+    for i in data:
+        if not (i[0] == 'L' or i[0] == 'M' or i[0] == 'S'):
+            tmp1.append(i[0])
+            tmp2.append(i[1])
+            
+    waiting = N.array([int(i) for i in tmp1], dtype = N.float)
+    duration = N.array([i for i in tmp2], dtype = N.float)
+
+    waiting = waiting[:, N.newaxis]
+    duration = duration[:, N.newaxis]
+
+    return N.concatenate((waiting, duration), 1)
+
+def scale(data):
+    """ Scale data such that each column is in the range [0..1].
+
+    Note: inplace."""
+    n = N.min(data, 0)
+    m = N.max(data, 0)
+
+    data -= n
+    data /= (m-n)
+    return data
+


From scipy-svn at scipy.org  Sat Jun  9 04:12:02 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat,  9 Jun 2007 03:12:02 -0500 (CDT)
Subject: [Scipy-svn] r3083 - in trunk/Lib/sandbox/pyem: . data
	data/oldfaithful data/oldfaithful/src doc doc/examples
	examples profile_data src tests
Message-ID: <20070609081202.3551F39C0E6@new.scipy.org>

Author: cdavid
Date: 2007-06-09 03:11:46 -0500 (Sat, 09 Jun 2007)
New Revision: 3083

Modified:
   trunk/Lib/sandbox/pyem/
   trunk/Lib/sandbox/pyem/data/
   trunk/Lib/sandbox/pyem/data/oldfaithful/
   trunk/Lib/sandbox/pyem/data/oldfaithful/src/
   trunk/Lib/sandbox/pyem/doc/
   trunk/Lib/sandbox/pyem/doc/examples/
   trunk/Lib/sandbox/pyem/examples/
   trunk/Lib/sandbox/pyem/profile_data/
   trunk/Lib/sandbox/pyem/src/
   trunk/Lib/sandbox/pyem/tests/
Log:
Set svn:ignore to sane values everywhere in pyem


Property changes on: trunk/Lib/sandbox/pyem
___________________________________________________________________
Name: svn:ignore
   - *.pyc
*.swp
*.pyd
*.so
*.prof

   + *.pyc
*.swp
*.pyd
*.so
*.prof


Property changes on: trunk/Lib/sandbox/pyem/data
___________________________________________________________________
Name: svn:ignore
   + *.pyc
*.swp
*.pyd
*.so
*.prof


Property changes on: trunk/Lib/sandbox/pyem/data/oldfaithful
___________________________________________________________________
Name: svn:ignore
   + *.pyc
*.swp
*.pyd
*.so
*.prof


Property changes on: trunk/Lib/sandbox/pyem/data/oldfaithful/src
___________________________________________________________________
Name: svn:ignore
   + *.pyc
*.swp
*.pyd
*.so
*.prof


Property changes on: trunk/Lib/sandbox/pyem/doc
___________________________________________________________________
Name: svn:ignore
   - *.aux
*.log
*.out
*.tex

   + *.pyc
*.swp
*.pyd
*.so
*.prof


Property changes on: trunk/Lib/sandbox/pyem/doc/examples
___________________________________________________________________
Name: svn:ignore
   + *.pyc
*.swp
*.pyd
*.so
*.prof


Property changes on: trunk/Lib/sandbox/pyem/examples
___________________________________________________________________
Name: svn:ignore
   + *.pyc
*.swp
*.pyd
*.so
*.prof


Property changes on: trunk/Lib/sandbox/pyem/profile_data
___________________________________________________________________
Name: svn:ignore
   - *.pyc
*.swp
*.pyd
*.so

   + *.pyc
*.swp
*.pyd
*.so
*.prof


Property changes on: trunk/Lib/sandbox/pyem/src
___________________________________________________________________
Name: svn:ignore
   - *.pyc
*.swp
*.pyd
*.so

   + *.pyc
*.swp
*.pyd
*.so
*.prof


Property changes on: trunk/Lib/sandbox/pyem/tests
___________________________________________________________________
Name: svn:ignore
   - *.pyc
*.swp
*.pyd
*.so

   + *.pyc
*.swp
*.pyd
*.so
*.prof


From scipy-svn at scipy.org  Sat Jun  9 04:38:13 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat,  9 Jun 2007 03:38:13 -0500 (CDT)
Subject: [Scipy-svn] r3084 - in trunk/Lib/sandbox/pyem: . data
Message-ID: <20070609083813.320FD39C19B@new.scipy.org>

Author: cdavid
Date: 2007-06-09 03:37:59 -0500 (Sat, 09 Jun 2007)
New Revision: 3084

Added:
   trunk/Lib/sandbox/pyem/data/setup.py
Modified:
   trunk/Lib/sandbox/pyem/setup.py
Log:
Add data as a proper submodule

Added: trunk/Lib/sandbox/pyem/data/setup.py
===================================================================
--- trunk/Lib/sandbox/pyem/data/setup.py	2007-06-09 08:11:46 UTC (rev 3083)
+++ trunk/Lib/sandbox/pyem/data/setup.py	2007-06-09 08:37:59 UTC (rev 3084)
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+def configuration(parent_package='',top_path=None):
+    from numpy.distutils.misc_util import Configuration
+    config = Configuration('data',parent_package,top_path)
+    config.add_subpackage('oldfaithful')
+    config.make_config_py() # installs __config__.py
+    return config
+
+if __name__ == '__main__':
+    print 'This is the wrong setup.py file to run'

Modified: trunk/Lib/sandbox/pyem/setup.py
===================================================================
--- trunk/Lib/sandbox/pyem/setup.py	2007-06-09 08:11:46 UTC (rev 3083)
+++ trunk/Lib/sandbox/pyem/setup.py	2007-06-09 08:37:59 UTC (rev 3084)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Fri Jun 01 05:00 PM 2007 J
+# Last Change: Sat Jun 09 05:00 PM 2007 J
 # TODO:
 #   - check how to handle cmd line build options with distutils and use
 #   it in the building process
@@ -28,6 +28,7 @@
     from numpy.distutils.misc_util import Configuration
     config = Configuration(package_name,parent_package,top_path,
              version     = VERSION)
+    config.add_subpackage('data')
     config.add_data_dir('tests')
     config.add_data_dir('profile_data')
     config.add_extension('c_gden',


From scipy-svn at scipy.org  Sat Jun  9 07:39:04 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat,  9 Jun 2007 06:39:04 -0500 (CDT)
Subject: [Scipy-svn] r3085 - trunk/Lib/sandbox/pyem
Message-ID: <20070609113904.D9C6939C0E7@new.scipy.org>

Author: cdavid
Date: 2007-06-09 06:38:41 -0500 (Sat, 09 Jun 2007)
New Revision: 3085

Modified:
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/misc.py
Log:
Clean up densities.py code, set docstrings to rest

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-09 08:37:59 UTC (rev 3084)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-09 11:38:41 UTC (rev 3085)
@@ -1,11 +1,15 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Sat Jun 09 08:00 PM 2007 J
+"""This module implements various bsic functions related to multivariate
+gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
 
+__docformat__ = 'restructuredtext'
+
 import numpy as N
 import numpy.linalg as lin
-from numpy.random import randn
+#from numpy.random import randn
 from scipy.stats import chi2
 import misc
 
@@ -18,6 +22,7 @@
         message -- explanation of the error"""
     def __init__(self, message):
         self.message    = message
+        Exception.__init__(self)
     
     def __str__(self):
         return self.message
@@ -25,33 +30,50 @@
 # The following function do all the fancy stuff to check that parameters
 # are Ok, and call the right implementation if args are OK.
 def gauss_den(x, mu, va, log = False):
-    """ Compute multivariate Gaussian density at points x for 
+    """Compute multivariate Gaussian density at points x for 
     mean mu and variance va.
     
+    :Parameters:
+        x : ndarray
+            points where to estimate the pdf.  each row of the array is one
+            point of d dimension
+        mu : ndarray
+            mean of the pdf. Should have the same dimension d as points in x.
+        va : ndarray
+            variance of the pdf. If va has d elements, va is interpreted as the
+            diagonal elements of the actual covariance matrix. Otherwise,
+            should be a dxd matrix (and positive definite).
+        log : boolean
+            if True, returns the log-pdf instead of the pdf.
+
+    :Returns:
+        pdf : ndarray
+            Returns a rank 1 array of the pdf at points x.
+
+    Notes
+    -----
     Vector are row vectors, except va which can be a matrix
-    (row vector variance for diagonal variance)
+    (row vector variance for diagonal variance)."""
     
-    If log is True, than the log density is returned 
-    (useful for underflow ?)"""
-    mu  = N.atleast_2d(mu)
-    va  = N.atleast_2d(va)
-    x   = N.atleast_2d(x)
+    lmu  = N.atleast_2d(mu)
+    lva  = N.atleast_2d(va)
+    lx   = N.atleast_2d(x)
     
     #=======================#
     # Checking parameters   #
     #=======================#
-    if len(N.shape(mu)) != 2:
+    if len(N.shape(lmu)) != 2:
         raise DenError("mu is not rank 2")
         
-    if len(N.shape(va)) != 2:
+    if len(N.shape(lva)) != 2:
         raise DenError("va is not rank 2")
         
-    if len(N.shape(x)) != 2:
+    if len(N.shape(lx)) != 2:
         raise DenError("x is not rank 2")
         
-    (n, d)      = x.shape
-    (dm0, dm1)  = mu.shape
-    (dv0, dv1)  = va.shape
+    d = N.shape(lx)[1]
+    (dm0, dm1) = N.shape(lmu)
+    (dv0, dv1) = N.shape(lva)
     
     # Check x and mu same dimension
     if dm0 != 1:
@@ -73,13 +95,13 @@
     #===============#
     if d == 1:
         # scalar case
-        return _scalar_gauss_den(x[:, 0], mu[0, 0], va[0, 0], log)
+        return _scalar_gauss_den(lx[:, 0], lmu[0, 0], lva[0, 0], log)
     elif dv0 == 1:
         # Diagonal matrix case
-        return _diag_gauss_den(x, mu, va, log)
+        return _diag_gauss_den(lx, lmu, lva, log)
     elif dv1 == dv0:
         # full case
-        return  _full_gauss_den(x, mu, va, log)
+        return  _full_gauss_den(lx, lmu, lva, log)
     else:
         raise DenError("variance mode not recognized, this is a bug")
 
@@ -115,20 +137,20 @@
     Call gauss_den instead"""
     # Diagonal matrix case
     d   = mu.size
-    n   = x.shape[0]
+    #n   = x.shape[0]
     if not log:
-        inva    = 1/va[0,0]
-        fac     = (2*N.pi) ** (-d/2.0) * N.sqrt(inva)
-        y       =  (x[:,0] - mu[0,0]) ** 2 * inva * -0.5
+        inva = 1/va[0, 0]
+        fac = (2*N.pi) ** (-d/2.0) * N.sqrt(inva)
+        y =  (x[:, 0] - mu[0, 0]) ** 2 * inva * -0.5
         for i in range(1, d):
-            inva    = 1/va[0,i]
-            fac     *= N.sqrt(inva)
-            y       += (x[:,i] - mu[0,i]) ** 2 * inva * -0.5
-        y   = fac * N.exp(y)
+            inva = 1/va[0, i]
+            fac *= N.sqrt(inva)
+            y += (x[:, i] - mu[0, i]) ** 2 * inva * -0.5
+        y = fac * N.exp(y)
     else:
-        y   = _scalar_gauss_den(x[:,0], mu[0,0], va[0,0], log)
+        y = _scalar_gauss_den(x[:, 0], mu[0, 0], va[0, 0], log)
         for i in range(1, d):
-            y    +=  _scalar_gauss_den(x[:,i], mu[0,i], va[0,i], log)
+            y +=  _scalar_gauss_den(x[:, i], mu[0, i], va[0, i], log)
     return y
 
 def _full_gauss_den(x, mu, va, log):
@@ -166,31 +188,46 @@
     return y
 
 # To get coordinatea of a confidence ellipse from multi-variate gaussian pdf
-def gauss_ell(mu, va, dim = misc._DEF_VIS_DIM, \
-        npoints = misc._DEF_ELL_NP, \
-        level = misc._DEF_LEVEL):
-    """ Given a mean and covariance for multi-variate
-    gaussian, returns npoints points for the ellipse
-    of confidence given by level (all points will be inside
-    the ellipsoides with a probability equal to level)
+def gauss_ell(mu, va, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, \
+        level = misc.DEF_LEVEL):
+    """Given a mean and covariance for multi-variate
+    gaussian, returns the coordinates of the confidence ellipsoid.
     
-    Returns the coordinate x and y of the ellipse"""
+    Compute npoints coordinates for the ellipse of confidence of given level
+    (all points will be inside the ellipsoid with a probability equal to
+    level).
+    
+    :Parameters:
+        mu : ndarray
+            mean of the pdf
+        va : ndarray
+            variance of the pdf
+        dim : sequence
+            sequence of two integers which represent the dimensions on which
+            to project the ellipsoid.
+        npoints: int
+            number of points to generate for the ellipse.
+        level : float
+            level of confidence (between 0 and 1).
+
+    :Returns:
+        Returns the coordinate x and y of the ellipse."""
     if level >= 1 or level <= 0:
         raise ValueError("level should be a scale strictly between 0 and 1.""")
     
-    mu      = N.atleast_1d(mu)
-    va      = N.atleast_1d(va)
-    d       = mu.shape[0]
-    c       = N.array(dim)
+    mu = N.atleast_1d(mu)
+    va = N.atleast_1d(va)
+    d = N.shape(mu)[0]
+    c = N.array(dim)
 
     if N.any(c < 0) or N.any(c >= d):
         raise ValueError("dim elements should be >= 0 and < %d (dimension"\
                 " of the variance)" % d)
-    if mu.size == va.size:
+    if N.size(mu) == N.size(va):
         mode    = 'diag'
     else:
-        if va.ndim == 2:
-            if va.shape[0] == va.shape[1]:
+        if N.ndim(va) == 2:
+            if N.shape(va)[0] == N.shape(va)[1]:
                 mode    = 'full'
             else:
                 raise DenError("variance not square")
@@ -215,7 +252,7 @@
         elps    = N.outer(mu, N.ones(npoints))
         elps    += N.dot(N.diag(N.sqrt(va)), circle)
     elif mode == 'full':
-        va  = va[c,:][:,c]
+        va  = va[c, :][:, c]
         # Method: compute the cholesky decomp of each cov matrix, that is
         # compute cova such as va = cova * cova' 
         # WARN: scipy is different than matlab here, as scipy computes a lower
@@ -227,22 +264,38 @@
         elps    = N.outer(mu, N.ones(npoints))
         elps    += N.dot(cova, circle)
     else:
-        raise DenParam("var mode not recognized")
+        raise ValueError("var mode not recognized")
 
     return elps[0, :], elps[1, :]
 
 def multiple_gauss_den(data, mu, va):
     """Helper function to generate several Gaussian
-    pdf (different parameters) from the same data"""
-    mu  = N.atleast_2d(mu)
-    va  = N.atleast_2d(va)
+    pdf (different parameters) at the same points
 
-    K   = mu.shape[0]
-    n   = data.shape[0]
-    d   = mu.shape[1]
+    :Parameters:
+        data : ndarray
+            points where to estimate the pdfs (n,d).
+        mu : ndarray
+            mean of the pdf, of shape (k,d). One row of dimension d per
+            different component, the number of rows k being the number of
+            components
+        va : ndarray
+            variance of the pdf. One row per different component for diagonal
+            covariance (k, d), or d rows per component for full matrix pdf
+            (k*d,d).
+
+    :Returns:
+        Returns a (n, k) array, each column i being the pdf of the ith mean and
+        ith variance."""
+    mu = N.atleast_2d(mu)
+    va = N.atleast_2d(va)
+
+    K = N.shape(mu)[0]
+    n = N.shape(data)[0]
+    d = N.shape(mu)[1]
     
-    y   = N.zeros((K, n))
-    if mu.size == va.size:
+    y = N.zeros((K, n))
+    if N.size(mu) == N.size(va):
         for i in range(K):
             y[i] = gauss_den(data, mu[i, :], va[i, :])
         return y.T
@@ -252,39 +305,40 @@
         return y.T
 
 if __name__ == "__main__":
-    import pylab
+    pass
+    ## import pylab
 
-    #=========================================
-    # Test plotting a simple diag 2d variance:
-    #=========================================
-    va  = N.array([5, 3])
-    mu  = N.array([2, 3])
+    ## #=========================================
+    ## # Test plotting a simple diag 2d variance:
+    ## #=========================================
+    ## va  = N.array([5, 3])
+    ## mu  = N.array([2, 3])
 
-    # Generate a multivariate gaussian of mean mu and covariance va
-    X       = randn(1e3, 2)
-    Yc      = N.dot(N.diag(N.sqrt(va)), X.transpose())
-    Yc      = Yc.transpose() + mu
+    ## # Generate a multivariate gaussian of mean mu and covariance va
+    ## X       = randn(1e3, 2)
+    ## Yc      = N.dot(N.diag(N.sqrt(va)), X.transpose())
+    ## Yc      = Yc.transpose() + mu
 
-    # Plotting
-    Xe, Ye  = gauss_ell(mu, va, npoints = 100)
-    pylab.figure()
-    pylab.plot(Yc[:, 0], Yc[:, 1], '.')
-    pylab.plot(Xe, Ye, 'r')
+    ## # Plotting
+    ## Xe, Ye  = gauss_ell(mu, va, npoints = 100)
+    ## pylab.figure()
+    ## pylab.plot(Yc[:, 0], Yc[:, 1], '.')
+    ## pylab.plot(Xe, Ye, 'r')
 
-    #=========================================
-    # Test plotting a simple full 2d variance:
-    #=========================================
-    va  = N.array([[0.2, 0.1],[0.1, 0.5]])
-    mu  = N.array([0, 3])
+    ## #=========================================
+    ## # Test plotting a simple full 2d variance:
+    ## #=========================================
+    ## va  = N.array([[0.2, 0.1],[0.1, 0.5]])
+    ## mu  = N.array([0, 3])
 
-    # Generate a multivariate gaussian of mean mu and covariance va
-    X       = randn(1e3, 2)
-    Yc      = N.dot(lin.cholesky(va), X.transpose())
-    Yc      = Yc.transpose() + mu
+    ## # Generate a multivariate gaussian of mean mu and covariance va
+    ## X       = randn(1e3, 2)
+    ## Yc      = N.dot(lin.cholesky(va), X.transpose())
+    ## Yc      = Yc.transpose() + mu
 
-    # Plotting
-    Xe, Ye  = gauss_ell(mu, va, npoints = 100, level=0.95)
-    pylab.figure()
-    pylab.plot(Yc[:, 0], Yc[:, 1], '.')
-    pylab.plot(Xe, Ye, 'r')
-    pylab.show()
+    ## # Plotting
+    ## Xe, Ye  = gauss_ell(mu, va, npoints = 100, level=0.95)
+    ## pylab.figure()
+    ## pylab.plot(Yc[:, 0], Yc[:, 1], '.')
+    ## pylab.plot(Xe, Ye, 'r')
+    ## pylab.show()

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 08:37:59 UTC (rev 3084)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 11:38:41 UTC (rev 3085)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Sat Jun 09 08:00 PM 2007 J
 
 # Module to implement GaussianMixture class.
 
@@ -151,8 +151,8 @@
 
         return X
 
-    def conf_ellipses(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, \
-        level = misc._DEF_LEVEL):
+    def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP,
+            level = misc.DEF_LEVEL):
         """Returns a list of confidence ellipsoids describing the Gmm
         defined by mu and va. Check densities.gauss_ell for details
 
@@ -262,8 +262,8 @@
     #=================
     # Plotting methods
     #=================
-    def plot(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, 
-            level = misc._DEF_LEVEL):
+    def plot(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, 
+            level = misc.DEF_LEVEL):
         """Plot the ellipsoides directly for the model
         
         Returns a list of lines, so that their style can be modified. By default,
@@ -371,7 +371,7 @@
 
         return retval
 
-    def density_on_grid(self, dim = misc._DEF_VIS_DIM, nx = 50, ny = 50,
+    def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50,
             maxlevel = 0.95):
         """Do all the necessary computation for contour plot of mixture's density.
         
@@ -401,7 +401,7 @@
         V.extend(N.linspace(0, N.max(lden), 4).tolist())
         return X, Y, lden, N.array(V)
 
-    def _densityctr(self, xrange, yrange, dim = misc._DEF_VIS_DIM):
+    def _densityctr(self, xrange, yrange, dim = misc.DEF_VIS_DIM):
         """Helper function to compute density contours on a grid."""
         gr = N.meshgrid(xrange, yrange)
         X = gr[0].flatten()

Modified: trunk/Lib/sandbox/pyem/misc.py
===================================================================
--- trunk/Lib/sandbox/pyem/misc.py	2007-06-09 08:37:59 UTC (rev 3084)
+++ trunk/Lib/sandbox/pyem/misc.py	2007-06-09 11:38:41 UTC (rev 3085)
@@ -1,12 +1,12 @@
-# Last Change: Sat Jun 09 12:00 PM 2007 J
+# Last Change: Sat Jun 09 07:00 PM 2007 J
 
 #========================================================
 # Constants used throughout the module (def args, etc...)
 #========================================================
 # This is the default dimension for representing confidence ellipses
-_DEF_VIS_DIM = [0, 1]
-_DEF_ELL_NP = 100
-_DEF_LEVEL = 0.39
+DEF_VIS_DIM = [0, 1]
+DEF_ELL_NP = 100
+DEF_LEVEL = 0.39
 #=====================================================================
 # "magic number", that is number used to control regularization and co
 # Change them at your risk !


From scipy-svn at scipy.org  Sat Jun  9 07:43:55 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat,  9 Jun 2007 06:43:55 -0500 (CDT)
Subject: [Scipy-svn] r3086 - trunk/Lib/sandbox/pyem
Message-ID: <20070609114355.2B62E39C0E7@new.scipy.org>

Author: cdavid
Date: 2007-06-09 06:43:51 -0500 (Sat, 09 Jun 2007)
New Revision: 3086

Modified:
   trunk/Lib/sandbox/pyem/misc.py
Log:
Set def arguments to immutable to avoid nasty side effect.

Modified: trunk/Lib/sandbox/pyem/misc.py
===================================================================
--- trunk/Lib/sandbox/pyem/misc.py	2007-06-09 11:38:41 UTC (rev 3085)
+++ trunk/Lib/sandbox/pyem/misc.py	2007-06-09 11:43:51 UTC (rev 3086)
@@ -1,10 +1,10 @@
-# Last Change: Sat Jun 09 07:00 PM 2007 J
+# Last Change: Sat Jun 09 08:00 PM 2007 J
 
 #========================================================
 # Constants used throughout the module (def args, etc...)
 #========================================================
 # This is the default dimension for representing confidence ellipses
-DEF_VIS_DIM = [0, 1]
+DEF_VIS_DIM = (0, 1)
 DEF_ELL_NP = 100
 DEF_LEVEL = 0.39
 #=====================================================================


From scipy-svn at scipy.org  Sat Jun  9 10:03:18 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sat,  9 Jun 2007 09:03:18 -0500 (CDT)
Subject: [Scipy-svn] r3087 - in trunk/Lib/sandbox/pyem: . doc examples
Message-ID: <20070609140318.EFA1D39C08C@new.scipy.org>

Author: cdavid
Date: 2007-06-09 09:03:01 -0500 (Sat, 09 Jun 2007)
New Revision: 3087

Added:
   trunk/Lib/sandbox/pyem/doc/pdfestimation.png
Modified:
   trunk/Lib/sandbox/pyem/__init__.py
   trunk/Lib/sandbox/pyem/_c_densities.py
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/doc/
   trunk/Lib/sandbox/pyem/doc/Makefile
   trunk/Lib/sandbox/pyem/doc/index.txt
   trunk/Lib/sandbox/pyem/doc/tutorial.pdf
   trunk/Lib/sandbox/pyem/examples/pdfestimation.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/info.py
   trunk/Lib/sandbox/pyem/online_em.py
Log:
Heavy liftup of the code + docstrings.

Modified: trunk/Lib/sandbox/pyem/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/__init__.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/__init__.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Mon May 28 01:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
 
 from info import __doc__
 
@@ -8,7 +8,7 @@
 #from online_em import OnGMM as _OnGMM
 #import examples as _examples
 
-__all__ = filter(lambda s:not s.startswith('_'),dir())
+__all__ = filter(lambda s:not s.startswith('_'), dir())
 
 from numpy.testing import NumpyTest
 test = NumpyTest().test

Modified: trunk/Lib/sandbox/pyem/_c_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/_c_densities.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/_c_densities.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,28 +1,34 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Thu Nov 09 05:00 PM 2006 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
 
+"""This module implements some functions of the densities module in C for
+efficiency reasons: functions related to multivariate gaussian, such as pdf
+estimation, confidence interval/ellipsoids, etc..."""
+
+__docformat__ = 'restructuredtext'
+
 # This module uses a C implementation through ctypes, for diagonal cases
 # TODO:
 #   - portable way to find/open the shared library
 #   - full cov matrice
+#   - test before inclusion
 
 import numpy as N
 import numpy.linalg as lin
-from numpy.random import randn
-from scipy.stats import chi2
-import densities as D
+#from numpy.random import randn
+#from scipy.stats import chi2
+#import densities as D
 
 import ctypes
-from ctypes import cdll, c_uint, c_int, c_double, POINTER
+from ctypes import c_uint, c_int
 from numpy.ctypeslib import ndpointer, load_library
 
 ctypes_major    = int(ctypes.__version__.split('.')[0])
 if ctypes_major < 1:
-    msg =  "version of ctypes is %s, expected at least %s" \
-            % (ctypes.__version__, '1.0.0')
-    raise ImportError(msg)
+    raise ImportError(msg =  "version of ctypes is %s, expected at least %s"\
+            % (ctypes.__version__, '1.0.1'))
 
 # Requirements for diag gden
 _gden   = load_library('c_gden.so', __file__)
@@ -75,9 +81,9 @@
     if len(N.shape(x)) != 2:
         raise DenError("x is not rank 2")
         
-    (n, d)      = x.shape
-    (dm0, dm1)  = mu.shape
-    (dv0, dv1)  = va.shape
+    (n, d)      = N.shape(x)
+    (dm0, dm1)  = N.shape(mu)
+    (dv0, dv1)  = N.shape(va)
     
     # Check x and mu same dimension
     if dm0 != 1:
@@ -165,9 +171,9 @@
         #     inva.ctypes.data_as(POINTER(c_double)),
         #     y.ctypes.data_as(POINTER(c_double)))
     else:
-        y   = _scalar_gauss_den(x[:,0], mu[0,0], va[0,0], log)
+        y   = _scalar_gauss_den(x[:, 0], mu[0, 0], va[0, 0], log)
         for i in range(1, d):
-            y    +=  _scalar_gauss_den(x[:,i], mu[0,i], va[0,i], log)
+            y    +=  _scalar_gauss_den(x[:, i], mu[0, i], va[0, i], log)
         return y
 
 def _full_gauss_den(x, mu, va, log):
@@ -199,19 +205,20 @@
     return y
 
 if __name__ == "__main__":
-    #=========================================
-    # Test accuracy between pure and C python
-    #=========================================
-    mu  = N.array([2.0, 3])
-    va  = N.array([5.0, 3])
+    pass
+    ##=========================================
+    ## Test accuracy between pure and C python
+    ##=========================================
+    #mu  = N.array([2.0, 3])
+    #va  = N.array([5.0, 3])
 
-    # Generate a multivariate gaussian of mean mu and covariance va
-    nframes = 1e4
-    X       = randn(nframes, 2)
-    Yc      = N.dot(N.diag(N.sqrt(va)), X.transpose())
-    Yc      = Yc.transpose() + mu
+    ## Generate a multivariate gaussian of mean mu and covariance va
+    #nframes = 1e4
+    #X       = randn(nframes, 2)
+    #Yc      = N.dot(N.diag(N.sqrt(va)), X.transpose())
+    #Yc      = Yc.transpose() + mu
 
-    Y   = D.gauss_den(Yc, mu, va)
-    Yt  = gauss_den(Yc, mu, va)
+    #Y   = D.gauss_den(Yc, mu, va)
+    #Yt  = gauss_den(Yc, mu, va)
 
-    print "Diff is " + str(N.sqrt(N.sum((Y-Yt) ** 2))/nframes/2)
+    #print "Diff is " + str(N.sqrt(N.sum((Y-Yt) ** 2))/nframes/2)

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,8 +1,8 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Sat Jun 09 08:00 PM 2007 J
-"""This module implements various bsic functions related to multivariate
+# Last Change: Sat Jun 09 10:00 PM 2007 J
+"""This module implements various basic functions related to multivariate
 gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
 
 __docformat__ = 'restructuredtext'
@@ -50,10 +50,10 @@
         pdf : ndarray
             Returns a rank 1 array of the pdf at points x.
 
-    Notes
-    -----
-    Vector are row vectors, except va which can be a matrix
-    (row vector variance for diagonal variance)."""
+    Note
+    ----
+        Vector are row vectors, except va which can be a matrix
+        (row vector variance for diagonal variance)."""
     
     lmu  = N.atleast_2d(mu)
     lva  = N.atleast_2d(va)


Property changes on: trunk/Lib/sandbox/pyem/doc
___________________________________________________________________
Name: svn:ignore
   - *.pyc
*.swp
*.pyd
*.so
*.prof


   + *.pyc
*.swp
*.pyd
*.so
*.prof
*.out
*.tex


Modified: trunk/Lib/sandbox/pyem/doc/Makefile
===================================================================
--- trunk/Lib/sandbox/pyem/doc/Makefile	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/doc/Makefile	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,4 +1,4 @@
-# Last Change: Mon May 28 10:00 AM 2007 J
+# Last Change: Sat Jun 09 05:00 PM 2007 J
 
 # This makefile is used to build the pdf from the rest file and inlined code
 # from python examples
@@ -7,7 +7,7 @@
 rst2tex	= PYTHONPATH=/home/david/local/lib/python2.4/site-packages rst2newlatex.py \
 		  --stylesheet-path base.tex --user-stylesheet user.tex 
 
-pytexfiles	= pyem.tex basic_example1.tex basic_example2.tex basic_example3.tex
+pytexfiles	= pyem.tex basic_example1.tex basic_example2.tex basic_example3.tex pdfestimation.tex
 
 SOURCEPATH	= $(PWD)
 
@@ -24,15 +24,18 @@
 pyem.tex: index.txt
 	$(rst2tex) $< > $@
 
-basic_example1.tex: examples/basic_example1.py
+basic_example1.tex: ../examples/basic_example1.py
 	$(py2tex) $< > $@
 
-basic_example2.tex: examples/basic_example2.py
+basic_example2.tex: ../examples/basic_example2.py
 	$(py2tex) $< > $@
 
-basic_example3.tex: examples/basic_example3.py
+basic_example3.tex: ../examples/basic_example3.py
 	$(py2tex) $< > $@
 
+pdfestimation.tex: ../examples/pdfestimation.py
+	$(py2tex) $< > $@
+
 clean:
 	for i in $(pytexfiles); do \
 		rm -f `echo $$i`; \

Modified: trunk/Lib/sandbox/pyem/doc/index.txt
===================================================================
--- trunk/Lib/sandbox/pyem/doc/index.txt	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/doc/index.txt	2007-06-09 14:03:01 UTC (rev 3087)
@@ -13,7 +13,7 @@
         file: Bic_example.png
     /restindex
 
-.. Last Change: Mon May 28 10:00 AM 2007 J
+.. Last Change: Sat Jun 09 07:00 PM 2007 J
 
 ===================================================
  PyEM, a python package for Gaussian mixture models
@@ -176,14 +176,36 @@
 Examples 
 =========
 
-TODO.
+Using EM for pdf estimation
+---------------------------
 
+The following example uses the old faithful dataset and is available in the
+example directory. It models the joint distribution (d(t), w(t+1)), where d(t)
+is the duration time, and w(t+1) the waiting time for the next eruption. It
+selects the best model using the BIC.
+
+.. raw:: latex
+
+    \input{pdfestimation.tex}
+
+.. figure:: pdfestimation.png 
+    :width: 500
+    :height: 400
+
+    Isodensity curves for the old faithful data modeled by 1, 2, 3 and 4
+    component models (top to bottom, left to right).
+
+
 Using EM for clustering
 -----------------------
 
+TODO (this is fundamentally the same as pdf estimation, though)
+
 Using PyEM for supervised learning
 ----------------------------------
 
+TODO
+
 Note on performances
 ====================
 

Added: trunk/Lib/sandbox/pyem/doc/pdfestimation.png
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/sandbox/pyem/doc/pdfestimation.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/sandbox/pyem/doc/tutorial.pdf
===================================================================
(Binary files differ)

Modified: trunk/Lib/sandbox/pyem/examples/pdfestimation.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/pdfestimation.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/examples/pdfestimation.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,15 +1,11 @@
 #! /usr/bin/env python
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Sat Jun 09 07:00 PM 2007 J
 
 # Example of doing pdf estimation with EM algorithm. Requires matplotlib.
 import numpy as N
-from numpy.testing import set_package_path, restore_path
-
 import pylab as P
 
-set_package_path()
-import pyem
-restore_path()
+from scipy.sandbox import pyem
 import utils
 
 oldfaithful = utils.get_faithful()
@@ -45,6 +41,8 @@
     X, Y, Z, V = gm.density_on_grid()
     P.contour(X, Y, Z, V)
     P.plot(dt[:, 0], dt[:, 1], '.')
+    P.xlabel('duration time (scaled)')
+    P.ylabel('waiting time (scaled)')
 
 print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1)
 P.show()

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,8 +1,14 @@
 # /usr/bin/python
-# Last Change: Sat Jun 09 08:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
 
-# Module to implement GaussianMixture class.
+"""Module implementing GM, a class which represents Gaussian mixtures.
 
+GM instances can be used to create, sample mixtures. They also provide
+different plotting facilities, such as isodensity contour for multi dimensional
+models, ellipses of confidence."""
+
+__docformat__ = 'restructuredtext'
+
 import numpy as N
 from numpy.random import randn, rand
 import numpy.linalg as lin
@@ -21,12 +27,12 @@
 #   be used as long as w, mu and va are not set
 #   - We have to use scipy now for chisquare pdf, so there may be other
 #   methods to be used, ie for implementing random index.
-#   - there is no check on internal state of the GM, that is does w, mu and va values
-#   make sense (eg singular values)
-#   - plot1d is still very rhough. There should be a sensible way to 
-#   modify the result plot (maybe returns a dic with global pdf, component pdf and
-#   fill matplotlib handles). Should be coherent with plot
-class GmParamError:
+#   - there is no check on internal state of the GM, that is does w, mu and va
+#   values make sense (eg singular values)
+#   - plot1d is still very rough. There should be a sensible way to modify the
+#   result plot (maybe returns a dic with global pdf, component pdf and fill
+#   matplotlib handles). Should be coherent with plot
+class GmParamError(Exception):
     """Exception raised for errors in gmm params
 
     Attributes:
@@ -34,6 +40,7 @@
         message -- explanation of the error
     """
     def __init__(self, message):
+        Exception.__init__(self)
         self.message    = message
     
     def __str__(self):
@@ -52,11 +59,27 @@
     # Methods to construct a mixture
     #===============================
     def __init__(self, d, k, mode = 'diag'):
-        """Init a Gaussian Mixture of k components, each component being a 
-        d multi-variate Gaussian, with covariance matrix of style mode.
-        
-        If you want to build a Gaussian Mixture with knowns weights, means
-        and variances, you can use GM.fromvalues method directly"""
+        """Init a Gaussian Mixture.
+
+        :Parameters:
+            d : int
+                dimension of the mixture.
+            k : int
+                number of component in the mixture.
+            mode : string
+                mode of covariance
+
+        :Returns:
+            an instance of GM.
+
+        Note
+        ----
+
+        Only full and diag mode are supported for now.
+
+        :SeeAlso:
+            If you want to build a Gaussian Mixture with known weights, means
+            and variances, you can use GM.fromvalues method directly"""
         if mode not in self._cov_mod:
             raise GmParamError("mode %s not recognized" + str(mode))
 
@@ -80,16 +103,42 @@
             self.is1d = True
 
     def set_param(self, weights, mu, sigma):
-        """Set parameters of the model. Args should
-        be conformant with metparameters d and k given during
-        initialisation"""
+        """Set parameters of the model. 
+        
+        Args should be conformant with meta-parameters d and k given during
+        initialisation.
+        
+        :Parameters:
+            weights : ndarray
+                weights of the mixture (k elements)
+            mu : ndarray
+                means of the mixture. One component's mean per row, k row for k
+                components.
+            sigma : ndarray
+                variances of the mixture. For diagonal models, one row contains
+                the diagonal elements of the covariance matrix. For full
+                covariance, d rows for one variance.
+
+        Examples
+        --------
+        Create a 2 component, 2 dimension mixture with full covariance matrices
+
+        >>> w = numpy.array([0.4, 0.6])
+        >>> mu = numpy.array([[0., 0.], [1., 1.]])
+        >>> va = numpy.array([[1., 0.], [0., 1.], [2., 0.5], [0.5, 1]])
+        >>> gm = GM(2, 2, 'full')
+        >>> gm.set_param(w, mu, va)
+
+        :SeeAlso:
+            If you know already the parameters when creating the model, you can
+            simply use the class method GM.fromvalues."""
         k, d, mode  = check_gmm_param(weights, mu, sigma)
         if not k == self.k:
             raise GmParamError("Number of given components is %d, expected %d" 
                     % (k, self.k))
         if not d == self.d:
-            raise GmParamError("Dimension of the given model is %d, expected %d" 
-                    % (d, self.d))
+            raise GmParamError("Dimension of the given model is %d, "\
+                "expected %d" % (d, self.d))
         if not mode == self.mode and not d == 1:
             raise GmParamError("Given covariance mode is %s, expected %s"
                     % (mode, self.mode))
@@ -104,16 +153,34 @@
         """This class method can be used to create a GM model
         directly from its parameters weights, mean and variance
         
-        w, mu, va   = GM.gen_param(d, k)
-        gm  = GM(d, k)
-        gm.set_param(w, mu, va)
+        :Parameters:
+            weights : ndarray
+                weights of the mixture (k elements)
+            mu : ndarray
+                means of the mixture. One component's mean per row, k row for k
+                components.
+            sigma : ndarray
+                variances of the mixture. For diagonal models, one row contains
+                the diagonal elements of the covariance matrix. For full
+                covariance, d rows for one variance.
 
+        :Returns:
+            gm : GM
+                an instance of GM.
+
+        Examples
+        --------
+
+        >>> w, mu, va   = GM.gen_param(d, k)
+        >>> gm  = GM(d, k)
+        >>> gm.set_param(w, mu, va)
+
         and
         
-        w, mu, va   = GM.gen_param(d, k)
-        gm  = GM.fromvalue(w, mu, va)
+        >>> w, mu, va   = GM.gen_param(d, k)
+        >>> gm  = GM.fromvalues(w, mu, va)
 
-        Are equivalent """
+        are strictly equivalent."""
         k, d, mode  = check_gmm_param(weights, mu, sigma)
         res = cls(d, k, mode)
         res.set_param(weights, mu, sigma)
@@ -123,7 +190,15 @@
     # Fundamental facilities (sampling, confidence, etc..)
     #=====================================================
     def sample(self, nframes):
-        """ Sample nframes frames from the model """
+        """ Sample nframes frames from the model.
+        
+        :Parameters:
+            nframes : int
+                number of samples to draw.
+        
+        :Returns:
+            samples : ndarray
+                samples in the format one sample per row (nframes, d)."""
         if not self.is_valid:
             raise GmParamError("""Parameters of the model has not been 
                 set yet, please set them using self.set_param()""")
@@ -134,47 +209,60 @@
         X   = randn(nframes, self.d)        
 
         if self.mode == 'diag':
-            X   = self.mu[S, :]  + X * N.sqrt(self.va[S,:])
+            X   = self.mu[S, :]  + X * N.sqrt(self.va[S, :])
         elif self.mode == 'full':
             # Faster:
             cho = N.zeros((self.k, self.va.shape[1], self.va.shape[1]))
             for i in range(self.k):
                 # Using cholesky looks more stable than sqrtm; sqrtm is not
                 # available in numpy anyway, only in scipy...
-                cho[i]  = lin.cholesky(self.va[i*self.d:i*self.d+self.d,:])
+                cho[i]  = lin.cholesky(self.va[i*self.d:i*self.d+self.d, :])
 
             for s in range(self.k):
                 tmpind      = N.where(S == s)[0]
                 X[tmpind]   = N.dot(X[tmpind], cho[s].transpose()) + self.mu[s]
         else:
-            raise GmParamError('cov matrix mode not recognized, this is a bug !')
+            raise GmParamError("cov matrix mode not recognized, "\
+                    "this is a bug !")
 
         return X
 
-    def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP,
+    def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, 
             level = misc.DEF_LEVEL):
         """Returns a list of confidence ellipsoids describing the Gmm
         defined by mu and va. Check densities.gauss_ell for details
 
-        Returns:
-            -Xe:    a list of x coordinates for the ellipses (Xe[i] is
-            the array containing x coordinates of the ith Gaussian)
-            -Ye:    a list of y coordinates for the ellipses
+        :Parameters:
+            dim : sequence
+                sequence of two integers which represent the dimensions where
+                to project the ellipsoid.
+            npoints : int
+                number of points to generate for the ellipse.
+            level : float
+                level of confidence (between 0 and 1).
 
-        Example:
+        :Returns:
+            Xe : sequence
+                a list of x coordinates for the ellipses (Xe[i] is the array
+                containing x coordinates of the ith Gaussian)
+            Ye : sequence
+                a list of y coordinates for the ellipses.
+
+        Examples
+        --------
             Suppose we have w, mu and va as parameters for a mixture, then:
             
-            gm      = GM(d, k)
-            gm.set_param(w, mu, va)
-            X       = gm.sample(1000)
-            Xe, Ye  = gm.conf_ellipsoids()
-            pylab.plot(X[:,0], X[:, 1], '.')
-            for k in len(w):
-                pylab.plot(Xe[k], Ye[k], 'r')
+            >>> gm      = GM(d, k)
+            >>> gm.set_param(w, mu, va)
+            >>> X       = gm.sample(1000)
+            >>> Xe, Ye  = gm.conf_ellipses()
+            >>> pylab.plot(X[:,0], X[:, 1], '.')
+            >>> for k in range(len(w)):
+            ...    pylab.plot(Xe[k], Ye[k], 'r')
                 
             Will plot samples X draw from the mixture model, and
             plot the ellipses of equi-probability from the mean with
-            fixed level of confidence 0.39.  """
+            default level of confidence."""
         if self.is1d:
             raise ValueError("This function does not make sense for 1d "
                 "mixtures.")
@@ -187,14 +275,14 @@
         Ye  = []   
         if self.mode == 'diag':
             for i in range(self.k):
-                xe, ye  = densities.gauss_ell(self.mu[i,:], self.va[i,:], 
+                xe, ye  = densities.gauss_ell(self.mu[i, :], self.va[i, :], 
                         dim, npoints, level)
                 Xe.append(xe)
                 Ye.append(ye)
         elif self.mode == 'full':
             for i in range(self.k):
-                xe, ye  = densities.gauss_ell(self.mu[i,:], 
-                        self.va[i*self.d:i*self.d+self.d,:], 
+                xe, ye  = densities.gauss_ell(self.mu[i, :], 
+                        self.va[i*self.d:i*self.d+self.d, :], 
                         dim, npoints, level)
                 Xe.append(xe)
                 Ye.append(ye)
@@ -202,8 +290,11 @@
         return Xe, Ye
     
     def check_state(self):
+        """Returns true if the parameters of the model are valid. 
+
+        For Gaussian mixtures, this means weights summing to 1, and variances
+        to be positive definite.
         """
-        """
         if not self.is_valid:
             raise GmParamError("""Parameters of the model has not been 
                 set yet, please set them using self.set_param()""")
@@ -222,18 +313,33 @@
         cond    = N.zeros(self.k)
         ava     = N.absolute(self.va)
         for c in range(self.k):
-            cond[c] = N.amax(ava[c,:]) / N.amin(ava[c,:])
+            cond[c] = N.amax(ava[c, :]) / N.amin(ava[c, :])
 
         print cond
 
-    def gen_param(self, d, nc, varmode = 'diag', spread = 1):
-        """Generate valid parameters for a gaussian mixture model.
-        d is the dimension, nc the number of components, and varmode
-        the mode for cov matrices.
+    @classmethod
+    def gen_param(cls, d, nc, varmode = 'diag', spread = 1):
+        """Generate random, valid parameters for a gaussian mixture model.
 
+        :Parameters:
+            d : int
+                the dimension
+            nc : int
+                the number of components
+            varmode : string
+                covariance matrix mode ('full' or 'diag').
+
+        :Returns:
+            w : ndarray
+                weights of the mixture
+            mu : ndarray
+                means of the mixture
+            va : ndarray
+                variances of the mixture
+
+        Notes
+        -----
         This is a class method.
-
-        Returns: w, mu, va
         """
         w   = abs(randn(nc))
         w   = w / sum(w, 0)
@@ -251,13 +357,13 @@
 
         return w, mu, va
 
-    gen_param = classmethod(gen_param)
+    #gen_param = classmethod(gen_param)
 
-    #=======================
-    # Regularization methods
-    #=======================
-    def _regularize(self):
-        raise NotImplemented("No regularization")
+    # #=======================
+    # # Regularization methods
+    # #=======================
+    # def _regularize(self):
+    #     raise NotImplemented("No regularization")
 
     #=================
     # Plotting methods
@@ -266,10 +372,29 @@
             level = misc.DEF_LEVEL):
         """Plot the ellipsoides directly for the model
         
-        Returns a list of lines, so that their style can be modified. By default,
-        the style is red color, and nolegend for all of them.
+        Returns a list of lines handle, so that their style can be modified. By
+        default, the style is red color, and nolegend for all of them.
         
-        Does not work for 1d"""
+        :Parameters:
+            dim : sequence
+                sequence of two integers, the dimensions of interest.
+            npoints : int
+                Number of points to use for the ellipsoids.
+            level : float
+                level of confidence (between 0 and 1).
+        
+        :Returns:
+            h : sequence
+                Returns a list of lines handle so that their properties
+                can be modified (eg color, label, etc...):
+
+        Note
+        ----
+        Does not work for 1d. Requires matplotlib
+        
+        :SeeAlso:
+            conf_ellipses is used to compute the ellipses. Use this if you want
+            to plot with something else than matplotlib."""
         if self.is1d:
             raise ValueError("This function does not make sense for 1d "
                 "mixtures.")
@@ -282,22 +407,32 @@
         Xe, Ye  = self.conf_ellipses(dim, npoints, level)
         try:
             import pylab as P
-            return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in range(k)]
+            return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in
+                    range(k)]
             #for i in range(k):
             #    P.plot(Xe[i], Ye[i], 'r')
         except ImportError:
             raise GmParamError("matplotlib not found, cannot plot...")
 
-    def plot1d(self, level = 0.5, fill = 0, gpdf = 0):
-        """This function plots the pdfs of each component of the model. 
-        If gpdf is 1, also plots the global pdf. If fill is 1, fill confidence
-        areas using level argument as a level value
+    def plot1d(self, level = misc.DEF_LEVEL, fill = False, gpdf = False):
+        """Plots the pdf of each component of the 1d mixture.
         
-        Returns a dictionary h of plot handles so that their properties can
-        be modified (eg color, label, etc...):
-            - h['pdf'] is a list of lines, one line per component pdf
-            - h['gpdf'] is the line for the global pdf
-            - h['conf'] is a list of filling area
+        :Parameters:
+            level : float
+                level of confidence (to use with fill argument)
+            fill : bool
+                if True, the area of the pdf corresponding to the given
+                confidence intervals is filled.
+            gpdf : bool
+                if True, the global pdf is plotted.
+        
+        :Returns:
+            h : dict
+                Returns a dictionary h of plot handles so that their properties
+                can be modified (eg color, label, etc...):
+                - h['pdf'] is a list of lines, one line per component pdf
+                - h['gpdf'] is the line for the global pdf
+                - h['conf'] is a list of filling area
         """
         if not self.is1d:
             raise ValueError("This function does not make sense for "
@@ -310,12 +445,12 @@
             raise GmParamError("the model is not one dimensional model")
         from scipy.stats import norm
         nrm     = norm(0, 1)
-        pval    = N.sqrt(self.va[:,0]) * nrm.ppf((1+level)/2)
+        pval    = N.sqrt(self.va[:, 0]) * nrm.ppf((1+level)/2)
 
         # Compute reasonable min/max for the normal pdf: [-mc * std, mc * std]
         # gives the range we are taking in account for each gaussian
         mc  = 3
-        std = N.sqrt(self.va[:,0])
+        std = N.sqrt(self.va[:, 0])
         m   = N.amin(self.mu[:, 0] - mc * std)
         M   = N.amax(self.mu[:, 0] + mc * std)
 
@@ -326,7 +461,7 @@
 
         # Prepare the dic of plot handles to return
         ks  = ['pdf', 'conf', 'gpdf']
-        hp  = dict((i,[]) for i in ks)
+        hp  = dict((i, []) for i in ks)
         try:
             import pylab as P
             for c in range(self.k):
@@ -336,7 +471,8 @@
                 h   = P.plot(x, y, 'r', label ='_nolegend_')
                 hp['pdf'].extend(h)
                 if fill:
-                    #P.axvspan(-pval[c] + self.mu[c][0], pval[c] + self.mu[c][0], 
+                    #P.axvspan(-pval[c] + self.mu[c][0], pval[c] +
+                    #self.mu[c][0], 
                     #        facecolor = 'b', alpha = 0.2)
                     id1 = -pval[c] + self.mu[c]
                     id2 = pval[c] + self.mu[c]
@@ -350,7 +486,8 @@
                             facecolor = 'b', alpha = 0.1, label='_nolegend_')
                     hp['conf'].extend(h)
                     #P.fill([xc[0], xc[0], xc[-1], xc[-1]], 
-                    #        [0, Yf[0], Yf[-1], 0], facecolor = 'b', alpha = 0.2)
+                    #        [0, Yf[0], Yf[-1], 0], facecolor = 'b', alpha =
+                    #        0.2)
             if gpdf:
                 h           = P.plot(x, Yt, 'r:', label='_nolegend_')
                 hp['gpdf']  = h
@@ -363,7 +500,7 @@
         the pdf of the mixture."""
         # XXX: have a public function to compute the pdf at given points
         # instead...
-        std = N.sqrt(self.va[:,0])
+        std = N.sqrt(self.va[:, 0])
         retval = N.empty((x.size, self.k))
         for c in range(self.k):
             retval[:, c] = self.w[c]/(N.sqrt(2*N.pi) * std[c]) * \
@@ -373,9 +510,30 @@
 
     def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50,
             maxlevel = 0.95):
-        """Do all the necessary computation for contour plot of mixture's density.
+        """Do all the necessary computation for contour plot of mixture's
+        density.
         
-        Returns X, Y, Z and V as expected by mpl contour function."""
+        :Parameters:
+            dim : sequence
+                sequence of two integers, the dimensions of interest.
+            nx : int
+                Number of points to use for the x axis of the grid
+            ny : int
+                Number of points to use for the y axis of the grid
+        
+        :Returns:
+            X : ndarray
+                points of the x axis of the grid
+            Y : ndarray
+                points of the y axis of the grid
+            Z : ndarray
+                values of the density on X and Y
+            V : ndarray
+                Contour values to display.
+            
+        Note
+        ----
+        X, Y, Z and V are as expected by matplotlib contour function."""
         if self.is1d:
             raise ValueError("This function does not make sense for 1d "
                 "mixtures.")
@@ -397,13 +555,14 @@
         X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \
                 N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim)
         lden = N.log(den)
+        # XXX: how to find "good" values for level ?
         V = [-5, -3, -1, -0.5, ]
         V.extend(N.linspace(0, N.max(lden), 4).tolist())
         return X, Y, lden, N.array(V)
 
-    def _densityctr(self, xrange, yrange, dim = misc.DEF_VIS_DIM):
+    def _densityctr(self, rangex, rangey, dim = misc.DEF_VIS_DIM):
         """Helper function to compute density contours on a grid."""
-        gr = N.meshgrid(xrange, yrange)
+        gr = N.meshgrid(rangex, rangey)
         X = gr[0].flatten()
         Y = gr[1].flatten()
         xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1)
@@ -412,7 +571,7 @@
         dva = self._get_va(dim)
         den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w
         den = N.sum(den, 1)
-        den = den.reshape(len(yrange), len(xrange))
+        den = den.reshape(len(rangey), len(rangex))
 
         X = gr[0]
         Y = gr[1]
@@ -435,16 +594,16 @@
 
     # Syntactic sugar
     def __repr__(self):
-        repr    = ""
-        repr    += "Gaussian Mixture:\n"
-        repr    += " -> %d dimensions\n" % self.d
-        repr    += " -> %d components\n" % self.k
-        repr    += " -> %s covariance \n" % self.mode
+        msg = ""
+        msg += "Gaussian Mixture:\n"
+        msg += " -> %d dimensions\n" % self.d
+        msg += " -> %d components\n" % self.k
+        msg += " -> %s covariance \n" % self.mode
         if self.is_valid:
-            repr    += "Has initial values"""
+            msg += "Has initial values"""
         else:
-            repr    += "Has no initial values yet"""
-        return repr
+            msg += "Has no initial values yet"""
+        return msg
 
     def __str__(self):
         return self.__repr__()
@@ -472,19 +631,26 @@
 
 def check_gmm_param(w, mu, va):
     """Check that w, mu and va are valid parameters for
-    a mixture of gaussian: w should sum to 1, there should
-    be the same number of component in each param, the variances
-    should be positive definite, etc... 
+    a mixture of gaussian.
     
-    Params:
-        w   = vector or list of weigths of the mixture (K elements)
-        mu  = matrix: K * d
-        va  = list of variances (vector K * d or square matrices Kd * d)
+    w should sum to 1, there should be the same number of component in each
+    param, the variances should be positive definite, etc... 
+    
+    :Parameters:
+        w : ndarray
+            vector or list of weights of the mixture (K elements)
+        mu : ndarray
+            matrix: K * d
+        va : ndarray
+            list of variances (vector K * d or square matrices Kd * d)
 
-    returns:
-        K   = number of components
-        d   = dimension
-        mode    = 'diag' if diagonal covariance, 'full' of full matrices
+    :Returns:
+        k : int
+            number of components
+        d : int
+            dimension
+        mode : string
+            'diag' if diagonal covariance, 'full' for full matrices
     """
         
     # Check that w is valid
@@ -527,34 +693,35 @@
     return K, d, mode
         
 if __name__ == '__main__':
-    # Meta parameters:
-    #   - k = number of components
-    #   - d = dimension
-    #   - mode : mode of covariance matrices
-    d       = 5
-    k       = 4
+    pass
+    ## # Meta parameters:
+    ## #   - k = number of components
+    ## #   - d = dimension
+    ## #   - mode : mode of covariance matrices
+    ## d       = 5
+    ## k       = 4
 
-    # Now, drawing a model
-    mode    = 'full'
-    nframes = 1e3
+    ## # Now, drawing a model
+    ## mode    = 'full'
+    ## nframes = 1e3
 
-    # Build a model with random parameters
-    w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
-    gm          = GM.fromvalues(w, mu, va)
+    ## # Build a model with random parameters
+    ## w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
+    ## gm          = GM.fromvalues(w, mu, va)
 
-    # Sample nframes frames  from the model
-    X   = gm.sample(nframes)
+    ## # Sample nframes frames  from the model
+    ## X   = gm.sample(nframes)
 
-    # Plot the data
-    import pylab as P
-    P.plot(X[:, 0], X[:, 1], '.', label = '_nolegend_')
+    ## # Plot the data
+    ## import pylab as P
+    ## P.plot(X[:, 0], X[:, 1], '.', label = '_nolegend_')
 
-    # Real confidence ellipses with confidence level 
-    level       = 0.50
-    h           = gm.plot(level=level)
+    ## # Real confidence ellipses with confidence level 
+    ## level       = 0.50
+    ## h           = gm.plot(level=level)
 
-    # set the first ellipse label, which will appear in the legend
-    h[0].set_label('confidence ell at level ' + str(level))
+    ## # set the first ellipse label, which will appear in the legend
+    ## h[0].set_label('confidence ell at level ' + str(level))
 
-    P.legend(loc = 0)
-    P.show()
+    ## P.legend(loc = 0)
+    ## P.show()

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,6 +1,12 @@
 # /usr/bin/python
-# Last Change: Fri Jun 08 08:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
 
+"""Module implementing GMM, a class to estimate Gaussian mixture models using
+EM, and EM, a class which uses GMM instances to estimate model parameters using
+the Expectation-Maximization algorithm."""
+
+__docformat__ = 'restructuredtext'
+
 # TODO:
 #   - which methods to avoid va shrinking to 0 ? There are several options, 
 #   not sure which ones are appropriates
@@ -8,22 +14,23 @@
 #   - online EM
 
 import numpy as N
-import numpy.linalg as lin
+#import numpy.linalg as lin
 from numpy.random import randn
 #import _c_densities as densities
 import densities
 #from kmean import kmean
 from scipy.cluster.vq import kmeans2 as kmean
-from gauss_mix import GM
+#from gauss_mix import GM
 
-from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
+#from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
 
 # Error classes
 class GmmError(Exception):
     """Base class for exceptions in this module."""
-    pass
+    def __init__(self):
+        Exception.__init__(self)
 
-class GmmParamError:
+class GmmParamError(GmmError):
     """Exception raised for errors in gmm params
 
     Attributes:
@@ -31,41 +38,33 @@
         message -- explanation of the error
     """
     def __init__(self, message):
+        GmmError.__init__(self)
         self.message    = message
     
     def __str__(self):
         return self.message
 
-# Not sure yet about how to design different mixture models. Most of the code 
-# is different # (pdf, update part of EM, etc...) and I am not sure it makes 
-# sense to use inheritance for # interface specification in python, since its 
-# dynamic type systeme.
-
-# Anyway, a mixture model class should encapsulates all details 
-# concerning getting sufficient statistics (SS), likelihood and bic.
 class MixtureModel(object):
     pass
 
 class ExpMixtureModel(MixtureModel):
-    """Class to model mixture of exponential pdf (eg Gaussian, exponential, Laplace, 
-    etc..). This is a special case because some parts of EM are common for those
-    models..."""
+    """Class to model mixture of exponential pdf (eg Gaussian, exponential,
+    Laplace, etc..). This is a special case because some parts of EM are common
+    for those models..."""
     pass
 
 class GMM(ExpMixtureModel):
-    """ A class to model a Gaussian Mixture Model (GMM). An instance of 
-    this class is created by giving weights, mean and variances in the ctor.
-    An instanciated object can be sampled, trained by EM. 
-    
-    The class method gen_model can be used without instanciation."""
-
+    """ A class to model a Gaussian Mixture Model (GMM). An instance of this
+    class is created by giving weights, mean and variances in the ctor.  An
+    instantiated object can be sampled, trained by EM. """
     def init_kmean(self, data, niter = 5):
         """ Init the model with kmean."""
         k       = self.gm.k
         d       = self.gm.d
         init    = data[0:k, :]
 
-        # XXX: This is bogus: should do better (in kmean or here, do not know yet)
+        # XXX: This is a bogus initialization: should do better (in kmean or
+        # here, do not know yet)
         (code, label)   = kmean(data, init, niter, minit = 'matrix')
 
         w   = N.ones(k) / k
@@ -74,14 +73,15 @@
             va = N.zeros((k, d))
             for i in range(k):
                 for j in range(d):
-                    va[i,j] = N.cov(data[N.where(label==i), j], rowvar = 0)
+                    va[i, j] = N.cov(data[N.where(label==i), j], rowvar = 0)
         elif self.gm.mode == 'full':
             va  = N.zeros((k*d, d))
             for i in range(k):
-                va[i*d:i*d+d,:] = \
+                va[i*d:i*d+d, :] = \
                     N.cov(data[N.where(label==i)], rowvar = 0)
         else:
-            raise GmmParamError("mode " + str(mode) + " not recognized")
+            raise GmmParamError("mode " + str(self.gm.mode) + \
+                    " not recognized")
 
         self.gm.set_param(w, mu, va)
 
@@ -96,8 +96,8 @@
             mu  = randn(k, d)
             va  = N.fabs(randn(k, d))
         else:
-            raise GmmParamError("""init_random not implemented for
-                    mode %s yet""", mode)
+            raise GmmParamError("init_random not implemented for "
+                    "mode %s yet", self.gm.mode)
 
         self.gm.set_param(w, mu, va)
         
@@ -109,8 +109,18 @@
     #   - To handle the different modes, we could do something "fancy" such as
     #   replacing methods, to avoid checking cases everywhere and unconsistency.
     def __init__(self, gm, init = 'kmean'):
-        """ Initialize a GMM with weight w, mean mu and variances va, and initialization
-        method for training init (kmean by default)"""
+        """Initialize a mixture model.
+        
+        Initialize the model from a GM instance. This class implements all the
+        necessary functionalities for EM.
+
+        :Parameters:
+            gm : GM
+                the mixture model to train.
+            init : string
+                initialization method to use.
+        
+        """
         self.gm = gm
 
         # Possible init methods
@@ -124,17 +134,18 @@
         self.initst = init
 
     def sufficient_statistics(self, data):
-        """ Return normalized and non-normalized sufficient statistics
-        from the model.
+        """Compute responsibilities.
         
-        Computes the latent variable distribution (a 
-        posteriori probability) knowing the explicit data 
-        for the Gaussian model (w, mu, var): gamma(t, i) = 
-            P[state = i | observation = data(t); w, mu, va]
+        Return normalized and non-normalized sufficient statistics from the
+        model.
+        
+        Note
+        ----
+        Computes the latent variable distribution (a posteriori probability)
+        knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
+        i) = P[state = i | observation = data(t); w, mu, va]
 
         This is basically the E step of EM for GMM."""
-        n   = data.shape[0]
-
         # compute the gaussian pdf
         tgd	= densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
         # multiply by the weight
@@ -149,22 +160,22 @@
         from the a posteriori pdf, computed by gmm_posterior
         (E step).
         """
-        k       = self.gm.k
-        d       = self.gm.d
-        n       = data.shape[0]
-        invn    = 1.0/n
-        mGamma  = N.sum(gamma, axis = 0)
+        k = self.gm.k
+        d = self.gm.d
+        n = data.shape[0]
+        invn = 1.0/n
+        mGamma = N.sum(gamma, axis = 0)
 
         if self.gm.mode == 'diag':
-            mu      = N.zeros((k, d))
-            va      = N.zeros((k, d))
-            gamma   = gamma.T
+            mu = N.zeros((k, d))
+            va = N.zeros((k, d))
+            gamma = gamma.T
             for c in range(k):
-                x   = N.dot(gamma[c:c+1,:], data)[0,:]
-                xx  = N.dot(gamma[c:c+1,:], data ** 2)[0,:]
+                x = N.dot(gamma[c:c+1, :], data)[0, :]
+                xx = N.dot(gamma[c:c+1, :], data ** 2)[0, :]
 
-                mu[c,:] = x / mGamma[c]
-                va[c,:] = xx  / mGamma[c] - mu[c,:] ** 2
+                mu[c, :] = x / mGamma[c]
+                va[c, :] = xx  / mGamma[c] - mu[c, :] ** 2
             w   = invn * mGamma
 
         elif self.gm.mode == 'full':
@@ -177,21 +188,22 @@
             mu  = N.zeros((k, d))
             va  = N.zeros((k*d, d))
 
-            gamma   = gamma.transpose()
+            gamma = gamma.transpose()
             for c in range(k):
                 #x   = N.sum(N.outer(gamma[:, c], 
                 #            N.ones((1, d))) * data, axis = 0)
-                x   = N.dot(gamma[c:c+1,:], data)[0,:]
-                xx  = N.zeros((d, d))
+                x = N.dot(gamma[c:c+1, :], data)[0, :]
+                xx = N.zeros((d, d))
                 
                 # This should be much faster than recursing on n...
                 for i in range(d):
                     for j in range(d):
-                        xx[i,j] = N.sum(data[:,i] * data[:,j] * gamma[c,:], axis = 0)
+                        xx[i, j] = N.sum(data[:, i] * data[:, j] * gamma[c, :],
+                                axis = 0)
 
-                mu[c,:] = x / mGamma[c]
-                va[c*d:c*d+d,:] = xx  / mGamma[c] - \
-                                    N.outer(mu[c,:], mu[c,:])
+                mu[c, :] = x / mGamma[c]
+                va[c*d:c*d+d, :] = xx  / mGamma[c] \
+                        - N.outer(mu[c, :], mu[c, :])
             w   = invn * mGamma
         else:
             raise GmmParamError("varmode not recognized")
@@ -226,19 +238,17 @@
         of the definition given here. """
 
         if self.gm.mode == 'diag':
-            """ for a diagonal model, we have
-            k - 1 (k weigths, but one constraint of normality)
-            + k * d (means) + k * d (variances) """
+            # for a diagonal model, we have k - 1 (k weights, but one
+            # constraint of normality) + k * d (means) + k * d (variances)
             free_deg    = self.gm.k * (self.gm.d * 2 + 1) - 1
         elif self.gm.mode == 'full':
-            """ for a full model, we have
-            k - 1 (k weigths, but one constraint of normality)
-            + k * d (means) + k * d * d / 2 (each covariance matrice
-            has d **2 params, but with positivity constraint) """
+            # for a full model, we have k - 1 (k weights, but one constraint of
+            # normality) + k * d (means) + k * d * d / 2 (each covariance
+            # matrix has d ** 2 params, but with positivity constraint)
             if self.gm.d == 1:
-                free_deg    = self.gm.k * 3 - 1
+                free_deg = self.gm.k * 3 - 1
             else:
-                free_deg    = self.gm.k * (self.gm.d + 1 + self.gm.d ** 2 / 2) - 1
+                free_deg = self.gm.k * (self.gm.d + 1 + self.gm.d ** 2 / 2) - 1
 
         lk  = self.likelihood(data)
         n   = N.shape(data)[0]
@@ -261,21 +271,32 @@
         pass
     
     def train(self, data, model, maxiter = 10, thresh = 1e-5):
-        """
-        Train a model using data, and stops when the likelihood fails
-        behind a threshold, or when the number of iterations > niter, 
-        whichever comes first
+        """Train a model using EM.
 
-        Args:
-            - data:     contains the observed features, one row is one frame, ie one 
-            observation of dimension d
-            - model:    object of class Mixture
-            - maxiter:  maximum number of iterations
+        Train a model using data, and stop when the likelihood increase
+        between two consecutive iterations falls below a threshold, or when
+        the number of iterations reaches maxiter, whichever comes first.
 
-        The model is trained, and its parameters updated accordingly.
+        :Parameters:
+            data : ndarray
+                contains the observed features, one row is one frame, ie one
+                observation of dimension d
+            model : GMM
+                GMM instance.
+            maxiter : int
+                maximum number of iterations
+            thresh : float
+                if the slope of the likelihood falls below this value, the
+                algorithm stops.
 
-        Returns:
-            likelihood (one value per iteration).
+        :Returns:
+            likelihood : ndarray
+                one value per iteration.
+
+        Note
+        ----
+        The model is trained, and its parameters updated accordingly, i.e.
+        the results are put in the GMM instance.
         """
         if not isinstance(model, MixtureModel):
             raise TypeError("expect a MixtureModel as a model")
@@ -296,62 +317,24 @@
             model.update_em(data, g)
             if has_em_converged(like[i], like[i-1], thresh):
                 return like[0:i]
-        # # Em computation, with computation of the likelihood
-        # g, tgd      = model.sufficient_statistics(data)
-        # like[0]     = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
-        # model.update_em(data, g)
-        # for i in range(1, maxiter):
-        #     print "=== Iteration %d ===" % i
-        #     isreg   = False
-        #     for j in range(model.gm.k):
-        #         va  = model.gm.va[j]
-        #         if va.any() < _MIN_INV_COND:
-        #             isreg   = True
-        #             print "\tregularization detected"
-        #             print "\t" + str(va)
-        #             model.gm.va[j]  = regularize_diag(va)
-        #             print "\t" + str(va) + ", " + str(model.gm.va[j])
-        #             print "\t" + str(gauss_den(data, model.gm.mu[j], model.gm.va[j]))
-        #             print "\tend regularization detected"
-        #             var = va
-        #         
-        #     g, tgd      = model.sufficient_statistics(data)
-        #     try:
-        #         assert not( (N.isnan(tgd)).any() )
-        #         if isreg:
-        #             print var
-        #     except AssertionError:
-        #         print "tgd is nan..."
-        #         print model.gm.va[13,:]
-        #         print 1/model.gm.va[13,:]
-        #         print densities.gauss_den(data, model.gm.mu[13], model.gm.va[13])
-        #         print N.isnan((multiple_gauss_den(data, model.gm.mu, model.gm.va))).any()
-        #         print "Exciting"
-        #         import sys
-        #         sys.exit(-1)
-        #     like[i]     = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
-        #     model.update_em(data, g)
-        #     assert not( model.gm.va.any() < 1e-6)
-        #     if has_em_converged(like[i], like[i-1], thresh):
-        #         return like[0:i]
 
         return like
     
-def regularize_diag(variance, alpha = _DEF_ALPHA):
-    delta   = N.sum(variance) / variance.size
-    if delta > _MIN_DBL_DELTA:
-        return variance + alpha * delta
-    else:
-        return variance + alpha * _MIN_DBL_DELTA
+#def regularize_diag(variance, alpha = _DEF_ALPHA):
+#    delta   = N.sum(variance) / variance.size
+#    if delta > _MIN_DBL_DELTA:
+#        return variance + alpha * delta
+#    else:
+#        return variance + alpha * _MIN_DBL_DELTA
+#
+#def regularize_full(variance):
+#    # Trace of a positive definite matrix is always > 0
+#    delta   = N.trace(variance) / variance.shape[0]
+#    if delta > _MIN_DBL_DELTA:
+#        return variance + alpha * delta
+#    else:
+#        return variance + alpha * _MIN_DBL_DELTA
 
-def regularize_full(variance):
-    # Trace of a positive definite matrix is always > 0
-    delta   = N.trace(variance) / variance.shape[0]
-    if delta > _MIN_DBL_DELTA:
-        return variance + alpha * delta
-    else:
-        return variance + alpha * _MIN_DBL_DELTA
-
 # Misc functions
 def bic(lk, deg, n):
     """ Expects lk to be log likelihood """
@@ -369,127 +352,129 @@
         return False
 
 if __name__ == "__main__":
-    import copy
-    #=============================
-    # Simple GMM with 5 components
-    #=============================
+    pass
+    ## import copy
+    ## #=============================
+    ## # Simple GMM with 5 components
+    ## #=============================
 
-    #+++++++++++++++++++++++++++++
-    # Meta parameters of the model
-    #   - k: Number of components
-    #   - d: dimension of each Gaussian
-    #   - mode: Mode of covariance matrix: full or diag
-    #   - nframes: number of frames (frame = one data point = one
-    #   row of d elements
-    k       = 2 
-    d       = 1
-    mode    = 'full'
-    nframes = 1e3
+    ## #+++++++++++++++++++++++++++++
+    ## # Meta parameters of the model
+    ## #   - k: Number of components
+    ## #   - d: dimension of each Gaussian
+    ## #   - mode: Mode of covariance matrix: full or diag
+    ## #   - nframes: number of frames (frame = one data point = one
+    ## #   row of d elements
+    ## k       = 2 
+    ## d       = 1
+    ## mode    = 'full'
+    ## nframes = 1e3
 
-    #+++++++++++++++++++++++++++++++++++++++++++
-    # Create an artificial GMM model, samples it
-    #+++++++++++++++++++++++++++++++++++++++++++
-    print "Generating the mixture"
-    # Generate a model with k components, d dimensions
-    w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
-    gm          = GM(d, k, mode)
-    gm.set_param(w, mu, va)
+    ## #+++++++++++++++++++++++++++++++++++++++++++
+    ## # Create an artificial GMM model, samples it
+    ## #+++++++++++++++++++++++++++++++++++++++++++
+    ## print "Generating the mixture"
+    ## # Generate a model with k components, d dimensions
+    ## w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
+    ## gm          = GM(d, k, mode)
+    ## gm.set_param(w, mu, va)
 
-    # Sample nframes frames  from the model
-    data    = gm.sample(nframes)
+    ## # Sample nframes frames  from the model
+    ## data    = gm.sample(nframes)
 
-    #++++++++++++++++++++++++
-    # Learn the model with EM
-    #++++++++++++++++++++++++
+    ## #++++++++++++++++++++++++
+    ## # Learn the model with EM
+    ## #++++++++++++++++++++++++
 
-    # Init the model
-    print "Init a model for learning, with kmean for initialization"
-    lgm = GM(d, k, mode)
-    gmm = GMM(lgm, 'kmean')
-    gmm.init(data)
+    ## # Init the model
+    ## print "Init a model for learning, with kmean for initialization"
+    ## lgm = GM(d, k, mode)
+    ## gmm = GMM(lgm, 'kmean')
+    ## gmm.init(data)
 
-    # Keep the initialized model for drawing
-    gm0 = copy.copy(lgm)
+    ## # Keep the initialized model for drawing
+    ## gm0 = copy.copy(lgm)
 
-    # The actual EM, with likelihood computation
-    niter   = 10
-    like    = N.zeros(niter)
+    ## # The actual EM, with likelihood computation
+    ## niter   = 10
+    ## like    = N.zeros(niter)
 
-    print "computing..."
-    for i in range(niter):
-        g, tgd  = gmm.sufficient_statistics(data)
-        like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
-        gmm.update_em(data, g)
-    # # Alternative form, by using EM class: as the EM class
-    # # is quite rudimentary now, it is not very useful, just save
-    # # a few lines
-    # em      = EM()
-    # like    = em.train(data, gmm, niter)
+    ## print "computing..."
+    ## for i in range(niter):
+    ##     g, tgd  = gmm.sufficient_statistics(data)
+    ##     like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
+    ##     gmm.update_em(data, g)
+    ## # # Alternative form, by using EM class: as the EM class
+    ## # # is quite rudimentary now, it is not very useful, just save
+    ## # # a few lines
+    ## # em      = EM()
+    ## # like    = em.train(data, gmm, niter)
 
-    #+++++++++++++++
-    # Draw the model
-    #+++++++++++++++
-    print "drawing..."
-    import pylab as P
-    P.subplot(2, 1, 1)
+    ## #+++++++++++++++
+    ## # Draw the model
+    ## #+++++++++++++++
+    ## print "drawing..."
+    ## import pylab as P
+    ## P.subplot(2, 1, 1)
 
-    if not d == 1:
-        # Draw what is happening
-        P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
+    ## if not d == 1:
+    ##     # Draw what is happening
+    ##     P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
 
-        # Real confidence ellipses
-        Xre, Yre  = gm.conf_ellipses()
-        P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides')
-        for i in range(1,k):
-            P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_')
+    ##     # Real confidence ellipses
+    ##     Xre, Yre  = gm.conf_ellipses()
+    ##     P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides')
+    ##     for i in range(1,k):
+    ##         P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_')
 
-        # Initial confidence ellipses as found by kmean
-        X0e, Y0e  = gm0.conf_ellipses()
-        P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoides')
-        for i in range(1,k):
-            P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_')
+    ##     # Initial confidence ellipses as found by kmean
+    ##     X0e, Y0e  = gm0.conf_ellipses()
+    ##     P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoides')
+    ##     for i in range(1,k):
+    ##         P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_')
 
-        # Values found by EM
-        Xe, Ye  = lgm.conf_ellipses()
-        P.plot(Xe[0], Ye[0], 'r', label = 'confidence ellipsoides found by EM')
-        for i in range(1,k):
-            P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_')
-        P.legend(loc = 0)
-    else:
-        # Real confidence ellipses
-        h   = gm.plot1d()
-        [i.set_color('g') for i in h['pdf']]
-        h['pdf'][0].set_label('true pdf')
+    ##     # Values found by EM
+    ##     Xe, Ye  = lgm.conf_ellipses()
+    ##     P.plot(Xe[0], Ye[0], 'r', label = "confidence ellipsoides found by"
+    ##      "EM")
+    ##     for i in range(1,k):
+    ##         P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_')
+    ##     P.legend(loc = 0)
+    ## else:
+    ##     # Real confidence ellipses
+    ##     h   = gm.plot1d()
+    ##     [i.set_color('g') for i in h['pdf']]
+    ##     h['pdf'][0].set_label('true pdf')
 
-        # Initial confidence ellipses as found by kmean
-        h0  = gm0.plot1d()
-        [i.set_color('k') for i in h0['pdf']]
-        h0['pdf'][0].set_label('initial pdf')
+    ##     # Initial confidence ellipses as found by kmean
+    ##     h0  = gm0.plot1d()
+    ##     [i.set_color('k') for i in h0['pdf']]
+    ##     h0['pdf'][0].set_label('initial pdf')
 
-        # Values found by EM
-        hl  = lgm.plot1d(fill = 1, level = 0.66)
-        [i.set_color('r') for i in hl['pdf']]
-        hl['pdf'][0].set_label('pdf found by EM')
+    ##     # Values found by EM
+    ##     hl  = lgm.plot1d(fill = 1, level = 0.66)
+    ##     [i.set_color('r') for i in hl['pdf']]
+    ##     hl['pdf'][0].set_label('pdf found by EM')
 
-        P.legend(loc = 0)
+    ##     P.legend(loc = 0)
 
-    P.subplot(2, 1, 2)
-    P.plot(like)
-    P.title('log likelihood')
+    ## P.subplot(2, 1, 2)
+    ## P.plot(like)
+    ## P.title('log likelihood')
 
-    # #++++++++++++++++++
-    # # Export the figure
-    # #++++++++++++++++++
-    # F   = P.gcf()
-    # DPI = F.get_dpi()
-    # DefaultSize = F.get_size_inches()
-    # # the default is 100dpi for savefig:
-    # F.savefig("example1.png")
+    ## # #++++++++++++++++++
+    ## # # Export the figure
+    ## # #++++++++++++++++++
+    ## # F   = P.gcf()
+    ## # DPI = F.get_dpi()
+    ## # DefaultSize = F.get_size_inches()
+    ## # # the default is 100dpi for savefig:
+    ## # F.savefig("example1.png")
 
-    # # Now make the image twice as big, while keeping the fonts and all the
-    # # same size
-    # F.set_figsize_inches( (DefaultSize[0]*2, DefaultSize[1]*2) )
-    # Size = F.get_size_inches()
-    # print "Size in Inches", Size
-    # F.savefig("example2.png")
-    P.show()
+    ## # # Now make the image twice as big, while keeping the fonts and all the
+    ## # # same size
+    ## # F.set_figsize_inches( (DefaultSize[0]*2, DefaultSize[1]*2) )
+    ## # Size = F.get_size_inches()
+    ## # print "Size in Inches", Size
+    ## # F.savefig("example2.png")
+    ## P.show()

Modified: trunk/Lib/sandbox/pyem/info.py
===================================================================
--- trunk/Lib/sandbox/pyem/info.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/info.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,61 +1,63 @@
 """
-Routines for Gaussian Mixture Models
-and learning with Expectation Maximization 
-==========================================
+Routines for Gaussian Mixture Models and learning with Expectation Maximization 
+===============================================================================
 
-This module contains classes and function to compute multivariate Gaussian densities
-(diagonal and full covariance matrices), Gaussian mixtures, Gaussian mixtures models
-and an Em trainer.
+This module contains classes and functions to compute multivariate Gaussian
+densities (diagonal and full covariance matrices), Gaussian mixtures, Gaussian
+mixture models and an EM trainer.
 
 More specifically, the module defines the following classes, functions:
 
 - densities.gauss_den: function to compute multivariate Gaussian pdf 
-- gauss_mix.GM: defines the GM (Gaussian Mixture) class. A Gaussian Mixture can be
-created from its parameters weights, mean and variances, or from its meta parameters
-d (dimension of the Gaussian) and k (number of components in the mixture). A Gaussian
-Model can then be sampled or plot (if d>1, plot confidence ellipsoids projected on 
-2 chosen dimensions, if d == 1, plot the pdf of each component and fill the zone
-of confidence for a given level)
-- gmm_em.GMM: defines a class GMM (Gaussian Mixture Model). This class is constructed
-from a GM model gm, and can be used to train gm. The GMM can be initiated by
-kmean or at random, and can compute sufficient statistics, and update its parameters
-from the sufficient statistics.
-- kmean.kmean: implements a kmean algorithm. We cannot use scipy.cluster.vq kmeans, since
-its does not give membership of observations.
+- gauss_mix.GM: defines the GM (Gaussian Mixture) class. A Gaussian Mixture can
+  be created from its parameters weights, mean and variances, or from its meta
+  parameters d (dimension of the Gaussian) and k (number of components in the
+  mixture). A Gaussian Model can then be sampled or plot (if d>1, plot
+  confidence ellipsoids projected on 2 chosen dimensions, if d == 1, plot the
+  pdf of each component and fill the zone of confidence for a given level)
+- gmm_em.GMM: defines a class GMM (Gaussian Mixture Model). This class is
+  constructed from a GM model gm, and can be used to train gm. The GMM can be
+  initiated by kmean or at random, and can compute sufficient statistics, and
+  update its parameters from the sufficient statistics.
+- kmean.kmean: implements a kmean algorithm. We cannot use scipy.cluster.vq
+  kmeans, since it does not give membership of observations.
 
 Example of use: 
-    #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-    # Create an artificial 2 dimension, 3 clusters GM model, samples it
-    #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-    w, mu, va   = GM.gen_param(2, 3, 'diag', spread = 1.5)
-    gm          = GM.fromvalues(w, mu, va)
+---------------
 
-    # Sample 1000 frames  from the model
-    data    = gm.sample(1000)
+>>> #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+>>> # Create an artificial 2 dimension, 3 clusters GM model, samples it
+>>> #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+>>> w, mu, va   = GM.gen_param(2, 3, 'diag', spread = 1.5)
+>>> gm          = GM.fromvalues(w, mu, va)
+>>> 
+>>> # Sample 1000 frames  from the model
+>>> data    = gm.sample(1000)
+>>> 
+>>> #++++++++++++++++++++++++
+>>> # Learn the model with EM
+>>> #++++++++++++++++++++++++
+>>> # Init the model
+>>> lgm = GM(d, k, mode)
+>>> gmm = GMM(lgm, 'kmean')
+>>> 
+>>> # The actual EM, with likelihood computation. The threshold
+>>> # is compared to the (linearly approximated) derivative of the likelihood
+>>> em      = EM()
+>>> like    = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
 
-    #++++++++++++++++++++++++
-    # Learn the model with EM
-    #++++++++++++++++++++++++
-    # Init the model
-    lgm = GM(d, k, mode)
-    gmm = GMM(lgm, 'kmean')
-
-    # The actual EM, with likelihood computation. The threshold
-    # is compared to the (linearly appromixated) derivative of the likelihood
-    em      = EM()
-    like    = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
-
 Files example.py and example2.py show more capabilities of the toolbox, including
 plotting capabilities (using matplotlib) and model selection using Bayesian 
 Information Criterion (BIC).
 
 Bibliography:
-    * Maximum likelihood from incomplete data via the EM algorithm in Journal of 
-    the Royal Statistical Society, Series B, 39(1):1--38, 1977, by A. P. Dempster, 
-    N. M. Laird, and D. B. Rubin
-    * Bayesian Approaches to Gaussian Mixture Modelling (1998) by 
-    Stephen J. Roberts, Dirk Husmeier, Iead Rezek, William Penny in 
-    IEEE Transactions on Pattern Analysis and Machine Intelligence
+
+- Maximum likelihood from incomplete data via the EM algorithm in Journal of
+  the Royal Statistical Society, Series B, 39(1):1--38, 1977, by A. P.
+  Dempster, N. M. Laird, and D. B. Rubin
+- Bayesian Approaches to Gaussian Mixture Modelling (1998) by Stephen J.
+  Roberts, Dirk Husmeier, Iead Rezek, William Penny in IEEE Transactions on
+  Pattern Analysis and Machine Intelligence
      
 Copyright: David Cournapeau 2006
 License: BSD-style (see LICENSE.txt in main source directory)

Modified: trunk/Lib/sandbox/pyem/online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/online_em.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/online_em.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,28 +1,27 @@
 # /usr/bin/python
-# Last Change: Fri Jun 08 08:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
 
-#---------------------------------------------
-# This is not meant to be used yet !!!! I am 
-# not sure how to integrate this stuff inside
-# the package yet. The cases are:
-#   - we have a set of data, and we want to test online EM 
-#   compared to normal EM 
-#   - we do not have all the data before putting them in online EM:
-#   eg current frame depends on previous frame in some way.
+# This is not meant to be used yet !!!! I am not sure how to integrate this
+# stuff inside the package yet. The cases are:
+#   - we have a set of data, and we want to test online EM compared to normal
+#   EM 
+#   - we do not have all the data before putting them in online EM: eg current
+#   frame depends on previous frame in some way.
 
 # TODO:
 #   - Add biblio
-#   - Look back at articles for discussion for init, regularization and 
+#   - Look back at articles for discussion for init, regularization and
 #   convergence rates
-#   - the function sufficient_statistics does not really return SS. This is not a
-#   big problem, but it would be better to really return them as the name implied.
+#   - the function sufficient_statistics does not really return SS. This is not
+#   a big problem, but it would be better to really return them as the name
+#   implies.
 
 import numpy as N
 from numpy import mean
 from numpy.testing import assert_array_almost_equal, assert_array_equal
 
-from gmm_em import ExpMixtureModel, GMM, EM
-from gauss_mix import GM
+from gmm_em import ExpMixtureModel#, GMM, EM
+#from gauss_mix import GM
 from scipy.cluster.vq import kmeans2 as kmean
 import densities2 as D
 
@@ -60,22 +59,24 @@
         k   = self.gm.k
         d   = self.gm.d
         if self.gm.mode == 'diag':
-            w           = N.ones(k) / k
+            w  = N.ones(k) / k
 
             # Init the internal state of EM
-            self.cx     = N.outer(w, mean(init_data, 0))
-            self.cxx    = N.outer(w, mean(init_data ** 2, 0))
+            self.cx = N.outer(w, mean(init_data, 0))
+            self.cxx = N.outer(w, mean(init_data ** 2, 0))
 
             # w, mu and va init is the same that in the standard case
-            (code, label)   = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
-            mu          = code.copy()
-            va          = N.zeros((k, d))
+            (code, label) = kmean(init_data, init_data[0:k, :], iter = 10,
+                    minit = 'matrix')
+            mu = code.copy()
+            va = N.zeros((k, d))
             for i in range(k):
                 for j in range(d):
-                    va [i,j] = N.cov(init_data[N.where(label==i), j], rowvar = 0)
+                    va [i, j] = N.cov(init_data[N.where(label==i), j], 
+                            rowvar = 0)
         else:
             raise OnGmmParamError("""init_online not implemented for
-                    mode %s yet""", mode)
+                    mode %s yet""", self.gm.mode)
 
         self.gm.set_param(w, mu, va)
         # c* are the parameters which are computed at every step (ie
@@ -95,22 +96,24 @@
         k   = self.gm.k
         d   = self.gm.d
         if self.gm.mode == 'diag':
-            w           = N.ones(k) / k
+            w  = N.ones(k) / k
 
             # Init the internal state of EM
-            self.cx     = N.outer(w, mean(init_data, 0))
-            self.cxx    = N.outer(w, mean(init_data ** 2, 0))
+            self.cx = N.outer(w, mean(init_data, 0))
+            self.cxx = N.outer(w, mean(init_data ** 2, 0))
 
             # w, mu and va init is the same that in the standard case
-            (code, label)   = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
-            mu          = code.copy()
-            va          = N.zeros((k, d))
+            (code, label) = kmean(init_data, init_data[0:k, :], 
+                    iter = niter, minit = 'matrix')
+            mu = code.copy()
+            va = N.zeros((k, d))
             for i in range(k):
                 for j in range(d):
-                    va [i,j] = N.cov(init_data[N.where(label==i), j], rowvar = 0)
+                    va[i, j] = N.cov(init_data[N.where(label==i), j], 
+                            rowvar = 0)
         else:
             raise OnGmmParamError("""init_online not implemented for
-                    mode %s yet""", mode)
+                    mode %s yet""", self.gm.mode)
 
         self.gm.set_param(w, mu, va)
         # c* are the parameters which are computed at every step (ie
@@ -278,132 +281,133 @@
         
 
 if __name__ == '__main__':
-    d       = 1
-    k       = 2
-    mode    = 'diag'
-    nframes = int(5e3)
-    emiter  = 4
-    seed(5)
+    pass
+    #d       = 1
+    #k       = 2
+    #mode    = 'diag'
+    #nframes = int(5e3)
+    #emiter  = 4
+    #seed(5)
 
-    #+++++++++++++++++++++++++++++++++++++++++++++++++
-    # Generate a model with k components, d dimensions
-    #+++++++++++++++++++++++++++++++++++++++++++++++++
-    w, mu, va   = GM.gen_param(d, k, mode, spread = 1.5)
-    gm          = GM.fromvalues(w, mu, va)
-    # Sample nframes frames  from the model
-    data        = gm.sample(nframes)
+    ##+++++++++++++++++++++++++++++++++++++++++++++++++
+    ## Generate a model with k components, d dimensions
+    ##+++++++++++++++++++++++++++++++++++++++++++++++++
+    #w, mu, va   = GM.gen_param(d, k, mode, spread = 1.5)
+    #gm          = GM.fromvalues(w, mu, va)
+    ## Sample nframes frames  from the model
+    #data        = gm.sample(nframes)
 
-    #++++++++++++++++++++++++++++++++++++++++++
-    # Approximate the models with classical EM
-    #++++++++++++++++++++++++++++++++++++++++++
-    # Init the model
-    lgm = GM(d, k, mode)
-    gmm = GMM(lgm, 'kmean')
-    gmm.init(data)
+    ##++++++++++++++++++++++++++++++++++++++++++
+    ## Approximate the models with classical EM
+    ##++++++++++++++++++++++++++++++++++++++++++
+    ## Init the model
+    #lgm = GM(d, k, mode)
+    #gmm = GMM(lgm, 'kmean')
+    #gmm.init(data)
 
-    gm0    = copy.copy(gmm.gm)
-    # The actual EM, with likelihood computation
-    like    = N.zeros(emiter)
-    for i in range(emiter):
-        g, tgd  = gmm.sufficient_statistics(data)
-        like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
-        gmm.update_em(data, g)
+    #gm0    = copy.copy(gmm.gm)
+    ## The actual EM, with likelihood computation
+    #like    = N.zeros(emiter)
+    #for i in range(emiter):
+    #    g, tgd  = gmm.sufficient_statistics(data)
+    #    like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
+    #    gmm.update_em(data, g)
 
-    #++++++++++++++++++++++++++++++++++++++++
-    # Approximate the models with online EM
-    #++++++++++++++++++++++++++++++++++++++++
-    ogm     = GM(d, k, mode)
-    ogmm    = OnGMM(ogm, 'kmean')
-    init_data   = data[0:nframes / 20, :]
-    ogmm.init(init_data)
+    ##++++++++++++++++++++++++++++++++++++++++
+    ## Approximate the models with online EM
+    ##++++++++++++++++++++++++++++++++++++++++
+    #ogm     = GM(d, k, mode)
+    #ogmm    = OnGMM(ogm, 'kmean')
+    #init_data   = data[0:nframes / 20, :]
+    #ogmm.init(init_data)
 
-    # Forgetting param
-    ku		= 0.005
-    t0		= 200
-    lamb	= 1 - 1/(N.arange(-1, nframes-1) * ku + t0)
-    nu0		= 0.2
-    nu		= N.zeros((len(lamb), 1))
-    nu[0]	= nu0
-    for i in range(1, len(lamb)):
-        nu[i]	= 1./(1 + lamb[i] / nu[i-1])
+    ## Forgetting param
+    #ku		= 0.005
+    #t0		= 200
+    #lamb	= 1 - 1/(N.arange(-1, nframes-1) * ku + t0)
+    #nu0		= 0.2
+    #nu		= N.zeros((len(lamb), 1))
+    #nu[0]	= nu0
+    #for i in range(1, len(lamb)):
+    #    nu[i]	= 1./(1 + lamb[i] / nu[i-1])
 
-    print "meth1"
-    # object version of online EM
-    for t in range(nframes):
-        ogmm.compute_sufficient_statistics_frame(data[t], nu[t])
-        ogmm.update_em_frame()
+    #print "meth1"
+    ## object version of online EM
+    #for t in range(nframes):
+    #    ogmm.compute_sufficient_statistics_frame(data[t], nu[t])
+    #    ogmm.update_em_frame()
 
-    ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
+    #ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
 
-    # 1d optimized version
-    ogm2        = GM(d, k, mode)
-    ogmm2       = OnGMM1d(ogm2, 'kmean')
-    ogmm2.init(init_data[:, 0])
+    ## 1d optimized version
+    #ogm2        = GM(d, k, mode)
+    #ogmm2       = OnGMM1d(ogm2, 'kmean')
+    #ogmm2.init(init_data[:, 0])
 
-    print "meth2"
-    # object version of online EM
-    for t in range(nframes):
-        ogmm2.compute_sufficient_statistics_frame(data[t, 0], nu[t])
-        ogmm2.update_em_frame()
+    #print "meth2"
+    ## object version of online EM
+    #for t in range(nframes):
+    #    ogmm2.compute_sufficient_statistics_frame(data[t, 0], nu[t])
+    #    ogmm2.update_em_frame()
 
-    #ogmm2.gm.set_param(ogmm2.cw, ogmm2.cmu, ogmm2.cva)
+    ##ogmm2.gm.set_param(ogmm2.cw, ogmm2.cmu, ogmm2.cva)
 
-    print ogmm.cw
-    print ogmm2.cw
-    #+++++++++++++++
-    # Draw the model
-    #+++++++++++++++
-    print "drawing..."
-    import pylab as P
-    P.subplot(2, 1, 1)
+    #print ogmm.cw
+    #print ogmm2.cw
+    ##+++++++++++++++
+    ## Draw the model
+    ##+++++++++++++++
+    #print "drawing..."
+    #import pylab as P
+    #P.subplot(2, 1, 1)
 
-    if not d == 1:
-        # Draw what is happening
-        P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
+    #if not d == 1:
+    #    # Draw what is happening
+    #    P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
 
-        h   = gm.plot()    
-        [i.set_color('g') for i in h]
-        h[0].set_label('true confidence ellipsoides')
+    #    h   = gm.plot()    
+    #    [i.set_color('g') for i in h]
+    #    h[0].set_label('true confidence ellipsoides')
 
-        h   = gm0.plot()    
-        [i.set_color('k') for i in h]
-        h[0].set_label('initial confidence ellipsoides')
+    #    h   = gm0.plot()    
+    #    [i.set_color('k') for i in h]
+    #    h[0].set_label('initial confidence ellipsoides')
 
-        h   = lgm.plot()    
-        [i.set_color('r') for i in h]
-        h[0].set_label('confidence ellipsoides found by EM')
+    #    h   = lgm.plot()    
+    #    [i.set_color('r') for i in h]
+    #    h[0].set_label('confidence ellipsoides found by EM')
 
-        h   = ogmm.gm.plot()    
-        [i.set_color('m') for i in h]
-        h[0].set_label('confidence ellipsoides found by Online EM')
+    #    h   = ogmm.gm.plot()    
+    #    [i.set_color('m') for i in h]
+    #    h[0].set_label('confidence ellipsoides found by Online EM')
 
-        # P.legend(loc = 0)
-    else:
-        # Real confidence ellipses
-        h   = gm.plot1d()
-        [i.set_color('g') for i in h['pdf']]
-        h['pdf'][0].set_label('true pdf')
+    #    # P.legend(loc = 0)
+    #else:
+    #    # Real confidence ellipses
+    #    h   = gm.plot1d()
+    #    [i.set_color('g') for i in h['pdf']]
+    #    h['pdf'][0].set_label('true pdf')
 
-        # Initial confidence ellipses as found by kmean
-        h0  = gm0.plot1d()
-        [i.set_color('k') for i in h0['pdf']]
-        h0['pdf'][0].set_label('initial pdf')
+    #    # Initial confidence ellipses as found by kmean
+    #    h0  = gm0.plot1d()
+    #    [i.set_color('k') for i in h0['pdf']]
+    #    h0['pdf'][0].set_label('initial pdf')
 
-        # Values found by EM
-        hl  = lgm.plot1d(fill = 1, level = 0.66)
-        [i.set_color('r') for i in hl['pdf']]
-        hl['pdf'][0].set_label('pdf found by EM')
+    #    # Values found by EM
+    #    hl  = lgm.plot1d(fill = 1, level = 0.66)
+    #    [i.set_color('r') for i in hl['pdf']]
+    #    hl['pdf'][0].set_label('pdf found by EM')
 
-        P.legend(loc = 0)
+    #    P.legend(loc = 0)
 
-        # Values found by Online EM
-        hl  = ogmm.gm.plot1d(fill = 1, level = 0.66)
-        [i.set_color('m') for i in hl['pdf']]
-        hl['pdf'][0].set_label('pdf found by Online EM')
+    #    # Values found by Online EM
+    #    hl  = ogmm.gm.plot1d(fill = 1, level = 0.66)
+    #    [i.set_color('m') for i in hl['pdf']]
+    #    hl['pdf'][0].set_label('pdf found by Online EM')
 
-        P.legend(loc = 0)
+    #    P.legend(loc = 0)
 
-    P.subplot(2, 1, 2)
-    P.plot(nu)
-    P.title('Learning rate')
-    P.show()
+    #P.subplot(2, 1, 2)
+    #P.plot(nu)
+    #P.title('Learning rate')
+    #P.show()


From scipy-svn at scipy.org  Sun Jun 10 05:37:03 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sun, 10 Jun 2007 04:37:03 -0500 (CDT)
Subject: [Scipy-svn] r3088 - trunk/Lib/sandbox/pyem
Message-ID: <20070610093703.F3CE939C018@new.scipy.org>

Author: cdavid
Date: 2007-06-10 04:36:59 -0500 (Sun, 10 Jun 2007)
New Revision: 3088

Modified:
   trunk/Lib/sandbox/pyem/gmm_em.py
Log:
Add a special initialization method for mixture models, for testing purposes.

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-09 14:03:01 UTC (rev 3087)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-10 09:36:59 UTC (rev 3088)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Sat Jun 09 10:00 PM 2007 J
+# Last Change: Sun Jun 10 06:00 PM 2007 J
 
 """Module implementing GMM, a class to estimate Gaussian mixture models using
 EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -103,6 +103,21 @@
         
         self.isinit = True
 
+    def init_test(self, data):
+        """Use values already in the model as initialization.
+        
+        Useful for testing purpose when reproducability is necessary."""
+        try:
+            if self.gm.check_state():
+                self.isinit = True
+            else:
+                raise GmParamError("the mixture is initialized, but the"\
+                        "parameters are not valid")
+
+        except GmParamError, e:
+            print "Model is not properly initalized, cannot init EM."
+            raise "Message was %s" % str(e)
+        
     # TODO: 
     #   - format of parameters ? For variances, list of variances matrix,
     #   keep the current format, have 3d matrices ?
@@ -118,13 +133,12 @@
             gm : GM
                 the mixture model to train.
             init : string
-                initialization method to use.
-        
-        """
+                initialization method to use."""
         self.gm = gm
 
         # Possible init methods
-        init_methods = {'kmean': self.init_kmean, 'random' : self.init_random}
+        init_methods = {'kmean': self.init_kmean, 'random' : self.init_random,
+                'test': self.init_test}
 
         if init not in init_methods:
             raise GmmParamError('init method %s not recognized' + str(init))


From scipy-svn at scipy.org  Sun Jun 10 12:28:29 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Sun, 10 Jun 2007 11:28:29 -0500 (CDT)
Subject: [Scipy-svn] r3089 - in trunk/Lib/sandbox/rbf: . tests
Message-ID: <20070610162829.4072C39C1A0@new.scipy.org>

Author: jtravs
Date: 2007-06-10 11:28:20 -0500 (Sun, 10 Jun 2007)
New Revision: 3089

Modified:
   trunk/Lib/sandbox/rbf/rbf.py
   trunk/Lib/sandbox/rbf/tests/example1.py
Log:
Updates to sandbox.rbf module.


Modified: trunk/Lib/sandbox/rbf/rbf.py
===================================================================
--- trunk/Lib/sandbox/rbf/rbf.py	2007-06-10 09:36:59 UTC (rev 3088)
+++ trunk/Lib/sandbox/rbf/rbf.py	2007-06-10 16:28:20 UTC (rev 3089)
@@ -56,9 +56,9 @@
     
     def _function(self, r):
         if self.function.lower() == 'multiquadric':
-            return sqrt((self.epsilon*r)**2 + 1)
+            return sqrt((1.0/self.epsilon*r)**2 + 1)
         elif self.function.lower() == 'inverse multiquadric':
-            return 1.0/sqrt((self.epsilon*r)**2 + 1)
+            return 1.0/sqrt((1.0/self.epsilon*r)**2 + 1)
         elif self.function.lower() == 'gausian':
             return exp(-(self.epsilon*r)**2)
         elif self.function.lower() == 'cubic':

Modified: trunk/Lib/sandbox/rbf/tests/example1.py
===================================================================
--- trunk/Lib/sandbox/rbf/tests/example1.py	2007-06-10 09:36:59 UTC (rev 3088)
+++ trunk/Lib/sandbox/rbf/tests/example1.py	2007-06-10 16:28:20 UTC (rev 3089)
@@ -25,26 +25,29 @@
 p.subplot(2,1,2)
 p.plot(x,y,'bo',xi,fi,'g',xi, s.sin(xi),'r')
 p.title('RBF interpolation - multiquadrics')
-p.show()
+p.savefig('rbf1d.png')
+p.close()
 
 # 2-d tests - setup scattered data
-x = s.rand(50,1)*4-2
-y = s.rand(50,1)*4-2
+x = s.rand(100)*4.0-2.0
+y = s.rand(100)*4.0-2.0
 z = x*s.exp(-x**2-y**2)
-ti = s.linspace(-2.0,2.0,81)
+ti = s.linspace(-2.0,2.0,100)
 (XI,YI) = s.meshgrid(ti,ti)
 
 # use RBF
-rbf = Rbf(x.flatten(),y.flatten(),z.flatten(),eps=2)
-ZI = rbf(XI.flatten(), YI.flatten())
-ZI.shape = XI.shape
+rbf = Rbf(x,y,z,epsilon=2)
+ZI = rbf(XI, YI)
 
 # plot the result
-from enthought.tvtk.tools import mlab
-f=mlab.figure(browser=False)
-su=mlab.Surf(XI,YI,ZI,ZI,scalar_visibility=True)
-f.add(su)
-su.lut_type='blue-red'
-f.objects[0].axis.z_label='value'
-pp = mlab.Spheres(s.c_[x.flatten(), y.flatten(), z.flatten()],radius=0.03)
-f.add(pp)
+n = p.normalize(-2., 2.)
+p.subplot(1,1,1)
+p.pcolor(XI,YI,ZI,cmap=p.cm.jet)
+p.scatter(x,y,100,z,cmap=p.cm.jet)
+p.title('RBF interpolation - multiquadrics')
+p.xlim(-2,2)
+p.ylim(-2,2)
+p.colorbar()
+p.savefig('rbf2d.png')
+p.close()
+


From scipy-svn at scipy.org  Mon Jun 11 03:01:32 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 11 Jun 2007 02:01:32 -0500 (CDT)
Subject: [Scipy-svn] r3090 - in trunk/Lib/sandbox/pyem: . doc tests
Message-ID: <20070611070132.3185939C050@new.scipy.org>

Author: cdavid
Date: 2007-06-11 02:01:12 -0500 (Mon, 11 Jun 2007)
New Revision: 3090

Modified:
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/doc/tutorial.pdf
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
Log:
* Correct bogus GM._get_va which caused bogus isodensity plot + test
* Support for plain matrix in GM.check_state


Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,7 +1,7 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Sat Jun 09 10:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J
 """This module implements various basic functions related to multivariate
 gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
 
@@ -246,9 +246,9 @@
     circle  = mahal * N.array([N.cos(theta), N.sin(theta)])
 
     # Get the dimension which we are interested in:
-    mu  = mu[dim]
+    mu  = mu[c]
     if mode == 'diag':
-        va      = va[dim]
+        va      = va[c]
         elps    = N.outer(mu, N.ones(npoints))
         elps    += N.dot(N.diag(N.sqrt(va)), circle)
     elif mode == 'full':

Modified: trunk/Lib/sandbox/pyem/doc/tutorial.pdf
===================================================================
(Binary files differ)

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Sat Jun 09 10:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J
 
 """Module implementing GM, a class which represents Gaussian mixtures.
 
@@ -296,27 +296,26 @@
         to be positive definite.
         """
         if not self.is_valid:
-            raise GmParamError("""Parameters of the model has not been 
-                set yet, please set them using self.set_param()""")
+            raise GmParamError("Parameters of the model has not been"\
+                "set yet, please set them using self.set_param()")
 
-        if self.mode == 'full':
-            raise NotImplementedError, "not implemented for full mode yet"
-        
-        # # How to check w: if one component is negligeable, what shall
-        # # we do ?
-        # M   = N.max(self.w)
-        # m   = N.min(self.w)
-
-        # maxc    = m / M
-
         # Check condition number for cov matrix
-        cond    = N.zeros(self.k)
-        ava     = N.absolute(self.va)
-        for c in range(self.k):
-            cond[c] = N.amax(ava[c, :]) / N.amin(ava[c, :])
+        if self.mode == 'diag':
+            tinfo = N.finfo(self.va.dtype)
+            if N.any(self.va < tinfo.eps):
+                raise GmParamError("variances are singular")
+        elif self.mode == 'full':
+            try:
+                d = self.d
+                for i in range(self.k):
+                    N.linalg.cholesky(self.va[i*d:i*d+d, :])
+            except N.linalg.LinAlgError:
+                raise GmParamError("matrix %d is singular " % i)
 
-        print cond
+        else:
+            raise GmParamError("Unknown mode")
 
+        return True
     @classmethod
     def gen_param(cls, d, nc, varmode = 'diag', spread = 1):
         """Generate random, valid parameters for a gaussian mixture model.
@@ -341,13 +340,14 @@
         -----
         This is a class method.
         """
-        w   = abs(randn(nc))
+        w   = N.abs(randn(nc))
         w   = w / sum(w, 0)
 
-        mu  = spread * randn(nc, d)
+        mu  = spread * N.sqrt(d) * randn(nc, d)
         if varmode == 'diag':
-            va  = abs(randn(nc, d))
+            va  = N.abs(randn(nc, d))
         elif varmode == 'full':
+            # If A is invertible, A'A is positive definite
             va  = randn(nc * d, d)
             for k in range(nc):
                 va[k*d:k*d+d]   = N.dot( va[k*d:k*d+d], 
@@ -509,7 +509,7 @@
         return retval
 
     def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50,
-            maxlevel = 0.95):
+            maxlevel = 0.95, V = None):
         """Do all the necessary computation for contour plot of mixture's
         density.
         
@@ -556,8 +556,10 @@
                 N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim)
         lden = N.log(den)
         # XXX: how to find "good" values for level ?
-        V = [-5, -3, -1, -0.5, ]
-        V.extend(N.linspace(0, N.max(lden), 4).tolist())
+        if V is None:
+            #V = [-5, -3, -1, -0.5, ]
+            #V.extend(list(N.linspace(0, N.max(lden), 20)))
+            V = N.linspace(-5, N.max(lden), 20)
         return X, Y, lden, N.array(V)
 
     def _densityctr(self, rangex, rangey, dim = misc.DEF_VIS_DIM):
@@ -578,7 +580,8 @@
         return X, Y, den
 
     def _get_va(self, dim):
-        """Returns variance limited do dimension in dim."""
+        """Returns variance limited do 2 dimension in tuple dim."""
+        assert len(dim) == 2
         dim = N.array(dim)
         if dim.any() < 0 or dim.any() >= self.d:
             raise ValueError("dim elements should be between 0 and dimension"\
@@ -586,9 +589,16 @@
         if self.mode == 'diag':
             return self.va[:, dim]
         elif self.mode == 'full':
-            tidx = N.array([N.array(dim) + i * self.d for i in range(self.k)])
-            tidx.flatten()
-            return self.va[tidx, dim]
+            ld = dim.size
+            vaselid = N.empty((ld * self.k, ld), N.int)
+            for i in range(self.k):
+                vaselid[ld*i] = dim[0] + i * self.d
+                vaselid[ld*i+1] = dim[1] + i * self.d
+            vadid = N.empty((ld * self.k, ld), N.int)
+            for i in range(self.k):
+                vadid[ld*i] = dim
+                vadid[ld*i+1] = dim
+            return self.va[vaselid, vadid]
         else:
             raise ValueError("Unkown mode")
 

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Sun Jun 10 06:00 PM 2007 J
+# Last Change: Mon Jun 11 01:00 PM 2007 J
 
 """Module implementing GMM, a class to estimate Gaussian mixture models using
 EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -20,7 +20,7 @@
 import densities
 #from kmean import kmean
 from scipy.cluster.vq import kmeans2 as kmean
-#from gauss_mix import GM
+from gauss_mix import GmParamError
 
 #from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
 
@@ -91,13 +91,18 @@
         """ Init the model at random."""
         k   = self.gm.k
         d   = self.gm.d
+        w   = N.ones(k) / k
+        mu  = randn(k, d)
         if self.gm.mode == 'diag':
-            w   = N.ones(k) / k
-            mu  = randn(k, d)
             va  = N.fabs(randn(k, d))
         else:
-            raise GmmParamError("init_random not implemented for "
-                    "mode %s yet", self.gm.mode)
+            # If A is invertible, A'A is positive definite
+            va  = randn(k * d, d)
+            for i in range(k):
+                va[i*d:i*d+d]   = N.dot( va[i*d:i*d+d], 
+                    va[i*d:i*d+d].T)
+            #raise GmmParamError("init_random not implemented for "\
+            #        "mode %s yet" % self.gm.mode)
 
         self.gm.set_param(w, mu, va)
         
@@ -106,14 +111,12 @@
     def init_test(self, data):
         """Use values already in the model as initialization.
         
-        Useful for testing purpose when reproducability is necessary."""
+        Useful for testing purpose when reproducability is necessary. This does
+        nothing but checking that the mixture model has valid initial
+        values."""
+        # We have
         try:
-            if self.gm.check_state():
-                self.isinit = True
-            else:
-                raise GmParamError("the mixture is initialized, but the"\
-                        "parameters are not valid")
-
+            self.gm.check_state()
         except GmParamError, e:
             print "Model is not properly initalized, cannot init EM."
             raise "Message was %s" % str(e)

Modified: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J
 
 # For now, just test that all mode/dim execute correctly
 
@@ -41,6 +41,23 @@
         except ValueError, e:
             print "Ok, density_grid failed as expected (with msg: " + str(e) + ")"
 
+    def test_get_va(self):
+        """Test _get_va for diag and full mode."""
+        d = 3
+        k = 2
+        ld = 2
+        dim = [0, 2]
+        w, mu, va = GM.gen_param(d, k, 'full')
+        va = N.arange(d*d*k).reshape(d*k, d)
+        gm = GM.fromvalues(w, mu, va)
 
+        tva = N.empty(ld * ld * k)
+        for i in range(k * ld * ld):
+            tva[i] = dim[i%ld] + (i % 4)/ ld  * dim[1] * d + d*d * (i / (ld*ld))
+        tva = tva.reshape(ld * k, ld)
+        sva = gm._get_va(dim)
+        assert N.all(sva == tva)
+
+
 if __name__ == "__main__":
     NumpyTest().run()


From scipy-svn at scipy.org  Mon Jun 11 03:07:48 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 11 Jun 2007 02:07:48 -0500 (CDT)
Subject: [Scipy-svn] r3091 - trunk/Lib/sandbox/pyem/examples
Message-ID: <20070611070748.42C3639C050@new.scipy.org>

Author: cdavid
Date: 2007-06-11 02:07:43 -0500 (Mon, 11 Jun 2007)
New Revision: 3091

Added:
   trunk/Lib/sandbox/pyem/examples/plotexamples.py
Log:
Add a plotting example.

Added: trunk/Lib/sandbox/pyem/examples/plotexamples.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/plotexamples.py	2007-06-11 07:01:12 UTC (rev 3090)
+++ trunk/Lib/sandbox/pyem/examples/plotexamples.py	2007-06-11 07:07:43 UTC (rev 3091)
@@ -0,0 +1,42 @@
+#! /usr/bin/env python
+# Last Change: Mon Jun 11 03:00 PM 2007 J
+
+# This is a simple test to check whether plotting ellipsoides of confidence and
+# isodensity contours match
+import numpy as N
+from numpy.testing import set_package_path, restore_path
+
+import pylab as P
+
+set_package_path()
+import pyem
+restore_path()
+
+# Generate a simple mixture model, plot its confidence ellipses + isodensity
+# curves for both diagonal and full covariance matrices
+d = 3
+k = 3
+dim = [0, 2]
+# diag model
+w, mu, va = pyem.GM.gen_param(d, k)
+dgm = pyem.GM.fromvalues(w, mu, va)
+# full model
+w, mu, va = pyem.GM.gen_param(d, k, 'full', spread = 1)
+fgm = pyem.GM.fromvalues(w, mu, va)
+
+def plot_model(gm, dim):
+    X, Y, Z, V = gm.density_on_grid(dim = dim)
+    h = gm.plot(dim = dim)
+    [i.set_linestyle('-.') for i in h]
+    P.contour(X, Y, Z, V)
+    data = gm.sample(200)
+    P.plot(data[:, dim[0]], data[:,dim[1]], '.')
+
+# Plot the contours and the ellipsoids of confidence
+P.subplot(2, 1, 1)
+plot_model(dgm, dim)
+
+P.subplot(2, 1, 2)
+plot_model(fgm, dim)
+
+P.show()


From scipy-svn at scipy.org  Mon Jun 11 03:10:11 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 11 Jun 2007 02:10:11 -0500 (CDT)
Subject: [Scipy-svn] r3092 - trunk/Lib/sandbox/pyem/tests
Message-ID: <20070611071011.4332D39C050@new.scipy.org>

Author: cdavid
Date: 2007-06-11 02:10:03 -0500 (Mon, 11 Jun 2007)
New Revision: 3092

Removed:
   trunk/Lib/sandbox/pyem/tests/generate_test_data.py
Log:
Remove outdated test script.

Deleted: trunk/Lib/sandbox/pyem/tests/generate_test_data.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/generate_test_data.py	2007-06-11 07:07:43 UTC (rev 3091)
+++ trunk/Lib/sandbox/pyem/tests/generate_test_data.py	2007-06-11 07:10:03 UTC (rev 3092)
@@ -1,53 +0,0 @@
-# Last Change: Wed Oct 18 06:00 PM 2006 J
-
-import numpy as N
-import tables as T
-
-from numpy.random import seed
-
-from gmm_em import multiple_gauss_den
-from gauss_mix import GM
-from _c_densities import gauss_den
-
-filename    = 'test_mgden.h5';
-h5file      = T.openFile(filename, 'w')
-h5file.createGroup(h5file.root, 'hyperparams')
-h5file.createGroup(h5file.root, 'params')
-h5file.createGroup(h5file.root, 'data')
-
-d       = 1
-k       = 2
-type    = 'diag'
-nframes = int(1e3)
-
-h5file.createArray(h5file.root.hyperparams, 'dimension', d)
-h5file.createArray(h5file.root.hyperparams, 'type', type)
-h5file.createArray(h5file.root.hyperparams, 'nclusters', k)
-
-w, mu, va   = GM.gen_param(d, k, type)
-
-h5file.createArray(h5file.root.params, 'weights', w)
-h5file.createArray(h5file.root.params, 'means', mu)
-h5file.createArray(h5file.root.params, 'variances', va)
-
-gm      = GM.fromvalues(w, mu, va)
-# Sample nframes frames  from the model
-data    = gm.sample(nframes)
-
-h5file.createArray(h5file.root.data, 'data', data)
-
-w1, mu1, va1    = GM.gen_param(d, k, type)
-
-out     = multiple_gauss_den(data, mu1, va1)
-out1    = gauss_den(data, mu1[0, :], va1[0, :])
-
-h5file.createArray(h5file.root.params, 'w', w1)
-h5file.createArray(h5file.root.params, 'mu', mu1)
-h5file.createArray(h5file.root.params, 'va', va1)
-h5file.createArray(h5file.root.data, 'out', out)
-
-h5file.createArray(h5file.root.params, 'mu1', mu1[0,:])
-h5file.createArray(h5file.root.params, 'va1', va1[0,:])
-h5file.createArray(h5file.root.data, 'out1', out1)
-
-h5file.close()


From scipy-svn at scipy.org  Mon Jun 11 05:18:31 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 11 Jun 2007 04:18:31 -0500 (CDT)
Subject: [Scipy-svn] r3093 - trunk/Lib/sandbox/pyem/doc
Message-ID: <20070611091831.C426239C110@new.scipy.org>

Author: cdavid
Date: 2007-06-11 04:18:25 -0500 (Mon, 11 Jun 2007)
New Revision: 3093

Modified:
   trunk/Lib/sandbox/pyem/doc/
Log:
Add tex output files to the svn ignore list


Property changes on: trunk/Lib/sandbox/pyem/doc
___________________________________________________________________
Name: svn:ignore
   - *.pyc
*.swp
*.pyd
*.so
*.prof
*.out
*.tex


   + *.pyc
*.swp
*.pyd
*.so
*.prof
*.out
*.tex
*.aux


From scipy-svn at scipy.org  Mon Jun 11 05:19:13 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 11 Jun 2007 04:19:13 -0500 (CDT)
Subject: [Scipy-svn] r3094 - in trunk/Lib/sandbox/pyem: . tests
Message-ID: <20070611091913.12C6F39C110@new.scipy.org>

Author: cdavid
Date: 2007-06-11 04:18:57 -0500 (Mon, 11 Jun 2007)
New Revision: 3094

Added:
   trunk/Lib/sandbox/pyem/tests/diag_1d_3k.mat
   trunk/Lib/sandbox/pyem/tests/diag_1d_4k.mat
   trunk/Lib/sandbox/pyem/tests/diag_2d_3k.mat
   trunk/Lib/sandbox/pyem/tests/full_2d_3k.mat
   trunk/Lib/sandbox/pyem/tests/generate_tests_data.py
Modified:
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
Log:
Add basic tests for EM, 1d, 2d, full and diag mode

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-11 09:18:25 UTC (rev 3093)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-11 09:18:57 UTC (rev 3094)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Mon Jun 11 03:00 PM 2007 J
+# Last Change: Mon Jun 11 06:00 PM 2007 J
 
 """Module implementing GM, a class which represents Gaussian mixtures.
 
@@ -132,6 +132,7 @@
         :SeeAlso:
             If you know already the parameters when creating the model, you can
             simply use the method class GM.fromvalues."""
+        #XXX: when fromvalues is called, parameters are called twice...
         k, d, mode  = check_gmm_param(weights, mu, sigma)
         if not k == self.k:
             raise GmParamError("Number of given components is %d, expected %d" 
@@ -664,14 +665,14 @@
     """
         
     # Check that w is valid
-    if N.fabs(N.sum(w, 0)  - 1) > misc._MAX_DBL_DEV:
+    if not len(w.shape) == 1:
+        raise GmParamError('weight should be a rank 1 array')
+
+    if N.fabs(N.sum(w)  - 1) > misc._MAX_DBL_DEV:
         raise GmParamError('weight does not sum to 1')
     
-    if not len(w.shape) == 1:
-        raise GmParamError('weight is not a vector')
-
     # Check that mean and va have the same number of components
-    K           = len(w)
+    K = len(w)
 
     if N.ndim(mu) < 2:
         msg = "mu should be a K,d matrix, and a row vector if only 1 comp"

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-11 09:18:25 UTC (rev 3093)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-11 09:18:57 UTC (rev 3094)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Mon Jun 11 01:00 PM 2007 J
+# Last Change: Mon Jun 11 04:00 PM 2007 J
 
 """Module implementing GMM, a class to estimate Gaussian mixture models using
 EM, and EM, a class which use GMM instances to estimate models parameters using

Added: trunk/Lib/sandbox/pyem/tests/diag_1d_3k.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/sandbox/pyem/tests/diag_1d_3k.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/sandbox/pyem/tests/diag_1d_4k.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/sandbox/pyem/tests/diag_1d_4k.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/sandbox/pyem/tests/diag_2d_3k.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/sandbox/pyem/tests/diag_2d_3k.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/sandbox/pyem/tests/full_2d_3k.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/sandbox/pyem/tests/full_2d_3k.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/sandbox/pyem/tests/generate_tests_data.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/generate_tests_data.py	2007-06-11 09:18:25 UTC (rev 3093)
+++ trunk/Lib/sandbox/pyem/tests/generate_tests_data.py	2007-06-11 09:18:57 UTC (rev 3094)
@@ -0,0 +1,103 @@
+#! /usr/bin/env python
+# Last Change: Mon Jun 11 05:00 PM 2007 J
+
+# This script generates some random data used for testing EM implementations.
+import copy
+import numpy as N
+from numpy.testing import set_package_path, restore_path
+from scipy.io import savemat, loadmat
+
+set_package_path()
+import pyem
+restore_path()
+
+from pyem import GM, GMM, EM
+
+def generate_dataset(d, k, mode, nframes):
+    """Generate a dataset useful for EM and GMM testing.
+    
+    returns:
+        data : ndarray
+            data from the true model.
+        tgm : GM
+            the true model (randomly generated)
+        gm0 : GM
+            the initial model
+        gm : GM
+            the trained model
+    """
+    # Generate a model
+    w, mu, va = GM.gen_param(d, k, mode, spread = 2.0)
+    tgm = GM.fromvalues(w, mu, va)
+
+    # Generate data from the model
+    data = tgm.sample(nframes)
+
+    # Run EM on the model, by running the initialization separately.
+    gmm = GMM(GM(d, k, mode), 'test')
+    gmm.init_random(data)
+    gm0 = copy.copy(gmm.gm)
+
+    gmm = GMM(copy.copy(gmm.gm), 'test')
+    em = EM()
+    em.train(data, gmm)
+
+    return data, tgm, gm0, gmm.gm
+
+def save_dataset(filename, data, tgm, gm0, gm):
+    dic = {'tw': tgm.w, 'tmu': tgm.mu, 'tva': tgm.va,
+            'w0': gm0.w, 'mu0' : gm0.mu, 'va0': gm0.va,
+            'w': gm.w, 'mu': gm.mu, 'va': gm.va,
+            'data': data}
+    savemat(filename, dic)
+
+def doall(d, k, mode):
+    import pylab as P
+
+    data, tgm, gm0, gm = generate_dataset(d, k, mode, 500)
+    filename = mode + '_%dd' % d + '_%dk.mat' % k
+    save_dataset(filename, data, tgm, gm0, gm)
+
+    if d == 1:
+        P.subplot(2, 1, 1)
+        gm0.plot1d()
+        h = tgm.plot1d(gpdf = True)
+        P.hist(data[:, 0], 20, normed = 1, fill = False)
+
+        P.subplot(2, 1, 2)
+        gm.plot1d()
+        tgm.plot1d(gpdf = True)
+        P.hist(data[:, 0], 20, normed = 1, fill = False)
+    else:
+        P.subplot(2, 1, 1)
+        gm0.plot()
+        h = tgm.plot()
+        [i.set_color('g') for i in h]
+        P.plot(data[:, 0], data[:, 1], '.')
+
+        P.subplot(2, 1, 2)
+        gm.plot()
+        h = tgm.plot()
+        [i.set_color('g') for i in h]
+        P.plot(data[:, 0], data[:, 1], '.')
+
+    P.show()
+
+if __name__ == '__main__':
+    N.random.seed(0)
+    d = 2
+    k = 3
+    mode = 'full'
+    doall(d, k, mode)
+
+    N.random.seed(0)
+    d = 2
+    k = 3
+    mode = 'diag'
+    doall(d, k, mode)
+
+    N.random.seed(0)
+    d = 1
+    k = 4
+    mode = 'diag'
+    doall(d, k, mode)

Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-11 09:18:25 UTC (rev 3093)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-11 09:18:57 UTC (rev 3094)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Mon Jun 11 06:00 PM 2007 J
 
 # For now, just test that all mode/dim execute correctly
 
@@ -12,6 +12,14 @@
 from pyem import GMM, GM, EM
 restore_path()
 
+def load_dataset(filename):
+    from scipy.io import loadmat
+    dic = loadmat(filename, squeeze_me = False)
+    dic['w0'] = dic['w0'].squeeze()
+    dic['w'] = dic['w'].squeeze()
+    dic['tw'] = dic['tw'].squeeze()
+    return dic
+
 class EmTest(NumpyTestCase):
     def _create_model_and_run_em(self, d, k, mode, nframes):
         #+++++++++++++++++++++++++++++++++++++++++++++++++
@@ -32,61 +40,127 @@
         em  = EM()
         lk  = em.train(data, gmm)
 
-class test_full(EmTest):
-    def check_1d(self, level = 1):
-        d       = 1
-        k       = 2
-        mode    = 'full'
-        nframes = int(1e2)
+#class test_full_run(EmTest):
+#    """This class only tests whether the algorithms runs. Do not check the
+#    results."""
+#    def check_1d(self, level = 1):
+#        d       = 1
+#        k       = 2
+#        mode    = 'full'
+#        nframes = int(1e2)
+#
+#        #seed(1)
+#        self._create_model_and_run_em(d, k, mode, nframes)
+#
+#    def check_2d(self, level = 1):
+#        d       = 2
+#        k       = 2
+#        mode    = 'full'
+#        nframes = int(1e2)
+#
+#        #seed(1)
+#        self._create_model_and_run_em(d, k, mode, nframes)
+#
+#    def check_5d(self, level = 1):
+#        d       = 5
+#        k       = 3
+#        mode    = 'full'
+#        nframes = int(1e2)
+#
+#        #seed(1)
+#        self._create_model_and_run_em(d, k, mode, nframes)
+#
+#class test_diag_run(EmTest):
+#    """This class only tests whether the algorithms runs. Do not check the
+#    results."""
+#    def check_1d(self, level = 1):
+#        d       = 1
+#        k       = 2
+#        mode    = 'diag'
+#        nframes = int(1e2)
+#
+#        #seed(1)
+#        self._create_model_and_run_em(d, k, mode, nframes)
+#
+#    def check_2d(self, level = 1):
+#        d       = 2
+#        k       = 2
+#        mode    = 'diag'
+#        nframes = int(1e2)
+#
+#        #seed(1)
+#        self._create_model_and_run_em(d, k, mode, nframes)
+#
+#    def check_5d(self, level = 1):
+#        d       = 5
+#        k       = 3
+#        mode    = 'diag'
+#        nframes = int(1e2)
+#
+#        #seed(1)
+#        self._create_model_and_run_em(d, k, mode, nframes)
 
-        #seed(1)
-        self._create_model_and_run_em(d, k, mode, nframes)
+class test_datasets(EmTest):
+    """This class tests whether the EM algorithms works using pre-computed
+    datasets."""
+    def check_1d_full(self, level = 1):
+        d = 1
+        k = 4
+        mode = 'full'
+        # Data are exactly the same than in diagonal mode, just check that
+        # calling full mode works even in 1d, even if it is kind of stupid to
+        # do so
+        dic = load_dataset('diag_1d_4k.mat')
 
-    def check_2d(self, level = 1):
-        d       = 2
-        k       = 2
-        mode    = 'full'
-        nframes = int(1e2)
+        gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
+        gmm = GMM(gm, 'test')
+        EM().train(dic['data'], gmm)
 
-        #seed(1)
-        self._create_model_and_run_em(d, k, mode, nframes)
+        assert_array_equal(gmm.gm.w, dic['w'])
+        assert_array_equal(gmm.gm.mu, dic['mu'])
+        assert_array_equal(gmm.gm.va, dic['va'])
 
-    def check_5d(self, level = 1):
-        d       = 5
-        k       = 3
-        mode    = 'full'
-        nframes = int(1e2)
+    def check_1d_diag(self, level = 1):
+        d = 1
+        k = 4
+        mode = 'diag'
+        dic = load_dataset('diag_1d_4k.mat')
 
-        #seed(1)
-        self._create_model_and_run_em(d, k, mode, nframes)
+        gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
+        gmm = GMM(gm, 'test')
+        EM().train(dic['data'], gmm)
 
-class test_diag(EmTest):
-    def check_1d(self, level = 1):
-        d       = 1
-        k       = 2
-        mode    = 'diag'
-        nframes = int(1e2)
+        assert_array_equal(gmm.gm.w, dic['w'])
+        assert_array_equal(gmm.gm.mu, dic['mu'])
+        assert_array_equal(gmm.gm.va, dic['va'])
 
-        #seed(1)
-        self._create_model_and_run_em(d, k, mode, nframes)
+    def check_2d_full(self, level = 1):
+        d = 2
+        k = 3
+        mode = 'full'
+        dic = load_dataset('full_2d_3k.mat')
 
-    def check_2d(self, level = 1):
-        d       = 2
-        k       = 2
-        mode    = 'diag'
-        nframes = int(1e2)
+        gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
+        gmm = GMM(gm, 'test')
+        EM().train(dic['data'], gmm)
 
-        #seed(1)
-        self._create_model_and_run_em(d, k, mode, nframes)
+        assert_array_equal(gmm.gm.w, dic['w'])
+        assert_array_equal(gmm.gm.mu, dic['mu'])
+        assert_array_equal(gmm.gm.va, dic['va'])
 
-    def check_5d(self, level = 1):
-        d       = 5
-        k       = 3
-        mode    = 'diag'
-        nframes = int(1e2)
+    def check_2d_diag(self, level = 1):
+        d = 2
+        k = 3
+        mode = 'diag'
+        dic = load_dataset('diag_2d_3k.mat')
 
-        #seed(1)
-        self._create_model_and_run_em(d, k, mode, nframes)
+        gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
+        gmm = GMM(gm, 'test')
+        EM().train(dic['data'], gmm)
 
+        assert_array_equal(gmm.gm.w, dic['w'])
+        assert_array_equal(gmm.gm.mu, dic['mu'])
+        assert_array_equal(gmm.gm.va, dic['va'])
+
 if __name__ == "__main__":
     NumpyTest().run()


From scipy-svn at scipy.org  Mon Jun 11 05:32:23 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 11 Jun 2007 04:32:23 -0500 (CDT)
Subject: [Scipy-svn] r3095 - trunk/Lib/sandbox/pyem/tests
Message-ID: <20070611093223.5DD3339C05A@new.scipy.org>

Author: cdavid
Date: 2007-06-11 04:32:17 -0500 (Mon, 11 Jun 2007)
New Revision: 3095

Modified:
   trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
Log:
Reenable tests I forgot to uncomment in gmm_em tests

Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-11 09:18:57 UTC (rev 3094)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-11 09:32:17 UTC (rev 3095)
@@ -40,66 +40,66 @@
         em  = EM()
         lk  = em.train(data, gmm)
 
-#class test_full_run(EmTest):
-#    """This class only tests whether the algorithms runs. Do not check the
-#    results."""
-#    def check_1d(self, level = 1):
-#        d       = 1
-#        k       = 2
-#        mode    = 'full'
-#        nframes = int(1e2)
-#
-#        #seed(1)
-#        self._create_model_and_run_em(d, k, mode, nframes)
-#
-#    def check_2d(self, level = 1):
-#        d       = 2
-#        k       = 2
-#        mode    = 'full'
-#        nframes = int(1e2)
-#
-#        #seed(1)
-#        self._create_model_and_run_em(d, k, mode, nframes)
-#
-#    def check_5d(self, level = 1):
-#        d       = 5
-#        k       = 3
-#        mode    = 'full'
-#        nframes = int(1e2)
-#
-#        #seed(1)
-#        self._create_model_and_run_em(d, k, mode, nframes)
-#
-#class test_diag_run(EmTest):
-#    """This class only tests whether the algorithms runs. Do not check the
-#    results."""
-#    def check_1d(self, level = 1):
-#        d       = 1
-#        k       = 2
-#        mode    = 'diag'
-#        nframes = int(1e2)
-#
-#        #seed(1)
-#        self._create_model_and_run_em(d, k, mode, nframes)
-#
-#    def check_2d(self, level = 1):
-#        d       = 2
-#        k       = 2
-#        mode    = 'diag'
-#        nframes = int(1e2)
-#
-#        #seed(1)
-#        self._create_model_and_run_em(d, k, mode, nframes)
-#
-#    def check_5d(self, level = 1):
-#        d       = 5
-#        k       = 3
-#        mode    = 'diag'
-#        nframes = int(1e2)
-#
-#        #seed(1)
-#        self._create_model_and_run_em(d, k, mode, nframes)
+class test_full_run(EmTest):
+    """This class only tests whether the algorithms runs. Do not check the
+    results."""
+    def check_1d(self, level = 1):
+        d       = 1
+        k       = 2
+        mode    = 'full'
+        nframes = int(1e2)
 
+        #seed(1)
+        self._create_model_and_run_em(d, k, mode, nframes)
+
+    def check_2d(self, level = 1):
+        d       = 2
+        k       = 2
+        mode    = 'full'
+        nframes = int(1e2)
+
+        #seed(1)
+        self._create_model_and_run_em(d, k, mode, nframes)
+
+    def check_5d(self, level = 1):
+        d       = 5
+        k       = 3
+        mode    = 'full'
+        nframes = int(1e2)
+
+        #seed(1)
+        self._create_model_and_run_em(d, k, mode, nframes)
+
+class test_diag_run(EmTest):
+    """This class only tests whether the algorithms runs. Do not check the
+    results."""
+    def check_1d(self, level = 1):
+        d       = 1
+        k       = 2
+        mode    = 'diag'
+        nframes = int(1e2)
+
+        #seed(1)
+        self._create_model_and_run_em(d, k, mode, nframes)
+
+    def check_2d(self, level = 1):
+        d       = 2
+        k       = 2
+        mode    = 'diag'
+        nframes = int(1e2)
+
+        #seed(1)
+        self._create_model_and_run_em(d, k, mode, nframes)
+
+    def check_5d(self, level = 1):
+        d       = 5
+        k       = 3
+        mode    = 'diag'
+        nframes = int(1e2)
+
+        #seed(1)
+        self._create_model_and_run_em(d, k, mode, nframes)
+
 class test_datasets(EmTest):
     """This class tests whether the EM algorithms works using pre-computed
     datasets."""


From scipy-svn at scipy.org  Mon Jun 11 06:12:22 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 11 Jun 2007 05:12:22 -0500 (CDT)
Subject: [Scipy-svn] r3096 - trunk/Lib/sandbox/pyem/tests
Message-ID: <20070611101222.46FBD39C0B1@new.scipy.org>

Author: cdavid
Date: 2007-06-11 05:12:10 -0500 (Mon, 11 Jun 2007)
New Revision: 3096

Modified:
   trunk/Lib/sandbox/pyem/tests/test_densities.py
   trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
Log:
Convert check calls to test calls in tests, for future conversion to setuptools

Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-11 09:32:17 UTC (rev 3095)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-11 10:12:10 UTC (rev 3096)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Sat Jun 09 02:00 PM 2007 J
+# Last Change: Mon Jun 11 06:00 PM 2007 J
 
 # TODO:
 #   - having "fake tests" to check that all mode (scalar, diag and full) are
@@ -68,24 +68,24 @@
             0.00000253261067, 0.00000001526368])
 
 class test_py_implementation(TestDensities):
-    def _check(self, level, decimal = DEF_DEC):
+    def _test(self, level, decimal = DEF_DEC):
         Y   = gauss_den(self.X, self.mu, self.va)
         assert_array_almost_equal(Y, self.Yt, decimal)
 
-    def check_2d_diag(self, level = 0):
+    def test_2d_diag(self, level = 0):
         self._generate_test_data_2d_diag()
-        self._check(level)
+        self._test(level)
 
-    def check_2d_full(self, level = 0):
+    def test_2d_full(self, level = 0):
         self._generate_test_data_2d_full()
-        self._check(level)
+        self._test(level)
     
-    def check_py_1d(self, level = 0):
+    def test_py_1d(self, level = 0):
         self._generate_test_data_1d()
-        self._check(level)
+        self._test(level)
 
 class test_c_implementation(TestDensities):
-    def _check(self, level, decimal = DEF_DEC):
+    def _test(self, level, decimal = DEF_DEC):
         try:
             from pyem._c_densities import gauss_den as c_gauss_den
             Y   = c_gauss_den(self.X, self.mu, self.va)
@@ -94,17 +94,17 @@
             print "Error while importing C implementation, not tested"
             print " -> (Import error was %s)" % inst 
 
-    def check_1d(self, level = 0):
+    def test_1d(self, level = 0):
         self._generate_test_data_1d()
-        self._check(level)
+        self._test(level)
 
-    def check_2d_diag(self, level = 0):
+    def test_2d_diag(self, level = 0):
         self._generate_test_data_2d_diag()
-        self._check(level)
+        self._test(level)
 
-    def check_2d_full(self, level = 0):
+    def test_2d_full(self, level = 0):
         self._generate_test_data_2d_full()
-        self._check(level)
+        self._test(level)
 
 class test_gauss_ell(NumpyTestCase):
     def test_dim(self):

Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-11 09:32:17 UTC (rev 3095)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-11 10:12:10 UTC (rev 3096)
@@ -43,7 +43,7 @@
 class test_full_run(EmTest):
     """This class only tests whether the algorithms runs. Do not check the
     results."""
-    def check_1d(self, level = 1):
+    def test_1d(self, level = 1):
         d       = 1
         k       = 2
         mode    = 'full'
@@ -52,7 +52,7 @@
         #seed(1)
         self._create_model_and_run_em(d, k, mode, nframes)
 
-    def check_2d(self, level = 1):
+    def test_2d(self, level = 1):
         d       = 2
         k       = 2
         mode    = 'full'
@@ -61,7 +61,7 @@
         #seed(1)
         self._create_model_and_run_em(d, k, mode, nframes)
 
-    def check_5d(self, level = 1):
+    def test_5d(self, level = 1):
         d       = 5
         k       = 3
         mode    = 'full'
@@ -71,9 +71,9 @@
         self._create_model_and_run_em(d, k, mode, nframes)
 
 class test_diag_run(EmTest):
-    """This class only tests whether the algorithms runs. Do not check the
+    """This class only tests whether the algorithms runs. Do not test the
     results."""
-    def check_1d(self, level = 1):
+    def test_1d(self, level = 1):
         d       = 1
         k       = 2
         mode    = 'diag'
@@ -82,7 +82,7 @@
         #seed(1)
         self._create_model_and_run_em(d, k, mode, nframes)
 
-    def check_2d(self, level = 1):
+    def test_2d(self, level = 1):
         d       = 2
         k       = 2
         mode    = 'diag'
@@ -91,7 +91,7 @@
         #seed(1)
         self._create_model_and_run_em(d, k, mode, nframes)
 
-    def check_5d(self, level = 1):
+    def test_5d(self, level = 1):
         d       = 5
         k       = 3
         mode    = 'diag'
@@ -103,11 +103,11 @@
 class test_datasets(EmTest):
     """This class tests whether the EM algorithms works using pre-computed
     datasets."""
-    def check_1d_full(self, level = 1):
+    def test_1d_full(self, level = 1):
         d = 1
         k = 4
         mode = 'full'
-        # Data are exactly the same than in diagonal mode, just check that
+        # Data are exactly the same than in diagonal mode, just test that
         # calling full mode works even in 1d, even if it is kind of stupid to
         # do so
         dic = load_dataset('diag_1d_4k.mat')
@@ -120,7 +120,7 @@
         assert_array_equal(gmm.gm.mu, dic['mu'])
         assert_array_equal(gmm.gm.va, dic['va'])
 
-    def check_1d_diag(self, level = 1):
+    def test_1d_diag(self, level = 1):
         d = 1
         k = 4
         mode = 'diag'
@@ -134,7 +134,7 @@
         assert_array_equal(gmm.gm.mu, dic['mu'])
         assert_array_equal(gmm.gm.va, dic['va'])
 
-    def check_2d_full(self, level = 1):
+    def test_2d_full(self, level = 1):
         d = 2
         k = 3
         mode = 'full'
@@ -148,7 +148,7 @@
         assert_array_equal(gmm.gm.mu, dic['mu'])
         assert_array_equal(gmm.gm.va, dic['va'])
 
-    def check_2d_diag(self, level = 1):
+    def test_2d_diag(self, level = 1):
         d = 2
         k = 3
         mode = 'diag'


From scipy-svn at scipy.org  Mon Jun 11 06:34:27 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 11 Jun 2007 05:34:27 -0500 (CDT)
Subject: [Scipy-svn] r3097 - in trunk/Lib/sandbox/pyem: . tests
Message-ID: <20070611103427.1172E39C1B1@new.scipy.org>

Author: cdavid
Date: 2007-06-11 05:34:20 -0500 (Mon, 11 Jun 2007)
New Revision: 3097

Modified:
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/tests/test_densities.py
Log:
Add tests for pdf computation in log domain (1st step for logsumexp trick support)

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-11 10:12:10 UTC (rev 3096)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-11 10:34:20 UTC (rev 3097)
@@ -1,7 +1,7 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Mon Jun 11 03:00 PM 2007 J
+# Last Change: Mon Jun 11 07:00 PM 2007 J
 """This module implements various basic functions related to multivariate
 gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
 
@@ -119,7 +119,7 @@
     if not log:
         y   = fac * N.exp(y)
     else:
-        y   = y + log(fac)
+        y   += N.log(fac)
 
     return y
     

Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-11 10:12:10 UTC (rev 3096)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-11 10:34:20 UTC (rev 3097)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Mon Jun 11 06:00 PM 2007 J
+# Last Change: Mon Jun 11 07:00 PM 2007 J
 
 # TODO:
 #   - having "fake tests" to check that all mode (scalar, diag and full) are
@@ -72,6 +72,10 @@
         Y   = gauss_den(self.X, self.mu, self.va)
         assert_array_almost_equal(Y, self.Yt, decimal)
 
+    def _test_log(self, level, decimal = DEF_DEC):
+        Y   = gauss_den(self.X, self.mu, self.va, log = True)
+        assert_array_almost_equal(N.exp(Y), self.Yt, decimal)
+
     def test_2d_diag(self, level = 0):
         self._generate_test_data_2d_diag()
         self._test(level)
@@ -80,10 +84,22 @@
         self._generate_test_data_2d_full()
         self._test(level)
     
-    def test_py_1d(self, level = 0):
+    def test_1d(self, level = 0):
         self._generate_test_data_1d()
         self._test(level)
 
+    def test_2d_diag_log(self, level = 0):
+        self._generate_test_data_2d_diag()
+        self._test_log(level)
+
+    def test_2d_full_log(self, level = 0):
+        self._generate_test_data_2d_full()
+        self._test_log(level)
+
+    def test_1d_log(self, level = 0):
+        self._generate_test_data_1d()
+        self._test_log(level)
+
 class test_c_implementation(TestDensities):
     def _test(self, level, decimal = DEF_DEC):
         try:


From scipy-svn at scipy.org  Tue Jun 12 00:04:27 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 11 Jun 2007 23:04:27 -0500 (CDT)
Subject: [Scipy-svn] r3098 - in trunk/Lib/sandbox/pyem: . tests
Message-ID: <20070612040427.3111439C092@new.scipy.org>

Author: cdavid
Date: 2007-06-11 23:04:14 -0500 (Mon, 11 Jun 2007)
New Revision: 3098

Modified:
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/tests/test_densities.py
   trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
Log:
Add logsumexp function + tests. Not used in the code yet, though

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-11 10:34:20 UTC (rev 3097)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-12 04:04:14 UTC (rev 3098)
@@ -1,7 +1,7 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Mon Jun 11 07:00 PM 2007 J
+# Last Change: Tue Jun 12 12:00 PM 2007 J
 """This module implements various basic functions related to multivariate
 gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
 
@@ -268,7 +268,13 @@
 
     return elps[0, :], elps[1, :]
 
-def multiple_gauss_den(data, mu, va):
+def logsumexp(x):
+    """Compute log(sum(exp(x), 1)) while avoiding underflow."""
+    axis = 1
+    mc = N.max(x, axis)
+    return mc + N.log(N.sum(N.exp(x-mc[:, N.newaxis]), axis))
+
+def multiple_gauss_den(data, mu, va, log = False):
     """Helper function to generate several Gaussian
     pdf (different parameters) at the same points
 
@@ -283,6 +289,8 @@
             variance of the pdf. One row per different component for diagonal
             covariance (k, d), or d rows per component for full matrix pdf
             (k*d,d).
+        log : boolean
+            if True, returns the log-pdf instead of the pdf.
 
     :Returns:
         Returns a (n, k) array, each column i being the pdf of the ith mean and
@@ -297,11 +305,11 @@
     y = N.zeros((K, n))
     if N.size(mu) == N.size(va):
         for i in range(K):
-            y[i] = gauss_den(data, mu[i, :], va[i, :])
+            y[i] = gauss_den(data, mu[i, :], va[i, :], log)
         return y.T
     else:
         for i in range(K):
-            y[i] = gauss_den(data, mu[i, :], va[d*i:d*i+d, :])
+            y[i] = gauss_den(data, mu[i, :], va[d*i:d*i+d, :], log)
         return y.T
 
 if __name__ == "__main__":

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-11 10:34:20 UTC (rev 3097)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-12 04:04:14 UTC (rev 3098)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Mon Jun 11 04:00 PM 2007 J
+# Last Change: Tue Jun 12 11:00 AM 2007 J
 
 """Module implementing GMM, a class to estimate Gaussian mixture models using
 EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -101,8 +101,6 @@
             for i in range(k):
                 va[i*d:i*d+d]   = N.dot( va[i*d:i*d+d], 
                     va[i*d:i*d+d].T)
-            #raise GmmParamError("init_random not implemented for "\
-            #        "mode %s yet" % self.gm.mode)
 
         self.gm.set_param(w, mu, va)
         
@@ -150,11 +148,10 @@
         self.isinit = False
         self.initst = init
 
-    def sufficient_statistics(self, data):
+    def compute_responsabilities(self, data):
         """Compute responsabilities.
         
-        Return normalized and non-normalized sufficient statistics from the
-        model.
+        Return normalized and non-normalized responsibilities for the model.
         
         Note
         ----
@@ -325,11 +322,11 @@
         like    = N.zeros(maxiter)
 
         # Em computation, with computation of the likelihood
-        g, tgd      = model.sufficient_statistics(data)
+        g, tgd      = model.compute_responsabilities(data)
         like[0]     = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
         model.update_em(data, g)
         for i in range(1, maxiter):
-            g, tgd      = model.sufficient_statistics(data)
+            g, tgd      = model.compute_responsabilities(data)
             like[i]     = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
             model.update_em(data, g)
             if has_em_converged(like[i], like[i-1], thresh):
@@ -337,21 +334,6 @@
 
         return like
     
-#def regularize_diag(variance, alpha = _DEF_ALPHA):
-#    delta   = N.sum(variance) / variance.size
-#    if delta > _MIN_DBL_DELTA:
-#        return variance + alpha * delta
-#    else:
-#        return variance + alpha * _MIN_DBL_DELTA
-#
-#def regularize_full(variance):
-#    # Trace of a positive definite matrix is always > 0
-#    delta   = N.trace(variance) / variance.shape[0]
-#    if delta > _MIN_DBL_DELTA:
-#        return variance + alpha * delta
-#    else:
-#        return variance + alpha * _MIN_DBL_DELTA
-
 # Misc functions
 def bic(lk, deg, n):
     """ Expects lk to be log likelihood """
@@ -370,115 +352,6 @@
 
 if __name__ == "__main__":
     pass
-    ## import copy
-    ## #=============================
-    ## # Simple GMM with 5 components
-    ## #=============================
-
-    ## #+++++++++++++++++++++++++++++
-    ## # Meta parameters of the model
-    ## #   - k: Number of components
-    ## #   - d: dimension of each Gaussian
-    ## #   - mode: Mode of covariance matrix: full or diag
-    ## #   - nframes: number of frames (frame = one data point = one
-    ## #   row of d elements
-    ## k       = 2 
-    ## d       = 1
-    ## mode    = 'full'
-    ## nframes = 1e3
-
-    ## #+++++++++++++++++++++++++++++++++++++++++++
-    ## # Create an artificial GMM model, samples it
-    ## #+++++++++++++++++++++++++++++++++++++++++++
-    ## print "Generating the mixture"
-    ## # Generate a model with k components, d dimensions
-    ## w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
-    ## gm          = GM(d, k, mode)
-    ## gm.set_param(w, mu, va)
-
-    ## # Sample nframes frames  from the model
-    ## data    = gm.sample(nframes)
-
-    ## #++++++++++++++++++++++++
-    ## # Learn the model with EM
-    ## #++++++++++++++++++++++++
-
-    ## # Init the model
-    ## print "Init a model for learning, with kmean for initialization"
-    ## lgm = GM(d, k, mode)
-    ## gmm = GMM(lgm, 'kmean')
-    ## gmm.init(data)
-
-    ## # Keep the initialized model for drawing
-    ## gm0 = copy.copy(lgm)
-
-    ## # The actual EM, with likelihood computation
-    ## niter   = 10
-    ## like    = N.zeros(niter)
-
-    ## print "computing..."
-    ## for i in range(niter):
-    ##     g, tgd  = gmm.sufficient_statistics(data)
-    ##     like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
-    ##     gmm.update_em(data, g)
-    ## # # Alternative form, by using EM class: as the EM class
-    ## # # is quite rudimentary now, it is not very useful, just save
-    ## # # a few lines
-    ## # em      = EM()
-    ## # like    = em.train(data, gmm, niter)
-
-    ## #+++++++++++++++
-    ## # Draw the model
-    ## #+++++++++++++++
-    ## print "drawing..."
-    ## import pylab as P
-    ## P.subplot(2, 1, 1)
-
-    ## if not d == 1:
-    ##     # Draw what is happening
-    ##     P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
-    ##     # Real confidence ellipses
-    ##     Xre, Yre  = gm.conf_ellipses()
-    ##     P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides')
-    ##     for i in range(1,k):
-    ##         P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_')
-
-    ##     # Initial confidence ellipses as found by kmean
-    ##     X0e, Y0e  = gm0.conf_ellipses()
-    ##     P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoides')
-    ##     for i in range(1,k):
-    ##         P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_')
-
-    ##     # Values found by EM
-    ##     Xe, Ye  = lgm.conf_ellipses()
-    ##     P.plot(Xe[0], Ye[0], 'r', label = "confidence ellipsoides found by"
-    ##      "EM")
-    ##     for i in range(1,k):
-    ##         P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_')
-    ##     P.legend(loc = 0)
-    ## else:
-    ##     # Real confidence ellipses
-    ##     h   = gm.plot1d()
-    ##     [i.set_color('g') for i in h['pdf']]
-    ##     h['pdf'][0].set_label('true pdf')
-
-    ##     # Initial confidence ellipses as found by kmean
-    ##     h0  = gm0.plot1d()
-    ##     [i.set_color('k') for i in h0['pdf']]
-    ##     h0['pdf'][0].set_label('initial pdf')
-
-    ##     # Values found by EM
-    ##     hl  = lgm.plot1d(fill = 1, level = 0.66)
-    ##     [i.set_color('r') for i in hl['pdf']]
-    ##     hl['pdf'][0].set_label('pdf found by EM')
-
-    ##     P.legend(loc = 0)
-
-    ## P.subplot(2, 1, 2)
-    ## P.plot(like)
-    ## P.title('log likelihood')
-
     ## # #++++++++++++++++++
     ## # # Export the figure
     ## # #++++++++++++++++++

Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-11 10:34:20 UTC (rev 3097)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-12 04:04:14 UTC (rev 3098)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Mon Jun 11 07:00 PM 2007 J
+# Last Change: Tue Jun 12 12:00 PM 2007 J
 
 # TODO:
 #   - having "fake tests" to check that all mode (scalar, diag and full) are
@@ -100,6 +100,58 @@
         self._generate_test_data_1d()
         self._test_log(level)
 
+class test_py_logsumexp(TestDensities):
+    """Class to compare logsumexp vs naive implementation."""
+    def test_underlow(self):
+        """This function checks that logsumexp works as expected."""
+        # We check wether naive implementation would underflow, to be sure we
+        # are actually testing something here.
+        N.seterr(under='raise')
+        try:
+            a = N.array([[-1000]])
+            self.naive_logsumexp(a)
+            raise AssertionError("expected to catch underflow, we should not be here")
+        except FloatingPointError, e:
+            print "Catching underflow, as expected"
+        assert pyem.densities.logsumexp(a) == -1000.
+        try:
+            a = N.array([[-1000, -1000, -1000]])
+            self.naive_logsumexp(a)
+            raise AssertionError("expected to catch underflow, we should not be here")
+        except FloatingPointError, e:
+            print "Catching underflow, as expected"
+        assert_array_almost_equal(pyem.densities.logsumexp(a), -998.90138771)
+
+    def naive_logsumexp(self, data):
+        return N.log(N.sum(N.exp(data), 1)) 
+
+    def test_1d(self):
+        data = N.random.randn(1e1)[:, N.newaxis]
+        mu = N.array([[-5], [-6]])
+        va = N.array([[0.1], [0.1]])
+        y = pyem.densities.multiple_gauss_den(data, mu, va, log = True)
+        a1 = pyem.densities.logsumexp(y)
+        a2 = self.naive_logsumexp(y)
+        assert_array_equal(a1, a2)
+
+    def test_2d_full(self):
+        data = N.random.randn(1e1, 2)
+        mu = N.array([[-3, -1], [3, 3]])
+        va = N.array([[1.1, 0.4], [0.6, 0.8], [0.4, 0.2], [0.3, 0.9]])
+        y = pyem.densities.multiple_gauss_den(data, mu, va, log = True)
+        a1 = pyem.densities.logsumexp(y)
+        a2 = self.naive_logsumexp(y)
+        assert_array_almost_equal(a1, a2, DEF_DEC)
+
+    def test_2d_diag(self):
+        data = N.random.randn(1e1, 2)
+        mu = N.array([[-3, -1], [3, 3]])
+        va = N.array([[1.1, 0.4], [0.6, 0.8]])
+        y = pyem.densities.multiple_gauss_den(data, mu, va, log = True)
+        a1 = pyem.densities.logsumexp(y)
+        a2 = self.naive_logsumexp(y)
+        assert_array_almost_equal(a1, a2, DEF_DEC)
+
 class test_c_implementation(TestDensities):
     def _test(self, level, decimal = DEF_DEC):
         try:

Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-11 10:34:20 UTC (rev 3097)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-12 04:04:14 UTC (rev 3098)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Mon Jun 11 06:00 PM 2007 J
+# Last Change: Tue Jun 12 11:00 AM 2007 J
 
 # For now, just test that all mode/dim execute correctly
 

From scipy-svn at scipy.org  Tue Jun 12 08:21:14 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Tue, 12 Jun 2007 07:21:14 -0500 (CDT)
Subject: [Scipy-svn] r3099 - in trunk/Lib/sandbox/pyem: . tests
Message-ID: <20070612122114.BC3F739C19F@new.scipy.org>

Author: cdavid
Date: 2007-06-12 07:21:04 -0500 (Tue, 12 Jun 2007)
New Revision: 3099

Modified:
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/tests/test_densities.py
   trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
   trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
Log:
Add function to compute log responsibilities with logsumexp.

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,7 +1,7 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Tue Jun 12 12:00 PM 2007 J
+# Last Change: Tue Jun 12 03:00 PM 2007 J
 """This module implements various basic functions related to multivariate
 gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
 
@@ -167,14 +167,6 @@
     inva    = lin.inv(va)
     fac     = 1 / N.sqrt( (2*N.pi) ** d * N.fabs(lin.det(va)))
 
-    # # Slow version
-    # n       = N.size(x, 0)
-    # y       = N.zeros(n)
-    # for i in range(n):
-    #     y[i] = N.dot(x[i,:],
-    #              N.dot(inva, N.transpose(x[i,:])))
-    # y *= -0.5
-
     # we are using a trick with sum to "emulate" 
     # the matrix multiplication inva * x without any explicit loop
     y   = N.dot((x-mu), inva)

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Mon Jun 11 06:00 PM 2007 J
+# Last Change: Tue Jun 12 03:00 PM 2007 J
 
 """Module implementing GM, a class which represents Gaussian mixtures.
 
@@ -12,7 +12,7 @@
 import numpy as N
 from numpy.random import randn, rand
 import numpy.linalg as lin
-import densities
+import densities as D
 import misc
 
 # Right now, two main usages of a Gaussian Model are possible
@@ -276,13 +276,13 @@
         Ye  = []   
         if self.mode == 'diag':
             for i in range(self.k):
-                xe, ye  = densities.gauss_ell(self.mu[i, :], self.va[i, :], 
+                xe, ye  = D.gauss_ell(self.mu[i, :], self.va[i, :], 
                         dim, npoints, level)
                 Xe.append(xe)
                 Ye.append(ye)
         elif self.mode == 'full':
             for i in range(self.k):
-                xe, ye  = densities.gauss_ell(self.mu[i, :], 
+                xe, ye  = D.gauss_ell(self.mu[i, :], 
                         self.va[i*self.d:i*self.d+self.d, :], 
                         dim, npoints, level)
                 Xe.append(xe)
@@ -317,6 +317,7 @@
             raise GmParamError("Unknown mode")
 
         return True
+
     @classmethod
     def gen_param(cls, d, nc, varmode = 'diag', spread = 1):
         """Generate random, valid parameters for a gaussian mixture model.
@@ -366,6 +367,27 @@
     # def _regularize(self):
     #     raise NotImplemented("No regularization")
 
+    def pdf(self, x, log = False):
+        """Computes the pdf of the model at given points.
+
+        :Parameters:
+            x : ndarray
+                points where to estimate the pdf. One row for one
+                multi-dimensional sample (eg to estimate the pdf at 100
+                different points in 10 dimension, data's shape should be (100,
+                20)).
+            log : bool
+                If true, returns the log pdf instead of the pdf.
+
+        :Returns:
+            y : ndarray
+                the pdf at points x."""
+        if log:
+            return D.logsumexp(N.sum(
+                    D.multiple_gauss_den(x, self.mu, self.va, log = True) * self.w, 1))
+        else:
+            return N.sum(D.multiple_gauss_den(x, self.mu, self.va) * self.w, 1)
+
     #=================
     # Plotting methods
     #=================
@@ -572,7 +594,7 @@
         # XXX refactor computing pdf
         dmu = self.mu[:, dim]
         dva = self._get_va(dim)
-        den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w
+        den = D.multiple_gauss_den(xdata, dmu, dva) * self.w
         den = N.sum(den, 1)
         den = den.reshape(len(rangey), len(rangex))
 

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Tue Jun 12 11:00 AM 2007 J
+# Last Change: Tue Jun 12 08:00 PM 2007 J
 
 """Module implementing GMM, a class to estimate Gaussian mixture models using
 EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -159,7 +159,7 @@
         knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
         i) = P[state = i | observation = data(t); w, mu, va]
 
-        This is basically the E step of EM for GMM."""
+        This is basically the E step of EM for finite mixtures."""
         # compute the gaussian pdf
         tgd	= densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
         # multiply by the weight
@@ -169,6 +169,28 @@
 
         return gd, tgd
 
+    def compute_log_responsabilities(self, data):
+        """Compute log responsabilities.
+        
+        Return normalized and non-normalized responsabilities for the model (in
+        the log domain)
+        
+        Note
+        ----
+        Computes the latent variable distribution (a posteriori probability)
+        knowing the explicit data for the Gaussian model (w, mu, var): gamma(t,
+        i) = P[state = i | observation = data(t); w, mu, va]
+
+        This is basically the E step of EM for finite mixtures."""
+        # compute the gaussian pdf
+        tgd	= densities.multiple_gauss_den(data, self.gm.mu, self.gm.va, log = True)
+        # multiply by the weight
+        tgd	+= N.log(self.gm.w)
+        # Normalize to get a pdf
+        gd	= tgd  - densities.logsumexp(tgd)[:, N.newaxis]
+
+        return gd, tgd
+
     def update_em(self, data, gamma):
         """Computes update of the Gaussian Mixture Model (M step)
         from the a posteriori pdf, computed by gmm_posterior

Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Tue Jun 12 12:00 PM 2007 J
+# Last Change: Tue Jun 12 08:00 PM 2007 J
 
 # TODO:
 #   - having "fake tests" to check that all mode (scalar, diag and full) are
@@ -21,7 +21,7 @@
 # import modules that are located in the same directory as this file.
 restore_path()
 
-DEF_DEC = 12
+from testcommon import DEF_DEC
 
 class TestDensities(NumpyTestCase):
     def _generate_test_data_1d(self):

Modified: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py	2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py	2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Mon Jun 11 03:00 PM 2007 J
+# Last Change: Tue Jun 12 03:00 PM 2007 J
 
 # For now, just test that all mode/dim execute correctly
 
@@ -10,6 +10,7 @@
 
 set_package_path()
 from pyem import GM
+from pyem.densities import multiple_gauss_den
 restore_path()
 
 class test_BasicFunc(NumpyTestCase):
@@ -58,6 +59,16 @@
         sva = gm._get_va(dim)
         assert N.all(sva == tva)
 
+    def test_2d_diag_pdf(self):
+        d = 2
+        w = N.array([0.4, 0.6])
+        mu = N.array([[0., 2], [-1, -2]])
+        va = N.array([[1, 0.5], [0.5, 1]])
+        x = N.random.randn(100, 2)
+        gm = GM.fromvalues(w, mu, va)
+        y1 = N.sum(multiple_gauss_den(x, mu, va) * w, 1)
+        y2 = gm.pdf(x)
+        assert_array_almost_equal(y1, y2)
 
 if __name__ == "__main__":
     NumpyTest().run()

Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-12 04:04:14 UTC (rev 3098)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-12 12:21:04 UTC (rev 3099)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Tue Jun 12 11:00 AM 2007 J
+# Last Change: Tue Jun 12 09:00 PM 2007 J
 
 # For now, just test that all mode/dim execute correctly
 
@@ -12,6 +12,8 @@
 from pyem import GMM, GM, EM
 restore_path()
 
+from testcommon import DEF_DEC
+
 def load_dataset(filename):
     from scipy.io import loadmat
     dic = loadmat(filename, squeeze_me = False)
@@ -162,5 +164,46 @@
         assert_array_equal(gmm.gm.mu, dic['mu'])
         assert_array_equal(gmm.gm.va, dic['va'])
 
+class test_log_domain(EmTest):
+    """This class tests whether the GMM works in log domain."""
+    def _test_common(self, d, k, mode):
+        dic = load_dataset('%s_%dd_%dk.mat' % (mode, d, k))
+
+        gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
+        gmm = GMM(gm, 'test')
+
+        a, na = gmm.compute_responsabilities(dic['data'])
+        la, nla = gmm.compute_log_responsabilities(dic['data'])
+
+        ta = N.log(a)
+        tna = N.log(na)
+        if not N.all(N.isfinite(ta)):
+            print "precision problem for %s, %dd, %dk, need fixing" % (mode, d, k)
+        else:
+            assert_array_almost_equal(ta, la, DEF_DEC)
+
+        if not N.all(N.isfinite(tna)):
+            print "precision problem for %s, %dd, %dk, need fixing" % (mode, d, k)
+        else:
+            assert_array_almost_equal(tna, nla, DEF_DEC)
+
+    def test_2d_diag(self, level = 1):
+        d = 2
+        k = 3
+        mode = 'diag'
+        self._test_common(d, k, mode)
+
+    def test_1d_full(self, level = 1):
+        d = 1
+        k = 4
+        mode = 'diag'
+        self._test_common(d, k, mode)
+
+    def test_2d_full(self, level = 1):
+        d = 2
+        k = 3
+        mode = 'full'
+        self._test_common(d, k, mode)
+
 if __name__ == "__main__":
     NumpyTest().run()


From scipy-svn at scipy.org  Wed Jun 13 06:08:20 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 13 Jun 2007 05:08:20 -0500 (CDT)
Subject: [Scipy-svn] r3100 - trunk/Lib/sandbox/pyem/tests
Message-ID: <20070613100820.B5BFA39C0D3@new.scipy.org>

Author: cdavid
Date: 2007-06-13 05:08:00 -0500 (Wed, 13 Jun 2007)
New Revision: 3100

Added:
   trunk/Lib/sandbox/pyem/tests/testcommon.py
Modified:
   trunk/Lib/sandbox/pyem/tests/test_densities.py
   trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
Log:
Fix importing datasets in pyem/test

Modified: trunk/Lib/sandbox/pyem/tests/test_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-12 12:21:04 UTC (rev 3099)
+++ trunk/Lib/sandbox/pyem/tests/test_densities.py	2007-06-13 10:08:00 UTC (rev 3100)
@@ -19,10 +19,9 @@
 #Optional:
 set_local_path()
 # import modules that are located in the same directory as this file.
+from testcommon import DEF_DEC
 restore_path()
 
-from testcommon import DEF_DEC
-
 class TestDensities(NumpyTestCase):
     def _generate_test_data_1d(self):
         self.va     = 2.0
@@ -106,21 +105,24 @@
         """This function checks that logsumexp works as expected."""
         # We check wether naive implementation would underflow, to be sure we
         # are actually testing something here.
-        N.seterr(under='raise')
+        errst = N.seterr(under='raise')
         try:
-            a = N.array([[-1000]])
-            self.naive_logsumexp(a)
-            raise AssertionError("expected to catch underflow, we should not be here")
-        except FloatingPointError, e:
-            print "Catching underflow, as expected"
-        assert pyem.densities.logsumexp(a) == -1000.
-        try:
-            a = N.array([[-1000, -1000, -1000]])
-            self.naive_logsumexp(a)
-            raise AssertionError("expected to catch underflow, we should not be here")
-        except FloatingPointError, e:
-            print "Catching underflow, as expected"
-        assert_array_almost_equal(pyem.densities.logsumexp(a), -998.90138771)
+            try:
+                a = N.array([[-1000]])
+                self.naive_logsumexp(a)
+                raise AssertionError("expected to catch underflow, we should not be here")
+            except FloatingPointError, e:
+                print "Catching underflow, as expected"
+            assert pyem.densities.logsumexp(a) == -1000.
+            try:
+                a = N.array([[-1000, -1000, -1000]])
+                self.naive_logsumexp(a)
+                raise AssertionError("expected to catch underflow, we should not be here")
+            except FloatingPointError, e:
+                print "Catching underflow, as expected"
+            assert_array_almost_equal(pyem.densities.logsumexp(a), -998.90138771)
+        finally:
+            N.seterr(under=errst['under'])
 
     def naive_logsumexp(self, data):
         return N.log(N.sum(N.exp(data), 1)) 

Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-12 12:21:04 UTC (rev 3099)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-13 10:08:00 UTC (rev 3100)
@@ -4,6 +4,7 @@
 # For now, just test that all mode/dim execute correctly
 
 import sys
+import os 
 from numpy.testing import *
 
 import numpy as N
@@ -12,11 +13,15 @@
 from pyem import GMM, GM, EM
 restore_path()
 
+set_local_path()
+# import modules that are located in the same directory as this file.
 from testcommon import DEF_DEC
+curpath = sys.path[0]
+restore_path()
 
 def load_dataset(filename):
     from scipy.io import loadmat
-    dic = loadmat(filename, squeeze_me = False)
+    dic = loadmat(os.path.join(curpath, filename), squeeze_me = False)
     dic['w0'] = dic['w0'].squeeze()
     dic['w'] = dic['w'].squeeze()
     dic['tw'] = dic['tw'].squeeze()

Added: trunk/Lib/sandbox/pyem/tests/testcommon.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/testcommon.py	2007-06-12 12:21:04 UTC (rev 3099)
+++ trunk/Lib/sandbox/pyem/tests/testcommon.py	2007-06-13 10:08:00 UTC (rev 3100)
@@ -0,0 +1 @@
+DEF_DEC = 12


From scipy-svn at scipy.org  Wed Jun 13 06:26:10 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 13 Jun 2007 05:26:10 -0500 (CDT)
Subject: [Scipy-svn] r3101 - trunk/Lib/sandbox/pyem/tests
Message-ID: <20070613102610.27CC239C1B2@new.scipy.org>

Author: cdavid
Date: 2007-06-13 05:26:06 -0500 (Wed, 13 Jun 2007)
New Revision: 3101

Modified:
   trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
Log:
More fixes for broken tests in gmm_em

Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-13 10:08:00 UTC (rev 3100)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-13 10:26:06 UTC (rev 3101)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Tue Jun 12 09:00 PM 2007 J
+# Last Change: Wed Jun 13 07:00 PM 2007 J
 
 # For now, just test that all mode/dim execute correctly
 
@@ -123,9 +123,9 @@
         gmm = GMM(gm, 'test')
         EM().train(dic['data'], gmm)
 
-        assert_array_equal(gmm.gm.w, dic['w'])
-        assert_array_equal(gmm.gm.mu, dic['mu'])
-        assert_array_equal(gmm.gm.va, dic['va'])
+        assert_array_almost_equal(gmm.gm.w, dic['w'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.va, dic['va'], DEF_DEC)
 
     def test_1d_diag(self, level = 1):
         d = 1
@@ -137,9 +137,9 @@
         gmm = GMM(gm, 'test')
         EM().train(dic['data'], gmm)
 
-        assert_array_equal(gmm.gm.w, dic['w'])
-        assert_array_equal(gmm.gm.mu, dic['mu'])
-        assert_array_equal(gmm.gm.va, dic['va'])
+        assert_array_equal(gmm.gm.w, dic['w'], DEF_DEC)
+        assert_array_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
+        assert_array_equal(gmm.gm.va, dic['va'], DEF_DEC)
 
     def test_2d_full(self, level = 1):
         d = 2
@@ -151,9 +151,9 @@
         gmm = GMM(gm, 'test')
         EM().train(dic['data'], gmm)
 
-        assert_array_equal(gmm.gm.w, dic['w'])
-        assert_array_equal(gmm.gm.mu, dic['mu'])
-        assert_array_equal(gmm.gm.va, dic['va'])
+        assert_array_equal(gmm.gm.w, dic['w'], DEF_DEC)
+        assert_array_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
+        assert_array_equal(gmm.gm.va, dic['va'], DEF_DEC)
 
     def test_2d_diag(self, level = 1):
         d = 2
@@ -165,9 +165,9 @@
         gmm = GMM(gm, 'test')
         EM().train(dic['data'], gmm)
 
-        assert_array_equal(gmm.gm.w, dic['w'])
-        assert_array_equal(gmm.gm.mu, dic['mu'])
-        assert_array_equal(gmm.gm.va, dic['va'])
+        assert_array__almost_equal(gmm.gm.w, dic['w'], DEF_DEC)
+        assert_array__almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
+        assert_array__almost_equal(gmm.gm.va, dic['va'], DEF_DEC)
 
 class test_log_domain(EmTest):
     """This class tests whether the GMM works in log domain."""
@@ -183,12 +183,12 @@
         ta = N.log(a)
         tna = N.log(na)
         if not N.all(N.isfinite(ta)):
-            print "precision problem for %s, %dd, %dk, need fixing" % (mode, d, k)
+            print "precision problem for %s, %dd, %dk, test need fixing" % (mode, d, k)
         else:
             assert_array_almost_equal(ta, la, DEF_DEC)
 
         if not N.all(N.isfinite(tna)):
-            print "precision problem for %s, %dd, %dk, need fixing" % (mode, d, k)
+            print "precision problem for %s, %dd, %dk, test need fixing" % (mode, d, k)
         else:
             assert_array_almost_equal(tna, nla, DEF_DEC)
 

From scipy-svn at scipy.org  Wed Jun 13 06:29:33 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 13 Jun 2007 05:29:33 -0500 (CDT)
Subject: [Scipy-svn] r3102 - trunk/Lib/sandbox/pyem/tests
Message-ID: <20070613102933.0BDAF39C1B2@new.scipy.org>

Author: cdavid
Date: 2007-06-13 05:29:29 -0500 (Wed, 13 Jun 2007)
New Revision: 3102

Modified:
   trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
Log:
Trivial fix for typo in pyem tests.

Modified: trunk/Lib/sandbox/pyem/tests/test_gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-13 10:26:06 UTC (rev 3101)
+++ trunk/Lib/sandbox/pyem/tests/test_gmm_em.py	2007-06-13 10:29:29 UTC (rev 3102)
@@ -137,9 +137,9 @@
         gmm = GMM(gm, 'test')
         EM().train(dic['data'], gmm)
 
-        assert_array_equal(gmm.gm.w, dic['w'], DEF_DEC)
-        assert_array_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
-        assert_array_equal(gmm.gm.va, dic['va'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.w, dic['w'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.va, dic['va'], DEF_DEC)
 
     def test_2d_full(self, level = 1):
         d = 2
@@ -151,9 +151,9 @@
         gmm = GMM(gm, 'test')
         EM().train(dic['data'], gmm)
 
-        assert_array_equal(gmm.gm.w, dic['w'], DEF_DEC)
-        assert_array_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
-        assert_array_equal(gmm.gm.va, dic['va'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.w, dic['w'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.va, dic['va'], DEF_DEC)
 
     def test_2d_diag(self, level = 1):
         d = 2
@@ -165,9 +165,9 @@
         gmm = GMM(gm, 'test')
         EM().train(dic['data'], gmm)
 
-        assert_array__almost_equal(gmm.gm.w, dic['w'], DEF_DEC)
-        assert_array__almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
-        assert_array__almost_equal(gmm.gm.va, dic['va'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.w, dic['w'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
+        assert_array_almost_equal(gmm.gm.va, dic['va'], DEF_DEC)
 
 class test_log_domain(EmTest):
     """This class tests whether the GMM works in log domain."""


From scipy-svn at scipy.org  Wed Jun 13 09:56:35 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 13 Jun 2007 08:56:35 -0500 (CDT)
Subject: [Scipy-svn] r3103 - trunk/Lib/sandbox/maskedarray
Message-ID: <20070613135635.5041839C1C5@new.scipy.org>

Author: pierregm
Date: 2007-06-13 08:56:32 -0500 (Wed, 13 Jun 2007)
New Revision: 3103

Modified:
   trunk/Lib/sandbox/maskedarray/core.py
   trunk/Lib/sandbox/maskedarray/mrecords.py
Log:
mrecords : fixed a bug in .filled

Modified: trunk/Lib/sandbox/maskedarray/core.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/core.py	2007-06-13 10:29:29 UTC (rev 3102)
+++ trunk/Lib/sandbox/maskedarray/core.py	2007-06-13 13:56:32 UTC (rev 3103)
@@ -2638,8 +2638,10 @@
     if 1:
         x = arange(10)
         assert(x.ctypes.data == x.filled().ctypes.data)
-    if 1:
-        a = array([1,2,3,4],mask=[0,0,0,0],small_mask=False)
+    if 0:
+        a = array([1,2,3,4],mask=[0,0,0,0],small_mask=True)
+        a[1] = masked
+        a[1] = 1
         assert(a.ravel()._mask, [0,0,0,0])
         assert(a.compressed(), a)
         a[0] = masked

Modified: trunk/Lib/sandbox/maskedarray/mrecords.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/mrecords.py	2007-06-13 10:29:29 UTC (rev 3102)
+++ trunk/Lib/sandbox/maskedarray/mrecords.py	2007-06-13 13:56:32 UTC (rev 3103)
@@ -341,13 +341,13 @@
 If `fill_value` is None, uses self.fill_value.
         """
         _localdict = self.__dict__
-        d = _localdict['_data']
+        d = self._data
         fm = _localdict['_fieldmask']
         if not numeric.asarray(fm, dtype=bool_).any():
             return d
         #
         if fill_value is None:
-            value = _localdict['fill_value']
+            value = _localdict['_fill_value']
         else:
             value = fill_value
             if numeric.size(value) == 1:


From scipy-svn at scipy.org  Wed Jun 13 19:26:50 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 13 Jun 2007 18:26:50 -0500 (CDT)
Subject: [Scipy-svn] r3104 - in trunk/Lib/io: . tests
Message-ID: <20070613232650.0BC6A39C036@new.scipy.org>

Author: wnbell
Date: 2007-06-13 18:26:47 -0500 (Wed, 13 Jun 2007)
New Revision: 3104

Modified:
   trunk/Lib/io/mmio.py
   trunk/Lib/io/tests/test_mmio.py
Log:
fixed sparse coordinate matrix indices to be base 1 as in the MM standard
added unittest to prevent problem in the future


Modified: trunk/Lib/io/mmio.py
===================================================================
--- trunk/Lib/io/mmio.py	2007-06-13 13:56:32 UTC (rev 3103)
+++ trunk/Lib/io/mmio.py	2007-06-13 23:26:47 UTC (rev 3104)
@@ -333,11 +333,10 @@
         assert symm=='general',`symm`
         if field in ['real','integer']:
             for i in range(entries):
-                target.write(format % (a.rowcol(i)+(a.getdata(i),)))
+                target.write(format % (a.row[i]+1,a.col[i]+1,a.data[i]))
         elif field=='complex':
             for i in range(entries):
-                value = a.getdata(i)
-                target.write(format % ((a.rowcol(i))+(real(value),imag(value))))
+                target.write(format % (a.row[i]+1,a.col[i]+1,reak(a.data[i]),imag(a.data[i])))
         elif field=='pattern':
             raise NotImplementedError,`field`
         else:

Modified: trunk/Lib/io/tests/test_mmio.py
===================================================================
--- trunk/Lib/io/tests/test_mmio.py	2007-06-13 13:56:32 UTC (rev 3103)
+++ trunk/Lib/io/tests/test_mmio.py	2007-06-13 23:26:47 UTC (rev 3104)
@@ -6,6 +6,7 @@
 
 set_package_path()
 from io.mmio import mminfo,mmread,mmwrite
+import scipy
 restore_path()
 
 class test_mmio_array(NumpyTestCase):
@@ -151,5 +152,25 @@
         b = mmread(fn).todense()
         assert_array_almost_equal(a,b)
 
+    def check_simple_write_read(self):
+        I = array([0, 0, 1, 2, 3, 3, 3, 4])
+        J = array([0, 3, 1, 2, 1, 3, 4, 4])
+        V = array([  1.0,   6.0,   10.5, 0.015,   250.5,  -280.0, 33.32, 12.0 ])
+        
+        b = scipy.sparse.coo_matrix((V,(I,J)),dims=(5,5))
+
+        fn = mktemp()
+        mmwrite(fn,b)
+        
+        assert_equal(mminfo(fn),(5,5,8,'coordinate','real','general'))
+        a = [[1,    0,      0,       6,      0],
+             [0,   10.5,    0,       0,      0],
+             [0,    0,    .015,      0,      0],
+             [0,  250.5,    0,     -280,    33.32],
+             [0,    0,      0,       0,     12]]
+        b = mmread(fn).todense()
+        assert_array_almost_equal(a,b)
+
+
 if __name__ == "__main__":
     NumpyTest().run()


From scipy-svn at scipy.org  Wed Jun 13 19:44:05 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 13 Jun 2007 18:44:05 -0500 (CDT)
Subject: [Scipy-svn] r3105 - trunk/Lib/io
Message-ID: <20070613234405.C168D39C06D@new.scipy.org>

Author: wnbell
Date: 2007-06-13 18:44:04 -0500 (Wed, 13 Jun 2007)
New Revision: 3105

Modified:
   trunk/Lib/io/mmio.py
Log:
fixed bug in MM io for non-COO sparse formats


Modified: trunk/Lib/io/mmio.py
===================================================================
--- trunk/Lib/io/mmio.py	2007-06-13 23:26:47 UTC (rev 3104)
+++ trunk/Lib/io/mmio.py	2007-06-13 23:44:04 UTC (rev 3105)
@@ -333,10 +333,10 @@
         assert symm=='general',`symm`
         if field in ['real','integer']:
             for i in range(entries):
-                target.write(format % (a.row[i]+1,a.col[i]+1,a.data[i]))
+                target.write(format % (a.rowcol(i)[0] + 1,a.rowcol(i)[1] + 1,a.getdata(i))) #convert base 0 to base 1
         elif field=='complex':
             for i in range(entries):
-                target.write(format % (a.row[i]+1,a.col[i]+1,reak(a.data[i]),imag(a.data[i])))
+                target.write(format % (a.rowcol(i)[0] + 1,a.rowcol(i)[1] + 1,real(a.getdata(i)),imag(a.getdata(i))))
         elif field=='pattern':
             raise NotImplementedError,`field`
         else:


From scipy-svn at scipy.org  Thu Jun 14 21:04:34 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu, 14 Jun 2007 20:04:34 -0500 (CDT)
Subject: [Scipy-svn] r3106 - trunk/Lib/sandbox/timeseries/lib
Message-ID: <20070615010434.8C49E39C0E2@new.scipy.org>

Author: mattknox_ca
Date: 2007-06-14 20:00:10 -0500 (Thu, 14 Jun 2007)
New Revision: 3106

Modified:
   trunk/Lib/sandbox/timeseries/lib/moving_funcs.py
Log:
reversed params to expmave_sub. Behaviour of ufuncs created from frompyfunc seems to have changed at some point, causing this to be reversed

Modified: trunk/Lib/sandbox/timeseries/lib/moving_funcs.py
===================================================================
--- trunk/Lib/sandbox/timeseries/lib/moving_funcs.py	2007-06-13 23:44:04 UTC (rev 3105)
+++ trunk/Lib/sandbox/timeseries/lib/moving_funcs.py	2007-06-15 01:00:10 UTC (rev 3106)
@@ -202,7 +202,7 @@
     #
     k = 2./float(span + 1)
     def expmave_sub(a, b):
-        return b + k * (a - b)
+        return a + k * (b - a)
     #
     data._data.flat = N.frompyfunc(expmave_sub, 2, 1).accumulate(_data)
     if ismasked:


From scipy-svn at scipy.org  Fri Jun 15 12:15:08 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri, 15 Jun 2007 11:15:08 -0500 (CDT)
Subject: [Scipy-svn] r3107 - in trunk/Lib/sandbox/maskedarray: . tests
Message-ID: <20070615161508.08CC439C055@new.scipy.org>

Author: pierregm
Date: 2007-06-15 11:14:55 -0500 (Fri, 15 Jun 2007)
New Revision: 3107

Modified:
   trunk/Lib/sandbox/maskedarray/core.py
   trunk/Lib/sandbox/maskedarray/extras.py
   trunk/Lib/sandbox/maskedarray/mrecords.py
   trunk/Lib/sandbox/maskedarray/tests/test_core.py
   trunk/Lib/sandbox/maskedarray/tests/test_extras.py
   trunk/Lib/sandbox/maskedarray/tests/test_mrecords.py
Log:
core     : __setitem__ now calls __setmask__ when value is masked
core     : added a test in filled on m.any()
extras   : make_rowcols : make sure the mask is copied to avoid propagation
mrecords : fixed a couple of bugs

Modified: trunk/Lib/sandbox/maskedarray/core.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/core.py	2007-06-15 01:00:10 UTC (rev 3106)
+++ trunk/Lib/sandbox/maskedarray/core.py	2007-06-15 16:14:55 UTC (rev 3107)
@@ -1132,11 +1132,13 @@
 #            raise IndexError, msg
         #....
         if value is masked:
-            if self._mask is nomask:
-                self._mask = make_mask_none(self.shape)
-            else:
-                self._mask = self._mask.copy()
-            self._mask[indx] = True
+            m = self._mask
+            if m is nomask:
+                m = make_mask_none(self.shape)
+#            else:
+#                m = m.copy()
+            m[indx] = True
+            self.__setmask__(m)
             return
         #....
         dval = numeric.asarray(value).astype(self.dtype)
@@ -1261,7 +1263,7 @@
 If `fill_value` is None, uses self.fill_value.
         """
         m = self._mask
-        if m is nomask:
+        if m is nomask or not m.any():
             return self._data
         #
         if fill_value is None:
@@ -2645,4 +2647,13 @@
         assert(a.ravel()._mask, [0,0,0,0])
         assert(a.compressed(), a)
         a[0] = masked
-        assert(a.compressed()._mask, [0,0,0])
\ No newline at end of file
+        assert(a.compressed()._mask, [0,0,0])
+    if 1:
+        x = array(0, mask=0)
+        I = x.ctypes.data
+        J = x.filled().ctypes.data
+        print (I,J)
+        x = array([0,0], mask=0)
+        (I,J) = (x.ctypes.data, x.filled().ctypes.data)
+        print (I,J)
+        
\ No newline at end of file

Modified: trunk/Lib/sandbox/maskedarray/extras.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/extras.py	2007-06-15 01:00:10 UTC (rev 3106)
+++ trunk/Lib/sandbox/maskedarray/extras.py	2007-06-15 16:14:55 UTC (rev 3107)
@@ -411,6 +411,7 @@
     if m is nomask or not m.any():
         return a
     maskedval = m.nonzero()
+    a._mask = a._mask.copy()
     if not axis:
         a[function_base.unique(maskedval[0])] = masked
     if axis in [None, 1, -1]:
@@ -648,7 +649,15 @@
 ################################################################################
 if __name__ == '__main__':
     #
+    import numpy as N
+    from maskedarray.testutils import assert_equal
     if 1:
-        x = arange(10)
-        x[0] = masked
-        print dot(x,x)
\ No newline at end of file
+        n = N.arange(1,7)
+        #
+        m = [1,0,0,0,0,0]
+        a = masked_array(n, mask=m).reshape(2,3)
+        b = masked_array(n, mask=m).reshape(3,2)
+        c = dot(a,b, True)
+        assert_equal(c.mask, [[1,1],[1,0]])
+        c = dot(a,b,False)
+        assert_equal(c, N.dot(a.filled(0), b.filled(0)))
\ No newline at end of file

Modified: trunk/Lib/sandbox/maskedarray/mrecords.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/mrecords.py	2007-06-15 01:00:10 UTC (rev 3106)
+++ trunk/Lib/sandbox/maskedarray/mrecords.py	2007-06-15 16:14:55 UTC (rev 3107)
@@ -115,7 +115,7 @@
         if names is not None:
             descr = _checknames(descr,names)
         _names = descr.names    
-        mdescr = [(t[0],'|b1') for t in descr.descr]
+        mdescr = [(n,'|b1') for n in _names]
         #
         shape = numeric.asarray(data[0]).shape
         if isinstance(shape, int):
@@ -129,7 +129,11 @@
             _fieldmask = data._fieldmask
         elif isinstance(data, recarray):
             _data = data
-            _fieldmask = mask
+            if mask is nomask:
+                _fieldmask = data.astype(mdescr)
+                _fieldmask.flat = tuple([False]*len(mdescr))
+            else:
+                _fieldmask = mask
         else:
             _data = recarray(shape, dtype=descr)
             _fieldmask = recarray(shape, dtype=mdescr)
@@ -179,7 +183,7 @@
             _data = self._data
             _mask = self._fieldmask
             obj = numeric.asarray(_data.__getattribute__(attr)).view(MaskedArray)
-            obj._mask = make_mask(_mask.__getattribute__(attr))
+            obj.__setmask__(_mask.__getattribute__(attr))
             return obj
         raise AttributeError,"No attribute '%s' !" % attr
             
@@ -232,6 +236,10 @@
         obj = ndarray.__getitem__(self, indx).view(type(self))
         obj._fieldmask = _localdict['_fieldmask'][indx]
         return obj
+    #............................................
+    def __setitem__(self, indx, value):
+        """Sets the given record to value."""
+        MaskedArray.__setitem__(self, indx, value)
         
 #    def __getslice__(self, i, j):
 #        """Returns the slice described by [i,j]."""
@@ -243,13 +251,12 @@
     def __setslice__(self, i, j, value):
         """Sets the slice described by [i,j] to `value`."""
         _localdict = self.__dict__
-        
         d = self._data
         m = _localdict['_fieldmask']
         names = self.dtype.names
         if value is masked:
             for n in names:
-                m[i:j][n] = masked
+                m[i:j][n] = True
         elif not self._hardmask:
             fval = filled(value)
             mval = getmaskarray(value)
@@ -484,7 +491,7 @@
         descr = parsed._descr
 
     try:
-        retval = numeric.array(reclist, dtype = descr)
+        retval = numeric.array(reclist, dtype = descr).view(recarray)
     except TypeError:  # list of lists instead of list of tuples
         if (shape is None or shape == 0):
             shape = len(reclist)*2
@@ -645,13 +652,27 @@
 ################################################################################
 if __name__ == '__main__':
     import numpy as N
+    from maskedarray.testutils import assert_equal
     if 1:
         d = N.arange(5)
         m = MA.make_mask([1,0,0,1,1])
         base_d = N.r_[d,d[::-1]].reshape(2,-1).T
         base_m = N.r_[[m, m[::-1]]].T
         base = MA.array(base_d, mask=base_m)    
-        mrecord = fromarrays(base.T,)
-        
+        mrecord = fromarrays(base.T,dtype=[('a',N.float_),('b',N.float_)])
         mrec = MaskedRecords(mrecord)
+        #
+        mrec.a[3:] = 5
+        assert_equal(mrec.a, [0,1,2,5,5])
+        assert_equal(mrec.a._mask, [1,0,0,0,0])
+        #
+        mrec.b[3:] = masked
+        assert_equal(mrec.b, [4,3,2,1,0])
+        assert_equal(mrec.b._mask, [1,1,0,1,1])
+        #
+        mrec[:2] = masked
+        assert_equal(mrec._mask, [1,1,0,0,0])
+        mrec[-1] = masked
+        assert_equal(mrec._mask, [1,1,0,0,1])
+
         
\ No newline at end of file

Modified: trunk/Lib/sandbox/maskedarray/tests/test_core.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/tests/test_core.py	2007-06-15 01:00:10 UTC (rev 3106)
+++ trunk/Lib/sandbox/maskedarray/tests/test_core.py	2007-06-15 16:14:55 UTC (rev 3107)
@@ -240,7 +240,6 @@
         assert(minimum(xm, xm).mask)
         assert(xm.filled().dtype is xm.data.dtype)
         x = array(0, mask=0)
-#        assert(x.filled() is x.data)
         assert_equal(x.filled().ctypes.data, x.ctypes.data)
         assert_equal(str(xm), str(masked_print_option))
     #.........................

Modified: trunk/Lib/sandbox/maskedarray/tests/test_extras.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/tests/test_extras.py	2007-06-15 01:00:10 UTC (rev 3106)
+++ trunk/Lib/sandbox/maskedarray/tests/test_extras.py	2007-06-15 16:14:55 UTC (rev 3107)
@@ -219,18 +219,27 @@
         m = [1,0,0,0,0,0]
         a = masked_array(n, mask=m).reshape(2,3)
         b = masked_array(n, mask=m).reshape(3,2)
-        c = dot(a,b)
+        c = dot(a,b,True)
         assert_equal(c.mask, [[1,1],[1,0]])
-        c = dot(b,a)
+        c = dot(b,a,True)
         assert_equal(c.mask, [[1,1,1],[1,0,0],[1,0,0]])
+        c = dot(a,b,False)
+        assert_equal(c, N.dot(a.filled(0), b.filled(0)))
+        c = dot(b,a,False)
+        assert_equal(c, N.dot(b.filled(0), a.filled(0)))
         #        
         m = [0,0,0,0,0,1]
         a = masked_array(n, mask=m).reshape(2,3)
         b = masked_array(n, mask=m).reshape(3,2)
-        c = dot(a,b)
+        c = dot(a,b,True)
         assert_equal(c.mask,[[0,1],[1,1]])        
-        c = dot(b,a)
+        c = dot(b,a,True)
         assert_equal(c.mask, [[0,0,1],[0,0,1],[1,1,1]])
+        c = dot(a,b,False)
+        assert_equal(c, N.dot(a.filled(0), b.filled(0)))
+        assert_equal(c, dot(a,b))
+        c = dot(b,a,False)
+        assert_equal(c, N.dot(b.filled(0), a.filled(0)))
         #        
         m = [0,0,0,0,0,0]
         a = masked_array(n, mask=m).reshape(2,3)
@@ -242,24 +251,36 @@
         #        
         a = masked_array(n, mask=[1,0,0,0,0,0]).reshape(2,3)
         b = masked_array(n, mask=[0,0,0,0,0,0]).reshape(3,2)
-        c = dot(a,b)
+        c = dot(a,b,True)
         assert_equal(c.mask,[[1,1],[0,0]])
-        c = dot(b,a)
+        c = dot(a,b,False)
+        assert_equal(c, N.dot(a.filled(0),b.filled(0)))
+        c = dot(b,a,True)
         assert_equal(c.mask,[[1,0,0],[1,0,0],[1,0,0]])
+        c = dot(b,a,False)
+        assert_equal(c, N.dot(b.filled(0),a.filled(0)))
         #        
         a = masked_array(n, mask=[0,0,0,0,0,1]).reshape(2,3)
         b = masked_array(n, mask=[0,0,0,0,0,0]).reshape(3,2)
+        c = dot(a,b,True)
+        assert_equal(c.mask,[[0,0],[1,1]])
         c = dot(a,b)
-        assert_equal(c.mask,[[0,0],[1,1]])
-        c = dot(b,a)
+        assert_equal(c, N.dot(a.filled(0),b.filled(0)))        
+        c = dot(b,a,True)
         assert_equal(c.mask,[[0,0,1],[0,0,1],[0,0,1]])
+        c = dot(b,a,False)
+        assert_equal(c, N.dot(b.filled(0), a.filled(0)))
         #        
         a = masked_array(n, mask=[0,0,0,0,0,1]).reshape(2,3)
         b = masked_array(n, mask=[0,0,1,0,0,0]).reshape(3,2)
-        c = dot(a,b)
+        c = dot(a,b,True)
         assert_equal(c.mask,[[1,0],[1,1]])
-        c = dot(b,a)
+        c = dot(a,b,False)
+        assert_equal(c, N.dot(a.filled(0),b.filled(0)))
+        c = dot(b,a,True)
         assert_equal(c.mask,[[0,0,1],[1,1,1],[0,0,1]])
+        c = dot(b,a,False)
+        assert_equal(c, N.dot(b.filled(0),a.filled(0)))
         
     def test_mediff1d(self):    
         "Tests mediff1d"    

Modified: trunk/Lib/sandbox/maskedarray/tests/test_mrecords.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/tests/test_mrecords.py	2007-06-15 01:00:10 UTC (rev 3106)
+++ trunk/Lib/sandbox/maskedarray/tests/test_mrecords.py	2007-06-15 16:14:55 UTC (rev 3107)
@@ -41,54 +41,74 @@
         base_d = N.r_[d,d[::-1]].reshape(2,-1).T
         base_m = N.r_[[m, m[::-1]]].T
         base = MA.array(base_d, mask=base_m)    
-        mrecord = fromarrays(base.T,)
+        mrecord = fromarrays(base.T, dtype=[('a',N.float_),('b',N.float_)])
         self.data = [d, m, mrecord]
         
     def test_get(self):
         "Tests fields retrieval"
         [d, m, mrec] = self.data
         mrec = mrec.copy()
-        assert_equal(mrec.f0, MA.array(d,mask=m))
-        assert_equal(mrec.f1, MA.array(d[::-1],mask=m[::-1]))
+        assert_equal(mrec.a, MA.array(d,mask=m))
+        assert_equal(mrec.b, MA.array(d[::-1],mask=m[::-1]))
         assert((mrec._fieldmask == N.core.records.fromarrays([m, m[::-1]])).all())
         assert_equal(mrec._mask, N.r_[[m,m[::-1]]].all(0))
-        assert_equal(mrec.f0[1], mrec[1].f0)
+        assert_equal(mrec.a[1], mrec[1].a)
         #
         assert(isinstance(mrec[:2], MaskedRecords))
-        assert_equal(mrec[:2]['f0'], d[:2])
+        assert_equal(mrec[:2]['a'], d[:2])
         
     def test_set(self):
         "Tests setting fields/attributes."
         [d, m, mrecord] = self.data
-        mrecord.f0._data[:] = 5
-        assert_equal(mrecord['f0']._data, [5,5,5,5,5])
-        mrecord.f0 = 1
-        assert_equal(mrecord['f0']._data, [1]*5)
-        assert_equal(getmaskarray(mrecord['f0']), [0]*5)
-        mrecord.f1 = MA.masked
-        assert_equal(mrecord.f1.mask, [1]*5)
-        assert_equal(getmaskarray(mrecord['f1']), [1]*5)
+        mrecord.a._data[:] = 5
+        assert_equal(mrecord['a']._data, [5,5,5,5,5])
+        mrecord.a = 1
+        assert_equal(mrecord['a']._data, [1]*5)
+        assert_equal(getmaskarray(mrecord['a']), [0]*5)
+        mrecord.b = MA.masked
+        assert_equal(mrecord.b.mask, [1]*5)
+        assert_equal(getmaskarray(mrecord['b']), [1]*5)
         mrecord._mask = MA.masked
-        assert_equal(getmaskarray(mrecord['f1']), [1]*5)
-        assert_equal(mrecord['f0']._mask, mrecord['f1']._mask)
+        assert_equal(getmaskarray(mrecord['b']), [1]*5)
+        assert_equal(mrecord['a']._mask, mrecord['b']._mask)
         mrecord._mask = MA.nomask
-        assert_equal(getmaskarray(mrecord['f1']), [0]*5)
-        assert_equal(mrecord['f0']._mask, mrecord['f1']._mask)   
+        assert_equal(getmaskarray(mrecord['b']), [0]*5)
+        assert_equal(mrecord['a']._mask, mrecord['b']._mask)   
         #
+    def test_setfields(self):
+        "Tests setting fields."
+        [d, m, mrecord] = self.data
+        mrecord.a[3:] = 5
+        assert_equal(mrecord.a, [0,1,2,5,5])
+        assert_equal(mrecord.a._mask, [1,0,0,0,0])
+        #
+        mrecord.b[3:] = masked
+        assert_equal(mrecord.b, [4,3,2,1,0])
+        assert_equal(mrecord.b._mask, [1,1,0,1,1])
+        
     def test_setslices(self):
         "Tests setting slices."
         [d, m, mrec] = self.data        
         mrec[:2] = 5
-        assert_equal(mrec.f0._data, [5,5,2,3,4])
-        assert_equal(mrec.f1._data, [5,5,2,1,0])
-        assert_equal(mrec.f0._mask, [0,0,0,1,1])
-        assert_equal(mrec.f1._mask, [0,0,0,0,1])
+        assert_equal(mrec.a._data, [5,5,2,3,4])
+        assert_equal(mrec.b._data, [5,5,2,1,0])
+        assert_equal(mrec.a._mask, [0,0,0,1,1])
+        assert_equal(mrec.b._mask, [0,0,0,0,1])
+        #
+        mrec[:2] = masked
+        assert_equal(mrec._mask, [1,1,0,0,1])
+        mrec[-2] = masked
+        assert_equal(mrec._mask, [1,1,0,1,1])
+        #
+    def test_setslices_hardmask(self):
+        "Tests setting slices w/ hardmask."
+        [d, m, mrec] = self.data      
         mrec.harden_mask()
         mrec[-2:] = 5
-        assert_equal(mrec.f0._data, [5,5,2,3,4])
-        assert_equal(mrec.f1._data, [5,5,2,5,0])
-        assert_equal(mrec.f0._mask, [0,0,0,1,1])
-        assert_equal(mrec.f1._mask, [0,0,0,0,1]) 
+        assert_equal(mrec.a._data, [0,1,2,3,4])
+        assert_equal(mrec.b._data, [4,3,2,5,0])
+        assert_equal(mrec.a._mask, [1,0,0,1,1])
+        assert_equal(mrec.b._mask, [1,1,0,0,1]) 
         
     def test_hardmask(self):
         "Test hardmask"
@@ -101,24 +121,26 @@
         mrec.soften_mask()
         assert(not mrec._hardmask)
         mrec._mask = nomask
-        assert(mrec['f1']._mask is nomask)
-        assert_equal(mrec['f0']._mask,mrec['f1']._mask)   
+        assert(mrec['b']._mask is nomask)
+        assert_equal(mrec['a']._mask,mrec['b']._mask)   
 
     def test_fromrecords(self):
         "Test from recarray."
         [d, m, mrec] = self.data
-        nrec = N.core.records.fromarrays(N.r_[[d,d[::-1]]])
-        mrecfr = fromrecords(nrec.tolist())
-        assert_equal(mrecfr.f0, mrec.f0)
-        assert_equal(mrecfr.dtype, mrec.dtype)
+        nrec = N.core.records.fromarrays(N.r_[[d,d[::-1]]],
+                                         dtype=[('a',N.float_),('b',N.float_)])
         #....................
         mrecfr = fromrecords(nrec)
-        assert_equal(mrecfr.f0, mrec.f0)
+        assert_equal(mrecfr.a, mrec.a)
         assert_equal(mrecfr.dtype, mrec.dtype)
         #....................
         tmp = mrec[::-1] #.tolist()
         mrecfr = fromrecords(tmp)
-        assert_equal(mrecfr.f0, mrec.f0[::-1])
+        assert_equal(mrecfr.a, mrec.a[::-1])
+        #....................        
+        mrecfr = fromrecords(nrec.tolist())
+        assert_equal(mrecfr.a, mrec.a)
+        assert_equal(mrecfr.dtype, mrec.dtype)
         
     def test_fromtextfile(self):        
         "Tests reading from a text file."


From scipy-svn at scipy.org  Fri Jun 15 13:57:32 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri, 15 Jun 2007 12:57:32 -0500 (CDT)
Subject: [Scipy-svn] r3108 - trunk/Lib/stats
Message-ID: <20070615175732.7861E39C141@new.scipy.org>

Author: oliphant
Date: 2007-06-15 12:57:26 -0500 (Fri, 15 Jun 2007)
New Revision: 3108

Modified:
   trunk/Lib/stats/stats.py
Log:
Replace 'as' variable name

Modified: trunk/Lib/stats/stats.py
===================================================================
--- trunk/Lib/stats/stats.py	2007-06-15 16:14:55 UTC (rev 3107)
+++ trunk/Lib/stats/stats.py	2007-06-15 17:57:26 UTC (rev 3108)
@@ -2094,8 +2094,8 @@
     )
     """
     it = np.argsort(a)
-    as = a[it]
-    return as, it
+    as_ = a[it]
+    return as_, it
 
 def rankdata(a):
     """Ranks the data in a, dealing with ties appropriately.


From scipy-svn at scipy.org  Mon Jun 18 19:22:20 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 18 Jun 2007 18:22:20 -0500 (CDT)
Subject: [Scipy-svn] r3109 - trunk/Lib/io
Message-ID: <20070618232220.EC1E739C1B7@new.scipy.org>

Author: wnbell
Date: 2007-06-18 18:22:19 -0500 (Mon, 18 Jun 2007)
New Revision: 3109

Modified:
   trunk/Lib/io/mmio.py
Log:
added support for (sparse) "pattern" Matrix Market matrices 


Modified: trunk/Lib/io/mmio.py
===================================================================
--- trunk/Lib/io/mmio.py	2007-06-15 17:57:26 UTC (rev 3108)
+++ trunk/Lib/io/mmio.py	2007-06-18 23:22:19 UTC (rev 3109)
@@ -115,7 +115,7 @@
     elif field=='complex':
         dtype='D'
     elif field=='pattern':
-        raise NotImplementedError,`field`
+        dtype='d'
     else:
         raise ValueError,`field`
 
@@ -123,7 +123,8 @@
     is_complex = field=='complex'
     is_skew = symm=='skew-symmetric'
     is_herm = symm=='hermitian'
-
+    is_pattern = field=='pattern'
+    
     if rep == 'array':
         a = zeros((rows,cols),dtype=dtype)
         line = 1
@@ -193,7 +194,9 @@
                 l = line.split()
                 i = int(l[0])-1
                 j = int(l[1])-1
-                if is_complex:
+                if is_pattern:
+                    aij = 1.0 #use 1.0 for pattern matrices
+                elif is_complex:
                     aij = complex(*map(float,l[2:]))
                 else:
                     aij = float(l[2])


From scipy-svn at scipy.org  Tue Jun 19 11:08:00 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Tue, 19 Jun 2007 10:08:00 -0500 (CDT)
Subject: [Scipy-svn] r3110 - in trunk/Lib/cluster: . src tests
Message-ID: <20070619150800.4DC9F39C08F@new.scipy.org>

Author: cdavid
Date: 2007-06-19 10:07:48 -0500 (Tue, 19 Jun 2007)
New Revision: 3110

Added:
   trunk/Lib/cluster/src/vq.c
   trunk/Lib/cluster/src/vq.def
   trunk/Lib/cluster/src/vq.tpl
   trunk/Lib/cluster/src/vq_module.c
Removed:
   trunk/Lib/cluster/src/swig_num.i
   trunk/Lib/cluster/src/vq.i
   trunk/Lib/cluster/src/vq_wrap.cpp
Modified:
   trunk/Lib/cluster/setup.py
   trunk/Lib/cluster/src/vq.h
   trunk/Lib/cluster/tests/test_vq.py
   trunk/Lib/cluster/vq.py
Log:
Add support for rank 1 arrays in kmean:

* the swig interface was no longer compatible with current swig,
  so the module was converted to a pure C Python module
* all tests pass again, including for rank 1 arrays.


Modified: trunk/Lib/cluster/setup.py
===================================================================
--- trunk/Lib/cluster/setup.py	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/setup.py	2007-06-19 15:07:48 UTC (rev 3110)
@@ -3,13 +3,18 @@
 from os.path import join
 
 def configuration(parent_package='',top_path=None):
-    from numpy.distutils.misc_util import Configuration
+    from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs
     config = Configuration('cluster',parent_package,top_path)
 
     config.add_data_dir('tests')
 
     config.add_extension('_vq',
-        sources=[join('src', 'vq_wrap.cpp')])
+        sources=[join('src', 'vq_module.c'), join('src', 'vq.c')],
+        include_dirs = [get_numpy_include_dirs()])
+    #config.add_extension('_vq',
+    #    sources=[join('src', 'vq_wrap.cpp')])
+    #config.add_extension('_c_vq',
+    #    sources=[join('src', 'vq.c') ])
 
     return config
 

Deleted: trunk/Lib/cluster/src/swig_num.i
===================================================================
--- trunk/Lib/cluster/src/swig_num.i	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/src/swig_num.i	2007-06-19 15:07:48 UTC (rev 3110)
@@ -1,644 +0,0 @@
-
-/************************************************************************/
-/* swig_num.i -- typemaps for mapping C arrays to Python Numeric arrays */
-/*                                                                      */    
-/* This file is auto-generated by swig_num_gen.py.                      */
-/* DO NOT EDIT THIS FILE DIRECTLY unless you want your changes          */
-/* clobbered by the next run of the generator.                          */
-/************************************************************************/
-
-%include typemaps.i
-
-/************************************************************************/
-/*                   helper functions and initialization                */
-/************************************************************************/
-%{
-
-#include "numpy/arrayobject.h"
-
-// hmmm. how do we prevent typedefs from conflicting
-// with users definition on complex numbers?
-//#include "complex_test.h"
-//typedef struct{ float real; 
-//                float imag;} complex;
-//typedef struct{ double real; 
-//                double imag;} zcomplex;
-
-
-// used for converting typecodes to memory sizes.
-int char_to_size(char type)
-{
-    if (type=='i') return sizeof(int);
-    if (type=='f') return sizeof(float);
-    if (type=='d') return sizeof(double);
-    if (type=='c') return 2*sizeof(float);
-    if (type=='z') return 2*sizeof(double);
-}
-int char_to_numtype(char type)
-{
-    if (type=='i') return 'i';
-    if (type=='f') return 'f';
-    if (type=='d') return 'd';
-    if (type=='c') return 'F';
-    if (type=='z') return 'D';
-}
-%}
-
-%init %{
-        import_array();
-%}
-
-%{
-
-typedef int int_IN_D0;
-typedef float float_IN_D0;
-typedef double double_IN_D0;
-typedef int int_IN_D1;
-typedef float float_IN_D1;
-typedef double double_IN_D1;
-typedef int int_IN_D2;
-typedef float float_IN_D2;
-typedef double double_IN_D2;
-typedef int int_IN_D0_D1;
-typedef float float_IN_D0_D1;
-typedef double double_IN_D0_D1;
-typedef int int_IN_D0_D2;
-typedef float float_IN_D0_D2;
-typedef double double_IN_D0_D2;
-typedef int int_IN_D1_D2;
-typedef float float_IN_D1_D2;
-typedef double double_IN_D1_D2;
-typedef int int_IN_D0_D1_D2;
-typedef float float_IN_D0_D1_D2;
-typedef double double_IN_D0_D1_D2;
-
-
-typedef int int_ARGOUT_D0;
-typedef float float_ARGOUT_D0;
-typedef double double_ARGOUT_D0;
-typedef int int_ARGOUT_D1;
-typedef float float_ARGOUT_D1;
-typedef double double_ARGOUT_D1;
-typedef int int_ARGOUT_D2;
-typedef float float_ARGOUT_D2;
-typedef double double_ARGOUT_D2;
-typedef int int_ARGOUT_D0_D1;
-typedef float float_ARGOUT_D0_D1;
-typedef double double_ARGOUT_D0_D1;
-typedef int int_ARGOUT_D0_D2;
-typedef float float_ARGOUT_D0_D2;
-typedef double double_ARGOUT_D0_D2;
-typedef int int_ARGOUT_D1_D2;
-typedef float float_ARGOUT_D1_D2;
-typedef double double_ARGOUT_D1_D2;
-typedef int int_ARGOUT_D0_D1_D2;
-typedef float float_ARGOUT_D0_D1_D2;
-typedef double double_ARGOUT_D0_D1_D2;
-
-
-typedef int int_ARGOUT_TUPLE_D0;
-typedef float float_ARGOUT_TUPLE_D0;
-typedef double double_ARGOUT_TUPLE_D0;
-typedef int int_ARGOUT_TUPLE_D1;
-typedef float float_ARGOUT_TUPLE_D1;
-typedef double double_ARGOUT_TUPLE_D1;
-typedef int int_ARGOUT_TUPLE_D2;
-typedef float float_ARGOUT_TUPLE_D2;
-typedef double double_ARGOUT_TUPLE_D2;
-typedef int int_ARGOUT_TUPLE_D0_D1;
-typedef float float_ARGOUT_TUPLE_D0_D1;
-typedef double double_ARGOUT_TUPLE_D0_D1;
-typedef int int_ARGOUT_TUPLE_D0_D2;
-typedef float float_ARGOUT_TUPLE_D0_D2;
-typedef double double_ARGOUT_TUPLE_D0_D2;
-typedef int int_ARGOUT_TUPLE_D1_D2;
-typedef float float_ARGOUT_TUPLE_D1_D2;
-typedef double double_ARGOUT_TUPLE_D1_D2;
-typedef int int_ARGOUT_TUPLE_D0_D1_D2;
-typedef float float_ARGOUT_TUPLE_D0_D1_D2;
-typedef double double_ARGOUT_TUPLE_D0_D1_D2;
-
-
-typedef int int_OUT_D0;
-typedef float float_OUT_D0;
-typedef double double_OUT_D0;
-typedef int int_OUT_D1;
-typedef float float_OUT_D1;
-typedef double double_OUT_D1;
-typedef int int_OUT_D2;
-typedef float float_OUT_D2;
-typedef double double_OUT_D2;
-typedef int int_OUT_D0_D1;
-typedef float float_OUT_D0_D1;
-typedef double double_OUT_D0_D1;
-typedef int int_OUT_D0_D2;
-typedef float float_OUT_D0_D2;
-typedef double double_OUT_D0_D2;
-typedef int int_OUT_D1_D2;
-typedef float float_OUT_D1_D2;
-typedef double double_OUT_D1_D2;
-typedef int int_OUT_D0_D1_D2;
-typedef float float_OUT_D0_D1_D2;
-typedef double double_OUT_D0_D1_D2;
-
-
-%}
-
-/************************************************************************/
-/*                        typemap code for IN arguments                 */
-/************************************************************************/
-%{                                                                        
-PyArrayObject* IN_in(PyObject* source, char* basetype_string,             
-                             int** target_dims, int dims)                 
-{                                                                         
-    PyArrayObject *a_obj;                                                 
-    char ar_type = char_to_numtype(basetype_string[0]);                   
-    a_obj = (PyArrayObject*) PyArray_ContiguousFromObject(source,ar_type, 
-                                                             dims,dims);  
-    if (a_obj == NULL)                                                    
-    {                                                                     
-        //PyArray Contiguous From Object will set the error value.        
-    	return NULL;                                                      
-    }                                                                     
-    for(int i = 0; i < dims;i++)                                          
-    {                                                                     
-        *(target_dims[i]) = a_obj->dimensions[i];                         
-    }                                                                     
-    return a_obj;                                                         
-}                                                                         
-                                                                          
-%}                                                                        
-%typemap(python,freearg) DECREF {  Py_XDECREF($arg); }                    
-    
-%typemap(python,in) IN_D0 *
-{
-    int* targ_dims[1] = {_d0};
-    PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,1);
-    if (a_obj == NULL) return NULL;
-    $target = ($type) a_obj->data;
-    $source = (PyObject*)a_obj;
-}
-
-%typemap(python,freearg) IN_D0 * = DECREF;
-    
-%typemap(python,in) IN_D1 *
-{
-    int* targ_dims[1] = {_d1};
-    PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,1);
-    if (a_obj == NULL) return NULL;
-    $target = ($type) a_obj->data;
-    $source = (PyObject*)a_obj;
-}
-
-%typemap(python,freearg) IN_D1 * = DECREF;
-    
-%typemap(python,in) IN_D2 *
-{
-    int* targ_dims[1] = {_d2};
-    PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,1);
-    if (a_obj == NULL) return NULL;
-    $target = ($type) a_obj->data;
-    $source = (PyObject*)a_obj;
-}
-
-%typemap(python,freearg) IN_D2 * = DECREF;
-    
-%typemap(python,in) IN_D0_D1 *
-{
-    int* targ_dims[2] = {_d0,_d1};
-    PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,2);
-    if (a_obj == NULL) return NULL;
-    $target = ($type) a_obj->data;
-    $source = (PyObject*)a_obj;
-}
-
-%typemap(python,freearg) IN_D0_D1 * = DECREF;
-    
-%typemap(python,in) IN_D0_D2 *
-{
-    int* targ_dims[2] = {_d0,_d2};
-    PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,2);
-    if (a_obj == NULL) return NULL;
-    $target = ($type) a_obj->data;
-    $source = (PyObject*)a_obj;
-}
-
-%typemap(python,freearg) IN_D0_D2 * = DECREF;
-    
-%typemap(python,in) IN_D1_D2 *
-{
-    int* targ_dims[2] = {_d1,_d2};
-    PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,2);
-    if (a_obj == NULL) return NULL;
-    $target = ($type) a_obj->data;
-    $source = (PyObject*)a_obj;
-}
-
-%typemap(python,freearg) IN_D1_D2 * = DECREF;
-    
-%typemap(python,in) IN_D0_D1_D2 *
-{
-    int* targ_dims[3] = {_d0,_d1,_d2};
-    PyArrayObject* a_obj = IN_in($source,"$basetype",targ_dims,3);
-    if (a_obj == NULL) return NULL;
-    $target = ($type) a_obj->data;
-    $source = (PyObject*)a_obj;
-}
-
-%typemap(python,freearg) IN_D0_D1_D2 * = DECREF;
-
-/************************************************************************/
-/*                  typemap code for ARGOUT arguments                   */
-/************************************************************************/
-%{
-char* ARGOUT_check(char* basetype_string,int* dims, int dim_len)
-{
-    char *rdata;
-    int element_size = char_to_size(basetype_string[0]);
-    int tot_length = 1;
-    for (int i = 0; i < dim_len; i++)
-        tot_length *= dims[i];        
-    rdata = (char*)malloc(tot_length*element_size);
-    if(rdata == NULL)
-	{
-	    PyErr_SetString(PyExc_MemoryError, "can't allocate memory for output array for arg$argnum");
-	    return NULL;
-	}	
-    return rdata;
-}
-
-PyObject* ARGOUT_argout(char* source, char* basetype_string, 
-                                int* dims, int dim_len)
-{
-    PyArrayObject *res;
-    char array_type = char_to_numtype(basetype_string[0]);
-    res = (PyArrayObject *)PyArray_FromDimsAndData(dim_len, dims, 
-                                                   array_type,source);
-    if(res == NULL)
-    {
-        //PyErr_SetString(PyExc_ValueError, "error converting internal data to array");
-    	return NULL;
-    }
-    res->flags |= NPY_OWNDATA; // we want the array to deallocate mem when it is finished.
-    // stick result in the output tuple (target).
-    // Need to think about generality of this one...
-    return (PyObject *) res;
-}
-%}
-
-%typemap(python, ignore) ARGOUT_D0 * {}
-%typemap(python, check) ARGOUT_D0 *
-{
-    int dim_len = 1;
-    int dims[1] = {*_d0};
-    $target = ($type) ARGOUT_check("$basetype",dims,dim_len);
-    if ($target == NULL) return NULL;
-}
-%typemap(python, argout) ARGOUT_D0 *
-{
-    int dim_len = 1;
-    int dims[1] = {*_d0};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, ignore) ARGOUT_D1 * {}
-%typemap(python, check) ARGOUT_D1 *
-{
-    int dim_len = 1;
-    int dims[1] = {*_d1};
-    $target = ($type) ARGOUT_check("$basetype",dims,dim_len);
-    if ($target == NULL) return NULL;
-}
-%typemap(python, argout) ARGOUT_D1 *
-{
-    int dim_len = 1;
-    int dims[1] = {*_d1};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, ignore) ARGOUT_D2 * {}
-%typemap(python, check) ARGOUT_D2 *
-{
-    int dim_len = 1;
-    int dims[1] = {*_d2};
-    $target = ($type) ARGOUT_check("$basetype",dims,dim_len);
-    if ($target == NULL) return NULL;
-}
-%typemap(python, argout) ARGOUT_D2 *
-{
-    int dim_len = 1;
-    int dims[1] = {*_d2};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, ignore) ARGOUT_D0_D1 * {}
-%typemap(python, check) ARGOUT_D0_D1 *
-{
-    int dim_len = 2;
-    int dims[2] = {*_d0,*_d1};
-    $target = ($type) ARGOUT_check("$basetype",dims,dim_len);
-    if ($target == NULL) return NULL;
-}
-%typemap(python, argout) ARGOUT_D0_D1 *
-{
-    int dim_len = 2;
-    int dims[2] = {*_d0,*_d1};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, ignore) ARGOUT_D0_D2 * {}
-%typemap(python, check) ARGOUT_D0_D2 *
-{
-    int dim_len = 2;
-    int dims[2] = {*_d0,*_d2};
-    $target = ($type) ARGOUT_check("$basetype",dims,dim_len);
-    if ($target == NULL) return NULL;
-}
-%typemap(python, argout) ARGOUT_D0_D2 *
-{
-    int dim_len = 2;
-    int dims[2] = {*_d0,*_d2};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, ignore) ARGOUT_D1_D2 * {}
-%typemap(python, check) ARGOUT_D1_D2 *
-{
-    int dim_len = 2;
-    int dims[2] = {*_d1,*_d2};
-    $target = ($type) ARGOUT_check("$basetype",dims,dim_len);
-    if ($target == NULL) return NULL;
-}
-%typemap(python, argout) ARGOUT_D1_D2 *
-{
-    int dim_len = 2;
-    int dims[2] = {*_d1,*_d2};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, ignore) ARGOUT_D0_D1_D2 * {}
-%typemap(python, check) ARGOUT_D0_D1_D2 *
-{
-    int dim_len = 3;
-    int dims[3] = {*_d0,*_d1,*_d2};
-    $target = ($type) ARGOUT_check("$basetype",dims,dim_len);
-    if ($target == NULL) return NULL;
-}
-%typemap(python, argout) ARGOUT_D0_D1_D2 *
-{
-    int dim_len = 3;
-    int dims[3] = {*_d0,*_d1,*_d2};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-/************************************************************************/
-/*             typemap code for ARGOUT_TUPLE arguments                  */
-/************************************************************************/
-
-%typemap(python, ignore) ARGOUT_TUPLE_D0 * = ARGOUT_D0 *;
-%typemap(python, check) ARGOUT_TUPLE_D0 * = ARGOUT_D0 *;
-%typemap(python, argout) ARGOUT_TUPLE_D0 *
-{
-    int dim_len = 1;
-    int dims[1] = {*_d0};
-    PyObject * res;
-    res = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) 
-        return NULL;
-    $target = t_output_helper($target, res);
-}  
-
-%typemap(python, ignore) ARGOUT_TUPLE_D1 * = ARGOUT_D1 *;
-%typemap(python, check) ARGOUT_TUPLE_D1 * = ARGOUT_D1 *;
-%typemap(python, argout) ARGOUT_TUPLE_D1 *
-{
-    int dim_len = 1;
-    int dims[1] = {*_d1};
-    PyObject * res;
-    res = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) 
-        return NULL;
-    $target = t_output_helper($target, res);
-}  
-
-%typemap(python, ignore) ARGOUT_TUPLE_D2 * = ARGOUT_D2 *;
-%typemap(python, check) ARGOUT_TUPLE_D2 * = ARGOUT_D2 *;
-%typemap(python, argout) ARGOUT_TUPLE_D2 *
-{
-    int dim_len = 1;
-    int dims[1] = {*_d2};
-    PyObject * res;
-    res = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) 
-        return NULL;
-    $target = t_output_helper($target, res);
-}  
-
-%typemap(python, ignore) ARGOUT_TUPLE_D0_D1 * = ARGOUT_D0_D1 *;
-%typemap(python, check) ARGOUT_TUPLE_D0_D1 * = ARGOUT_D0_D1 *;
-%typemap(python, argout) ARGOUT_TUPLE_D0_D1 *
-{
-    int dim_len = 2;
-    int dims[2] = {*_d0,*_d1};
-    PyObject * res;
-    res = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) 
-        return NULL;
-    $target = t_output_helper($target, res);
-}  
-
-%typemap(python, ignore) ARGOUT_TUPLE_D0_D2 * = ARGOUT_D0_D2 *;
-%typemap(python, check) ARGOUT_TUPLE_D0_D2 * = ARGOUT_D0_D2 *;
-%typemap(python, argout) ARGOUT_TUPLE_D0_D2 *
-{
-    int dim_len = 2;
-    int dims[2] = {*_d0,*_d2};
-    PyObject * res;
-    res = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) 
-        return NULL;
-    $target = t_output_helper($target, res);
-}  
-
-%typemap(python, ignore) ARGOUT_TUPLE_D1_D2 * = ARGOUT_D1_D2 *;
-%typemap(python, check) ARGOUT_TUPLE_D1_D2 * = ARGOUT_D1_D2 *;
-%typemap(python, argout) ARGOUT_TUPLE_D1_D2 *
-{
-    int dim_len = 2;
-    int dims[2] = {*_d1,*_d2};
-    PyObject * res;
-    res = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) 
-        return NULL;
-    $target = t_output_helper($target, res);
-}  
-
-%typemap(python, ignore) ARGOUT_TUPLE_D0_D1_D2 * = ARGOUT_D0_D1_D2 *;
-%typemap(python, check) ARGOUT_TUPLE_D0_D1_D2 * = ARGOUT_D0_D1_D2 *;
-%typemap(python, argout) ARGOUT_TUPLE_D0_D1_D2 *
-{
-    int dim_len = 3;
-    int dims[3] = {*_d0,*_d1,*_d2};
-    PyObject * res;
-    res = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) 
-        return NULL;
-    $target = t_output_helper($target, res);
-}  
-
-/************************************************************************/
-/*                   typemap code for OUT arguments                     */
-/************************************************************************/
-
-%typemap(python, out) OUT_D0 *
-{
-    // identical to  typemap(python,argout) ARGOUT__D0 *;
-    int dim_len = 1;
-    int dims[1] = {*_d0};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, out) OUT_D1 *
-{
-    // identical to  typemap(python,argout) ARGOUT__D1 *;
-    int dim_len = 1;
-    int dims[1] = {*_d1};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, out) OUT_D2 *
-{
-    // identical to  typemap(python,argout) ARGOUT__D2 *;
-    int dim_len = 1;
-    int dims[1] = {*_d2};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, out) OUT_D0_D1 *
-{
-    // identical to  typemap(python,argout) ARGOUT__D0_D1 *;
-    int dim_len = 2;
-    int dims[2] = {*_d0,*_d1};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, out) OUT_D0_D2 *
-{
-    // identical to  typemap(python,argout) ARGOUT__D0_D2 *;
-    int dim_len = 2;
-    int dims[2] = {*_d0,*_d2};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, out) OUT_D1_D2 *
-{
-    // identical to  typemap(python,argout) ARGOUT__D1_D2 *;
-    int dim_len = 2;
-    int dims[2] = {*_d1,*_d2};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, out) OUT_D0_D1_D2 *
-{
-    // identical to  typemap(python,argout) ARGOUT__D0_D1_D2 *;
-    int dim_len = 3;
-    int dims[3] = {*_d0,*_d1,*_d2};
-    $target = ARGOUT_argout( (char*) $source,"$basetype",
-                                     dims, dim_len);
-    if ($target == NULL) return NULL;
-}
-
-%typemap(python, ignore) int DIM0(int* _d0)
-{ _d0 = &$target; }
-%typemap(python, ignore) int DIM1(int* _d1)
-{ _d1 = &$target; }
-%typemap(python, ignore) int DIM2(int* _d2)
-{ _d2 = &$target; }
-
-%typemap(python, in) int IN_DIM0(int* _d0)
-{
-    $target = (int) PyInt_AsLong($source);
-    _d0 = &$target;
-}
-%typemap(python, in) int IN_DIM1(int* _d1)
-{
-    $target = (int) PyInt_AsLong($source);
-    _d1 = &$target;
-}
-%typemap(python, in) int IN_DIM2(int* _d2)
-{
-    $target = (int) PyInt_AsLong($source);
-    _d2 = &$target;
-}
-
-
-
-%apply IN_D0 * {int_IN_D0 *,float_IN_D0 *,double_IN_D0 *};
-%apply IN_D1 * {int_IN_D1 *,float_IN_D1 *,double_IN_D1 *};
-%apply IN_D2 * {int_IN_D2 *,float_IN_D2 *,double_IN_D2 *};
-%apply IN_D0_D1 * {int_IN_D0_D1 *,float_IN_D0_D1 *,double_IN_D0_D1 *};
-%apply IN_D0_D2 * {int_IN_D0_D2 *,float_IN_D0_D2 *,double_IN_D0_D2 *};
-%apply IN_D1_D2 * {int_IN_D1_D2 *,float_IN_D1_D2 *,double_IN_D1_D2 *};
-%apply IN_D0_D1_D2 * {int_IN_D0_D1_D2 *,float_IN_D0_D1_D2 *,double_IN_D0_D1_D2 *};
-
-
-%apply ARGOUT_D0 * {int_ARGOUT_D0 *,float_ARGOUT_D0 *,double_ARGOUT_D0 *};
-%apply ARGOUT_D1 * {int_ARGOUT_D1 *,float_ARGOUT_D1 *,double_ARGOUT_D1 *};
-%apply ARGOUT_D2 * {int_ARGOUT_D2 *,float_ARGOUT_D2 *,double_ARGOUT_D2 *};
-%apply ARGOUT_D0_D1 * {int_ARGOUT_D0_D1 *,float_ARGOUT_D0_D1 *,double_ARGOUT_D0_D1 *};
-%apply ARGOUT_D0_D2 * {int_ARGOUT_D0_D2 *,float_ARGOUT_D0_D2 *,double_ARGOUT_D0_D2 *};
-%apply ARGOUT_D1_D2 * {int_ARGOUT_D1_D2 *,float_ARGOUT_D1_D2 *,double_ARGOUT_D1_D2 *};
-%apply ARGOUT_D0_D1_D2 * {int_ARGOUT_D0_D1_D2 *,float_ARGOUT_D0_D1_D2 *,double_ARGOUT_D0_D1_D2 *};
-
-
-%apply ARGOUT_TUPLE_D0 * {int_ARGOUT_TUPLE_D0 *,float_ARGOUT_TUPLE_D0 *,double_ARGOUT_TUPLE_D0 *};
-%apply ARGOUT_TUPLE_D1 * {int_ARGOUT_TUPLE_D1 *,float_ARGOUT_TUPLE_D1 *,double_ARGOUT_TUPLE_D1 *};
-%apply ARGOUT_TUPLE_D2 * {int_ARGOUT_TUPLE_D2 *,float_ARGOUT_TUPLE_D2 *,double_ARGOUT_TUPLE_D2 *};
-%apply ARGOUT_TUPLE_D0_D1 * {int_ARGOUT_TUPLE_D0_D1 *,float_ARGOUT_TUPLE_D0_D1 *,double_ARGOUT_TUPLE_D0_D1 *};
-%apply ARGOUT_TUPLE_D0_D2 * {int_ARGOUT_TUPLE_D0_D2 *,float_ARGOUT_TUPLE_D0_D2 *,double_ARGOUT_TUPLE_D0_D2 *};
-%apply ARGOUT_TUPLE_D1_D2 * {int_ARGOUT_TUPLE_D1_D2 *,float_ARGOUT_TUPLE_D1_D2 *,double_ARGOUT_TUPLE_D1_D2 *};
-%apply ARGOUT_TUPLE_D0_D1_D2 * {int_ARGOUT_TUPLE_D0_D1_D2 *,float_ARGOUT_TUPLE_D0_D1_D2 *,double_ARGOUT_TUPLE_D0_D1_D2 *};
-
-
-%apply OUT_D0 * {int_OUT_D0 *,float_OUT_D0 *,double_OUT_D0 *};
-%apply OUT_D1 * {int_OUT_D1 *,float_OUT_D1 *,double_OUT_D1 *};
-%apply OUT_D2 * {int_OUT_D2 *,float_OUT_D2 *,double_OUT_D2 *};
-%apply OUT_D0_D1 * {int_OUT_D0_D1 *,float_OUT_D0_D1 *,double_OUT_D0_D1 *};
-%apply OUT_D0_D2 * {int_OUT_D0_D2 *,float_OUT_D0_D2 *,double_OUT_D0_D2 *};
-%apply OUT_D1_D2 * {int_OUT_D1_D2 *,float_OUT_D1_D2 *,double_OUT_D1_D2 *};
-%apply OUT_D0_D1_D2 * {int_OUT_D0_D1_D2 *,float_OUT_D0_D1_D2 *,double_OUT_D0_D1_D2 *};
-
-

Added: trunk/Lib/cluster/src/vq.c
===================================================================
--- trunk/Lib/cluster/src/vq.c	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/src/vq.c	2007-06-19 15:07:48 UTC (rev 3110)
@@ -0,0 +1,142 @@
+/*
+ * vim:syntax=c
+ */
+#include <stddef.h>
+#include <math.h>
+
+/*
+ * results is put into code, which contains initially the initial code
+ *
+ * mdist and code should have at least n elements
+ */
+const static double rbig = 1e100;
+
+
+static int float_vq_1d(const float *in, int n,     /* in: n scalar observations */
+    const float *init, int ncode,                  /* init: ncode scalar code words */
+    int *code, float *mdist)                       /* outputs, n elements each */
+{
+    int i, j;
+    float m, d;
+    /* On return code[i] is the index of the code word nearest to in[i] and
+     * mdist[i] the squared distance to it.  Always returns 0. */
+    for (i = 0; i < n; ++i) {
+        m = (float)rbig;
+        code[i] = 0;    /* value kept when no code word is selected */
+        for (j = 0; j < ncode; ++j) {   /* search the minimal distance */
+            d = (in[i] - init[j]);
+            d *= d;
+            if ( d < m) {
+                m = d;
+                code[i] = j;    /* remember the winner, not the final j */
+            }
+        }
+        mdist[i] = m;
+    }
+    return 0;
+}
+
+static int float_vq_obs(const float *obs,              /* one observation: Nfeatures values */
+    float *code_book, int Ncodes, int Nfeatures,       /* Ncodes code words of Nfeatures values, stored one after the other */
+       int* code, float *lowest_dist)                  /* outputs: index of, and distance to, the nearest code word */
+{
+	int i,j,k=0;            /* k: running index into the flattened code_book */
+	float dist, diff;
+
+	*lowest_dist = (float) rbig;    /* initial value every candidate is compared against */
+	for(i = 0; i < Ncodes; i++) {
+		dist = 0;
+		for(j=0; j < Nfeatures; j++) {
+			diff = code_book[k] - obs[j];
+			dist += diff*diff;
+			k++;
+		}
+		dist = (float)sqrt(dist);       /* Euclidean distance between obs and code word i */
+		if (dist < *lowest_dist) {      /* strict comparison: on ties the first code word wins */
+			*code = i;
+			*lowest_dist = dist;
+		}
+	}       /* NOTE: *code is left untouched if no distance was below the initial value */
+
+    return 0;   /* always 0 */
+}
+
+int float_tvq(                                 /* quantize Nobs observations against a code book */
+    float* obs,                                /* Nobs observations of Nfeatures values, stored one after the other */
+    float* code_book,                          /* handed unchanged to float_vq_obs */
+    int Nobs, int Ncodes, int Nfeatures,
+    int* codes, float* lowest_dist)            /* outputs: one entry per observation */
+{
+    int i;
+	for( i = 0; i < Nobs; i++) {		/* each observation is quantized independently */
+		float_vq_obs(
+                    &(obs[i*Nfeatures]),
+                    code_book,Ncodes, Nfeatures,
+                    &(codes[i]), &(lowest_dist[i]));
+	}
+    return 0;   /* always 0 */
+}
+
+static int double_vq_1d(const double *in, int n,   /* in: n scalar observations */
+    const double *init, int ncode,                 /* init: ncode scalar code words */
+    int *code, double *mdist)                      /* outputs, n elements each */
+{
+    int i, j;
+    double m, d;
+    /* On return code[i] is the index of the code word nearest to in[i] and
+     * mdist[i] the squared distance to it.  Always returns 0. */
+    for (i = 0; i < n; ++i) {
+        m = (double)rbig;
+        code[i] = 0;    /* value kept when no code word is selected */
+        for (j = 0; j < ncode; ++j) {   /* search the minimal distance */
+            d = (in[i] - init[j]);
+            d *= d;
+            if ( d < m) {
+                m = d;
+                code[i] = j;    /* remember the winner, not the final j */
+            }
+        }
+        mdist[i] = m;
+    }
+    return 0;
+}
+
+static int double_vq_obs(const double *obs,            /* one observation: Nfeatures values */
+    double *code_book, int Ncodes, int Nfeatures,      /* Ncodes code words of Nfeatures values, stored one after the other */
+       int* code, double *lowest_dist)                 /* outputs: index of, and distance to, the nearest code word */
+{
+	int i,j,k=0;            /* k: running index into the flattened code_book */
+	double dist, diff;
+
+	*lowest_dist = (double) rbig;   /* initial value every candidate is compared against */
+	for(i = 0; i < Ncodes; i++) {
+		dist = 0;
+		for(j=0; j < Nfeatures; j++) {
+			diff = code_book[k] - obs[j];
+			dist += diff*diff;
+			k++;
+		}
+		dist = (double)sqrt(dist);      /* Euclidean distance between obs and code word i */
+		if (dist < *lowest_dist) {      /* strict comparison: on ties the first code word wins */
+			*code = i;
+			*lowest_dist = dist;
+		}
+	}       /* NOTE: *code is left untouched if no distance was below the initial value */
+
+    return 0;   /* always 0 */
+}
+
+int double_tvq(                                /* quantize Nobs observations against a code book */
+    double* obs,                               /* Nobs observations of Nfeatures values, stored one after the other */
+    double* code_book,                         /* handed unchanged to double_vq_obs */
+    int Nobs, int Ncodes, int Nfeatures,
+    int* codes, double* lowest_dist)           /* outputs: one entry per observation */
+{
+    int i;
+	for( i = 0; i < Nobs; i++) {		/* each observation is quantized independently */
+		double_vq_obs(
+                    &(obs[i*Nfeatures]),
+                    code_book,Ncodes, Nfeatures,
+                    &(codes[i]), &(lowest_dist[i]));
+	}
+    return 0;   /* always 0 */
+}
+

Added: trunk/Lib/cluster/src/vq.def
===================================================================
--- trunk/Lib/cluster/src/vq.def	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/src/vq.def	2007-06-19 15:07:48 UTC (rev 3110)
@@ -0,0 +1,12 @@
+autogen definitions vq.tpl;
+
+data_type = { 
+	type_name		= float ;
+	data_type		= float ;
+} ;
+
+data_type = { 
+	type_name		= double ;
+	data_type		= double ;
+} ;
+

Modified: trunk/Lib/cluster/src/vq.h
===================================================================
--- trunk/Lib/cluster/src/vq.h	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/src/vq.h	2007-06-19 15:07:48 UTC (rev 3110)
@@ -1,57 +1,10 @@
-#ifndef vq_h
-#define vq_h
-/*
-//#define real  float 
-//#define scan_format "%f"
+#ifndef _VQ_H_
+#define _VQ_H_  /* same macro as tested by the #ifndef, so the guard is effective */
 
-#define real double
-#define scan_format "%lf"
+int double_tvq(double* obs, double* code_book, int Nobs, int Ncodes, 
+        int Nfeatures, int* codes, double* lowest_dist);
 
+int float_tvq(float* obs, float* code_book, int Nobs, int Ncodes, 
+        int Nfeatures, int* codes, float* lowest_dist);
 
-
-void vq_obs(real* obs,real* code_book, int Ncodes, int Nfeatures,
-			   int& code, real& lowest_dist);
-
-void vq(real* obs,real* code_book, int Nobs, int Ncodes, int Nfeatures,
-	    int* codes, real* lowest_dist);
-*/
-#define BIG 10000.
-
-template<class T>
-void tvq_obs(T* obs,T* code_book, int Ncodes, int Nfeatures,
-			   int& code, T& lowest_dist)
-{
-	int i,j,k=0;
-	T dist, diff;
-
-	lowest_dist = (T) BIG;
-	for(i=0; i < Ncodes; i++)
-	{
-		dist=0;
-		for(j=0; j < Nfeatures; j++)
-		{
-			diff = code_book[k] - obs[j];
-			dist += diff*diff;
-			k++;
-		}
-		dist = (T)sqrt(dist);
-		if (dist < lowest_dist)
-		{
-			code = i;
-			lowest_dist = dist;
-		}
-	}
-}
-
-template<class T>
-void tvq(T* obs,T* code_book, int Nobs, int Ncodes, int Nfeatures,
-	    int* codes, T* lowest_dist)
-{
-    int i;
-	for( i = 0; i < Nobs; i++)
-	{		
-		tvq_obs<T>(&(obs[i*Nfeatures]),code_book,Ncodes,Nfeatures,
-				  codes[i],lowest_dist[i]);
-	}
-}
-#endif
\ No newline at end of file
+#endif

Deleted: trunk/Lib/cluster/src/vq.i
===================================================================
--- trunk/Lib/cluster/src/vq.i	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/src/vq.i	2007-06-19 15:07:48 UTC (rev 3110)
@@ -1,30 +0,0 @@
-%module _vq
-%{
-
-#include "vq.h"
-
-/* Wrappers for the template code */
-
-void float_vq(float * obs,float* code_book, int Nobs, int Ncodes, 
-              int Nfeatures, int* codes, float* lowest_dist)
-{
-    tvq<float>(obs,code_book,Nobs,Ncodes,Nfeatures,codes,lowest_dist);
-}
-
-void double_vq(double * obs,double* code_book, int Nobs, int Ncodes, 
-              int Nfeatures, int* codes, double* lowest_dist)
-{
-    tvq<double>(obs,code_book,Nobs,Ncodes,Nfeatures,codes,lowest_dist);
-}
-
-%}
-
-%include swig_num.i
-
-void double_vq(double_IN_D0_D2 *obs,double_IN_D1_D2 *code_book, 
-               int DIM0, int DIM1, int DIM2, 
-               int_ARGOUT_TUPLE_D0 *codes, double_ARGOUT_TUPLE_D0 *lowest_dist);
-
-void float_vq(float_IN_D0_D2 *obs,float_IN_D1_D2 *code_book, 
-              int DIM0, int DIM1, int DIM2, 
-              int_ARGOUT_TUPLE_D0 *codes, float_ARGOUT_TUPLE_D0 *lowest_dist);

Added: trunk/Lib/cluster/src/vq.tpl
===================================================================
--- trunk/Lib/cluster/src/vq.tpl	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/src/vq.tpl	2007-06-19 15:07:48 UTC (rev 3110)
@@ -0,0 +1,79 @@
+[+ AutoGen5 template c +]
+/*
+ * vim:syntax=c
+ */
+#include <stddef.h>
+#include <math.h>
+
+/*
+ * results is put into code, which contains initially the initial code
+ *
+ * mdist and code should have at least n elements
+ */
+const static double rbig = 1e100;
+
+[+ FOR data_type +]
+static int [+ (get "type_name") +]_vq_1d(const [+ (get "data_type") +] *in, int n,  /* in: n scalar observations */
+    const [+ (get "data_type") +] *init, int ncode,    /* init: ncode scalar code words */
+    int *code, [+ (get "data_type") +] *mdist)         /* outputs, n elements each */
+{
+    int i, j;
+    [+ (get "data_type") +] m, d;
+    /* On return code[i] is the index of the code word nearest to in[i] and
+     * mdist[i] the squared distance to it.  Always returns 0. */
+    for (i = 0; i < n; ++i) {
+        m = ([+ (get "data_type") +])rbig;
+        code[i] = 0;    /* value kept when no code word is selected */
+        for (j = 0; j < ncode; ++j) {   /* search the minimal distance */
+            d = (in[i] - init[j]);
+            d *= d;
+            if ( d < m) {
+                m = d;
+                code[i] = j;    /* remember the winner, not the final j */
+            }
+        }
+        mdist[i] = m;
+    }
+    return 0;
+}
+
+static int [+ (get "type_name") +]_vq_obs(const [+ (get "data_type") +] *obs,     /* one observation: Nfeatures values */
+    [+ (get "data_type") +] *code_book, int Ncodes, int Nfeatures,     /* Ncodes code words of Nfeatures values, stored one after the other */
+       int* code, [+ (get "data_type") +] *lowest_dist)        /* outputs: index of, and distance to, the nearest code word */
+{
+	int i,j,k=0;            /* k: running index into the flattened code_book */
+	[+ (get "data_type") +] dist, diff;
+
+	*lowest_dist = ([+ (get "data_type") +]) rbig;  /* initial value every candidate is compared against */
+	for(i = 0; i < Ncodes; i++) {
+		dist = 0;
+		for(j=0; j < Nfeatures; j++) {
+			diff = code_book[k] - obs[j];
+			dist += diff*diff;
+			k++;
+		}
+		dist = ([+ (get "data_type") +])sqrt(dist);     /* Euclidean distance between obs and code word i */
+		if (dist < *lowest_dist) {      /* strict comparison: on ties the first code word wins */
+			*code = i;
+			*lowest_dist = dist;
+		}
+	}       /* NOTE: *code is left untouched if no distance was below the initial value */
+
+    return 0;   /* always 0 */
+}
+
+int [+ (get "type_name") +]_tvq(               /* quantize Nobs observations against a code book */
+    [+ (get "data_type") +]* obs,              /* Nobs observations of Nfeatures values, stored one after the other */
+    [+ (get "data_type") +]* code_book,        /* handed unchanged to the matching _vq_obs function */
+    int Nobs, int Ncodes, int Nfeatures,
+    int* codes, [+ (get "data_type") +]* lowest_dist)  /* outputs: one entry per observation */
+{
+    int i;
+	for( i = 0; i < Nobs; i++) {		/* each observation is quantized independently */
+		[+ (get "type_name") +]_vq_obs(
+                    &(obs[i*Nfeatures]),
+                    code_book,Ncodes, Nfeatures,
+                    &(codes[i]), &(lowest_dist[i]));
+	}
+    return 0;   /* always 0 */
+}
+[+ ENDFOR data_type +]

Added: trunk/Lib/cluster/src/vq_module.c
===================================================================
--- trunk/Lib/cluster/src/vq_module.c	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/src/vq_module.c	2007-06-19 15:07:48 UTC (rev 3110)
@@ -0,0 +1,154 @@
+/*
+ * Last Change: Tue Jun 19 11:00 PM 2007 J
+ *
+ */
+#include <Python.h>
+
+#include <numpy/arrayobject.h>
+
+#include "vq.h"
+
+PyObject* compute_vq(PyObject*, PyObject*);    /* forward declaration; needed by the method table */
+
+static PyMethodDef vqmethods [] = {            /* functions exposed by the _vq module */
+    {"vq", compute_vq, METH_VARARGS, "TODO docstring"},        /* vq(obs, code), positional arguments only */
+    {NULL, NULL, 0, NULL}                      /* sentinel terminating the table */
+};
+
+PyMODINIT_FUNC init_vq(void)   /* initialization function of the _vq extension module */
+{
+    Py_InitModule("_vq", vqmethods);   /* create the module and register vqmethods */
+    import_array();                    /* set up the numpy C API used by compute_vq */
+}
+
+PyObject* compute_vq(PyObject* self, PyObject* args)
+{
+    /*
+     * Python entry point: vq(obs, code) -> (index, dist).
+     *
+     * obs and code must be numpy arrays of the same type (float or double)
+     * and rank (1 or 2); rank 2 arrays must have the same number of
+     * columns.  Returns a tuple of two arrays with one entry per
+     * observation: index of the nearest code word, and distance to it.
+     * On error, an exception is set and NULL is returned.
+     */
+    PyObject *obs, *code, *out;
+    PyArrayObject *obs_a = NULL, *code_a = NULL;
+    PyArrayObject *index_a = NULL, *dist_a = NULL;
+    int typenum1, typenum2;
+    int n, nc, d;
+    npy_intp out_dims[1];
+
+    if ( !PyArg_ParseTuple(args, "OO", &obs, &code) ) {
+        return NULL;
+    }
+
+    /* Check that obs and code both are arrays of same type, conformant
+     * dimensions, etc...*/
+    if (!(PyArray_Check(obs) && PyArray_Check(code))) {
+        PyErr_Format(PyExc_ValueError,
+                 "observation and code should be numpy arrays");
+        return NULL;
+    }
+
+    typenum1 = PyArray_TYPE((PyArrayObject*)obs);
+    typenum2 = PyArray_TYPE((PyArrayObject*)code);
+    if (typenum1 != typenum2) {
+        PyErr_Format(PyExc_ValueError,
+                 "observation and code should have same type");
+        return NULL;
+    }
+
+    /* Get contiguous, aligned, native byte order arrays for the C code */
+    obs_a = (PyArrayObject*)PyArray_FROM_OF(obs,
+                NPY_CONTIGUOUS | NPY_NOTSWAPPED | NPY_ALIGNED);
+    if (obs_a == NULL) {
+        goto fail;
+    }
+
+    code_a = (PyArrayObject*)PyArray_FROM_OF(code,
+                NPY_CONTIGUOUS | NPY_NOTSWAPPED | NPY_ALIGNED);
+    if (code_a == NULL) {
+        goto fail;
+    }
+
+    if( !(obs_a->nd == code_a->nd)) {
+        PyErr_Format(PyExc_ValueError,
+                 "observation and code should have same shape");
+        goto fail;
+    }
+
+    switch (obs_a->nd) {
+        case 1:
+            /* Rank 1: n scalar observations, i.e. one feature each */
+            d = 1;
+            n = (int)PyArray_DIM(obs_a, 0);
+            nc = (int)PyArray_DIM(code_a, 0);
+            break;
+        case 2:
+            n = (int)PyArray_DIM(obs_a, 0);
+            d = (int)PyArray_DIM(obs_a, 1);
+            nc = (int)PyArray_DIM(code_a, 0);
+            if (! (d == PyArray_DIM(code_a, 1)) ) {
+                PyErr_Format(PyExc_ValueError,
+                         "obs and code should have same number of "
+                         " features (columns)");
+                goto fail;
+            }
+            break;
+        default:
+            PyErr_Format(PyExc_ValueError,
+                     "rank different than 1 or 2 are not supported");
+            goto fail;
+    }
+
+    /* Reject unsupported element types before allocating the outputs */
+    if (typenum1 != NPY_FLOAT && typenum1 != NPY_DOUBLE) {
+        PyErr_Format(PyExc_ValueError,
+                 "type other than float or double not supported");
+        goto fail;
+    }
+
+    /* PyArray_EMPTY expects an array of npy_intp, which is typically wider
+     * than int on 64 bits platforms: do not pass &n directly */
+    out_dims[0] = n;
+    dist_a = (PyArrayObject*)PyArray_EMPTY(1, out_dims, typenum1, 0);
+    if (dist_a == NULL) {
+        goto fail;
+    }
+    index_a = (PyArrayObject*)PyArray_EMPTY(1, out_dims, NPY_INT, 0);
+    if (index_a == NULL) {
+        goto fail;
+    }
+
+    if (typenum1 == NPY_FLOAT) {
+        float_tvq((float*)obs_a->data, (float*)code_a->data, n, nc, d,
+                (int*)index_a->data, (float*)dist_a->data);
+    } else {
+        double_tvq((double*)obs_a->data, (double*)code_a->data, n, nc, d,
+                (int*)index_a->data, (double*)dist_a->data);
+    }
+
+    /* Create output */
+    out = PyTuple_New(2);
+    if (out == NULL) {
+        goto fail;
+    }
+    /* PyTuple_SET_ITEM steals the references and cannot fail on a new
+     * tuple, so index_a and dist_a need no further cleanup */
+    PyTuple_SET_ITEM(out, 0, (PyObject*)index_a);
+    PyTuple_SET_ITEM(out, 1, (PyObject*)dist_a);
+
+    Py_DECREF(code_a);
+    Py_DECREF(obs_a);
+    return out;
+
+fail:
+    /* Pointers not assigned yet are still NULL, hence Py_XDECREF */
+    Py_XDECREF(index_a);
+    Py_XDECREF(dist_a);
+    Py_XDECREF(code_a);
+    Py_XDECREF(obs_a);
+    return NULL;
+}
+

Deleted: trunk/Lib/cluster/src/vq_wrap.cpp
===================================================================
--- trunk/Lib/cluster/src/vq_wrap.cpp	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/src/vq_wrap.cpp	2007-06-19 15:07:48 UTC (rev 3110)
@@ -1,982 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 1.3u-20010227-1913 (Alpha 5)
- * 
- * This file is not intended to be easily readable and contains a number of 
- * coding conventions designed to improve portability and efficiency. Do not make
- * changes to this file unless you know what you are doing--modify the SWIG 
- * interface file instead. 
- * ----------------------------------------------------------------------------- */
-
-#define SWIGPYTHON
-/***********************************************************************
- * common.swg
- *
- *     This file contains generic SWIG runtime support for pointer
- *     type checking as well as a few commonly used macros to control
- *     external linkage.
- *
- * Author : David Beazley (beazley at cs.uchicago.edu)
- *
- * Copyright (c) 1999-2000, The University of Chicago
- * 
- * This file may be freely redistributed without license or fee provided
- * this copyright message remains intact.
- ************************************************************************/
-
-#include <string.h>
-
-#if defined(_WIN32) || defined(__WIN32__)
-#       if defined(_MSC_VER)
-#               if defined(STATIC_LINKED)
-#                       define SWIGEXPORT(a) a
-#               else
-#                       define SWIGEXPORT(a) __declspec(dllexport) a
-#               endif
-#       else
-#               if defined(__BORLANDC__)
-#                       define SWIGEXPORT(a) a _export
-#               else
-#                       define SWIGEXPORT(a) a
-#       endif
-#endif
-#else
-#       define SWIGEXPORT(a) a
-#endif
-
-#ifdef SWIG_GLOBAL
-#define SWIGRUNTIME(a) SWIGEXPORT(a)
-#else
-#define SWIGRUNTIME(a) static a
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct swig_type_info {
-  char  *name;                 
-  void *(*converter)(void *);
-  char  *str;
-  struct swig_type_info  *next;
-  struct swig_type_info  *prev;
-} swig_type_info;
-
-#ifdef SWIG_NOINCLUDE
-SWIGEXPORT(swig_type_info *) SWIG_TypeRegister(swig_type_info *);
-SWIGEXPORT(swig_type_info *) SWIG_TypeCheck(char *c, swig_type_info *);
-SWIGEXPORT(void *) SWIG_TypeCast(swig_type_info *, void *);
-#else
-
-static swig_type_info *swig_type_list = 0;
-
-/* Register a type mapping with the type-checking */
-SWIGRUNTIME(swig_type_info *)
-SWIG_TypeRegister(swig_type_info *ti)
-{
-  swig_type_info *tc, *head, *ret, *next;
-  /* Check to see if this type has already been registered */
-  tc = swig_type_list;
-  while (tc) {
-    if (strcmp(tc->name, ti->name) == 0) {
-      /* Already exists in the table.  Just add additional types to the list */
-      head = tc;
-      next = tc->next;
-      goto l1;
-    }
-    tc = tc->prev;
-  }
-  head = ti;
-  next = 0;
-
-  /* Place in list */
-  ti->prev = swig_type_list;
-  swig_type_list = ti;
-
-  /* Build linked lists */
- l1:
-  ret = head;
-  tc = ti + 1;
-  /* Patch up the rest of the links */
-  while (tc->name) {
-    head->next = tc;
-    tc->prev = head;
-    head = tc;
-    tc++;
-  }
-  head->next = next;
-  return ret;
-}
-
-/* Check the typename */
-SWIGRUNTIME(swig_type_info *) 
-SWIG_TypeCheck(char *c, swig_type_info *ty)
-{
-  swig_type_info *s;
-  if (!ty) return 0;        /* Void pointer */
-  s = ty->next;             /* First element always just a name */
-  while (s) {
-    if (strcmp(s->name,c) == 0) {
-      if (s == ty->next) return s;
-      /* Move s to the top of the linked list */
-      s->prev->next = s->next;
-      if (s->next) {
-	s->next->prev = s->prev;
-      }
-      /* Insert s as second element in the list */
-      s->next = ty->next;
-      if (ty->next) ty->next->prev = s;
-      ty->next = s;
-      return s;
-    }
-    s = s->next;
-  }
-  return 0;
-}
-
-/* Cast a pointer (needed for C++ inheritance */
-SWIGRUNTIME(void *) 
-SWIG_TypeCast(swig_type_info *ty, void *ptr) 
-{
-  if ((!ty) || (!ty->converter)) return ptr;
-  return (*ty->converter)(ptr);
-}
-
-/* Search for a swig_type_info structure */
-SWIGRUNTIME(void *)
-SWIG_TypeQuery(const char *name) {
-  swig_type_info *ty = swig_type_list;
-  while (ty) {
-    if (ty->str && (strcmp(name,ty->str) == 0)) return ty;
-    if (ty->name && (strcmp(name,ty->name) == 0)) return ty;
-    ty = ty->prev;
-  }
-  return 0;
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-
-
-/***********************************************************************
- * python.swg
- *
- *     This file contains the runtime support for Python modules
- *     and includes code for managing global variables and pointer
- *     type checking.
- *
- * Author : David Beazley (beazley at cs.uchicago.edu)
- ************************************************************************/
-
-#include <stdlib.h>
-#include "Python.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define SWIG_PY_INT     1
-#define SWIG_PY_FLOAT   2
-#define SWIG_PY_STRING  3
-#define SWIG_PY_POINTER 4
-
-/* Constant information structure */
-typedef struct swig_const_info {
-    int type;
-    char *name;
-    long lvalue;
-    double dvalue;
-    void   *pvalue;
-    swig_type_info **ptype;
-} swig_const_info;
-
-#ifdef SWIG_NOINCLUDE
-
-SWIGEXPORT(PyObject *)        SWIG_newvarlink();
-SWIGEXPORT(void)              SWIG_addvarlink(PyObject *, char *, PyObject *(*)(void), int (*)(PyObject *));
-SWIGEXPORT(int)               SWIG_ConvertPtr(PyObject *, void **, swig_type_info *, int);
-SWIGEXPORT(void)              SWIG_MakePtr(char *c, void *, swig_type_info *);
-SWIGEXPORT(PyObject *)        SWIG_NewPointerObj(void *, swig_type_info *);
-SWIGEXPORT(void)              SWIG_InstallConstants(PyObject *d, swig_const_info constants[]);
-
-#else
-
-/* -----------------------------------------------------------------------------
- * global variable support code.
- * ----------------------------------------------------------------------------- */
-
-typedef struct swig_globalvar {   
-  char       *name;                  /* Name of global variable */
-  PyObject *(*get_attr)(void);       /* Return the current value */
-  int       (*set_attr)(PyObject *); /* Set the value */
-  struct swig_globalvar *next;
-} swig_globalvar;
-
-typedef struct swig_varlinkobject {
-  PyObject_HEAD
-  swig_globalvar *vars;
-} swig_varlinkobject;
-
-static PyObject *
-swig_varlink_repr(swig_varlinkobject *v) {
-  v = v;
-  return PyString_FromString("<Global variables>");
-}
-
-static int
-swig_varlink_print(swig_varlinkobject *v, FILE *fp, int flags) {
-  swig_globalvar  *var;
-  flags = flags;
-  fprintf(fp,"Global variables { ");
-  for (var = v->vars; var; var=var->next) {
-    fprintf(fp,"%s", var->name);
-    if (var->next) fprintf(fp,", ");
-  }
-  fprintf(fp," }\n");
-  return 0;
-}
-
-static PyObject *
-swig_varlink_getattr(swig_varlinkobject *v, char *n) {
-  swig_globalvar *var = v->vars;
-  while (var) {
-    if (strcmp(var->name,n) == 0) {
-      return (*var->get_attr)();
-    }
-    var = var->next;
-  }
-  PyErr_SetString(PyExc_NameError,"Unknown C global variable");
-  return NULL;
-}
-
-static int
-swig_varlink_setattr(swig_varlinkobject *v, char *n, PyObject *p) {
-  swig_globalvar *var = v->vars;
-  while (var) {
-    if (strcmp(var->name,n) == 0) {
-      return (*var->set_attr)(p);
-    }
-    var = var->next;
-  }
-  PyErr_SetString(PyExc_NameError,"Unknown C global variable");
-  return 1;
-}
-
-statichere PyTypeObject varlinktype = {
-  PyObject_HEAD_INIT(0)              
-  0,
-  "swigvarlink",                      /* Type name    */
-  sizeof(swig_varlinkobject),         /* Basic size   */
-  0,                                  /* Itemsize     */
-  0,                                  /* Deallocator  */ 
-  (printfunc) swig_varlink_print,     /* Print        */
-  (getattrfunc) swig_varlink_getattr, /* get attr     */
-  (setattrfunc) swig_varlink_setattr, /* Set attr     */
-  0,                                  /* tp_compare   */
-  (reprfunc) swig_varlink_repr,       /* tp_repr      */    
-  0,                                  /* tp_as_number */
-  0,                                  /* tp_as_mapping*/
-  0,                                  /* tp_hash      */
-};
-
-/* Create a variable linking object for use later */
-SWIGRUNTIME(PyObject *)
-SWIG_newvarlink(void) {
-  swig_varlinkobject *result = 0;
-  result = PyMem_NEW(swig_varlinkobject,1);
-  varlinktype.ob_type = &PyType_Type;    /* Patch varlinktype into a PyType */
-  result->ob_type = &varlinktype;
-  result->vars = 0;
-  result->ob_refcnt = 0;
-  Py_XINCREF((PyObject *) result);
-  return ((PyObject*) result);
-}
-
-SWIGRUNTIME(void)
-SWIG_addvarlink(PyObject *p, char *name,
-	   PyObject *(*get_attr)(void), int (*set_attr)(PyObject *p)) {
-  swig_varlinkobject *v;
-  swig_globalvar *gv;
-  v= (swig_varlinkobject *) p;
-  gv = (swig_globalvar *) malloc(sizeof(swig_globalvar));
-  gv->name = (char *) malloc(strlen(name)+1);
-  strcpy(gv->name,name);
-  gv->get_attr = get_attr;
-  gv->set_attr = set_attr;
-  gv->next = v->vars;
-  v->vars = gv;
-}
-/* Convert a pointer value */
-SWIGRUNTIME(int)
-SWIG_ConvertPtr(PyObject *obj, void **ptr, swig_type_info *ty, int flags) {
-  unsigned long p;
-  register int d;
-  swig_type_info *tc;
-  char  *c;
-  static PyObject *SWIG_this = 0;
-  int    newref = 0;
-
-  if (!obj || (obj == Py_None)) {
-    *ptr = 0;
-    return 0;
-  }
-#ifdef SWIG_COBJECT_TYPES
-  if (!(PyCObject_Check(obj))) {
-    if (!SWIG_this)
-      SWIG_this = PyString_InternFromString("this");
-    obj = PyObject_GetAttr(obj,SWIG_this);
-    newref = 1;
-    if (!obj) goto type_error;
-    if (!PyCObject_Check(obj)) {
-      Py_DECREF(obj);
-      goto type_error;
-    }
-  } 
-  *ptr = PyCObject_AsVoidPtr(obj);
-  c = (char *) PyCObject_GetDesc(obj);
-  if (newref) Py_DECREF(obj);
-  goto cobject;
-#else
-  if (!(PyString_Check(obj))) {
-    if (!SWIG_this)
-      SWIG_this = PyString_InternFromString("this");
-    obj = PyObject_GetAttr(obj,SWIG_this);
-    newref = 1;
-    if (!obj) goto type_error;
-    if (!PyString_Check(obj)) {
-      Py_DECREF(obj);
-      goto type_error;
-    }
-  } 
-  c = PyString_AsString(obj);
-  p = 0;
-  /* Pointer values must start with leading underscore */
-  if (*c != '_') {
-    *ptr = (void *) 0;
-    if (strcmp(c,"NULL") == 0) {
-      if (newref) Py_DECREF(obj);
-      return 0;
-    } else {
-      if (newref) Py_DECREF(obj);
-      goto type_error;
-    }
-  }
-  c++;
-  /* Extract hex value from pointer */
-  while ((d = *c)) {
-    if ((d >= '0') && (d <= '9'))
-      p = (p << 4) + (d - '0');
-    else if ((d >= 'a') && (d <= 'f'))
-      p = (p << 4) + (d - ('a'-10));
-    else
-      break; 
-    c++;
-  }
-  *ptr = (void *) p;
-  if (newref) Py_DECREF(obj);
-#endif
-
-#ifdef SWIG_COBJECT_TYPES
-cobject:
-#endif
-
-  if (ty) {
-    tc = SWIG_TypeCheck(c,ty);
-    if (!tc) goto type_error;
-    *ptr = SWIG_TypeCast(tc,(void*)p);
-  }
-  return 0;
-
-type_error:
-
-  if (flags) {
-    if (ty) {
-      char *temp = (char *) malloc(64+strlen(ty->name));
-      sprintf(temp,"Type error. Expected %s", ty->name);
-      PyErr_SetString(PyExc_TypeError, temp);
-      free((char *) temp);
-    } else {
-      PyErr_SetString(PyExc_TypeError,"Expected a pointer");
-    }
-  }
-  return -1;
-}
-
-/* Take a pointer and convert it to a string */
-SWIGRUNTIME(void) 
-SWIG_MakePtr(char *c, void *ptr, swig_type_info *ty) {
-  static char hex[17] = "0123456789abcdef";
-  unsigned long p, s;
-  char result[32], *r; 
-  r = result;
-  p = (unsigned long) ptr;
-  if (p > 0) {
-    while (p > 0) {
-      s = p & 0xf;
-      *(r++) = hex[s];
-      p = p >> 4;
-    }
-    *r = '_';
-    while (r >= result)
-      *(c++) = *(r--);
-    strcpy (c, ty->name);
-  } else {
-    strcpy (c, "NULL");
-  }
-}
-
-/* Create a new pointer object */
-SWIGRUNTIME(PyObject *)
-SWIG_NewPointerObj(void *ptr, swig_type_info *type) {
-  char result[512];
-  PyObject *robj;
-  if (!ptr) {
-    Py_INCREF(Py_None);
-    return Py_None;
-  }
-#ifdef SWIG_COBJECT_TYPES
-  robj = PyCObject_FromVoidPtrAndDesc((void *) ptr, type->name, NULL);
-#else
-  SWIG_MakePtr(result,ptr,type);
-  robj = PyString_FromString(result);
-#endif
-  return robj;
-}
-
-/* Install Constants */
-SWIGRUNTIME(void)
-SWIG_InstallConstants(PyObject *d, swig_const_info constants[]) {
-  int i;
-  PyObject *obj;
-  for (i = 0; constants[i].type; i++) {
-    switch(constants[i].type) {
-    case SWIG_PY_INT:
-      obj = PyInt_FromLong(constants[i].lvalue);
-      break;
-    case SWIG_PY_FLOAT:
-      obj = PyFloat_FromDouble(constants[i].dvalue);
-      break;
-    case SWIG_PY_STRING:
-      obj = PyString_FromString((char *) constants[i].pvalue);
-      break;
-    case SWIG_PY_POINTER:
-      obj = SWIG_NewPointerObj(constants[i].pvalue, *(constants[i]).ptype);
-      break;
-    default:
-      obj = 0;
-      break;
-    }
-    if (obj) {
-      PyDict_SetItemString(d,constants[i].name,obj);
-      Py_DECREF(obj);
-    }
-  }
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-
-
-/* -------- TYPES TABLE (BEGIN) -------- */
-
-static swig_type_info *swig_types[1];
-
-/* -------- TYPES TABLE (END) -------- */
-
-#define SWIG_init    init_vq
-
-#define SWIG_name    "_vq"
-
-
-#include "vq.h"
-
-/* Wrappers for the template code */
-
-void float_vq(float * obs,float* code_book, int Nobs, int Ncodes, 
-              int Nfeatures, int* codes, float* lowest_dist)
-{
-    tvq<float>(obs,code_book,Nobs,Ncodes,Nfeatures,codes,lowest_dist);
-}
-
-void double_vq(double * obs,double* code_book, int Nobs, int Ncodes, 
-              int Nfeatures, int* codes, double* lowest_dist)
-{
-    tvq<double>(obs,code_book,Nobs,Ncodes,Nfeatures,codes,lowest_dist);
-}
-
-
-static PyObject* l_output_helper(PyObject* target, PyObject* o) {
-    PyObject*   o2;
-    if (!target) {                   
-        target = o;
-    } else if (target == Py_None) {  
-        Py_DECREF(Py_None);
-        target = o;
-    } else {                         
-        if (!PyList_Check(target)) {
-            o2 = target;
-            target = PyList_New(0);
-            PyList_Append(target, o2);
-	    Py_XDECREF(o2);
-        }
-        PyList_Append(target,o);
-	Py_XDECREF(o);
-    }
-    return target;
-}
-
-static PyObject* t_output_helper(PyObject* target, PyObject* o) {
-    PyObject*   o2;
-    PyObject*   o3;
-
-    if (!target) {                   
-        target = o;
-    } else if (target == Py_None) {  
-        Py_DECREF(Py_None);
-        target = o;
-    } else {                         
-        if (!PyTuple_Check(target)) {
-            o2 = target;
-            target = PyTuple_New(1);
-            PyTuple_SetItem(target, 0, o2);
-        }
-        o3 = PyTuple_New(1);            
-        PyTuple_SetItem(o3, 0, o);      
-
-        o2 = target;
-        target = PySequence_Concat(o2, o3); 
-        Py_DECREF(o2);                      
-        Py_DECREF(o3);
-    }
-    return target;
-}
-
-
-#include "numpy/noprefix.h"
-
-// hmmm. how do we prevent typedefs from conflicting
-// with users definition on complex numbers?
-//#include "complex_test.h"
-//typedef struct{ float real; 
-//                float imag;} complex;
-//typedef struct{ double real; 
-//                double imag;} zcomplex;
-
-
-// used for converting typecodes to memory sizes.
-int char_to_size(char type)
-{
-    if (type=='i') return sizeof(int);
-    if (type=='f') return sizeof(float);
-    if (type=='d') return sizeof(double);
-    if (type=='c') return 2*sizeof(float);
-    if (type=='z') return 2*sizeof(double);
-}
-int char_to_numtype(char type)
-{
-    if (type=='i') return 'i';
-    if (type=='f') return 'f';
-    if (type=='d') return 'd';
-    if (type=='c') return 'F';
-    if (type=='z') return 'D';
-}
-
-
-typedef int int_IN_D0;
-typedef float float_IN_D0;
-typedef double double_IN_D0;
-typedef int int_IN_D1;
-typedef float float_IN_D1;
-typedef double double_IN_D1;
-typedef int int_IN_D2;
-typedef float float_IN_D2;
-typedef double double_IN_D2;
-typedef int int_IN_D0_D1;
-typedef float float_IN_D0_D1;
-typedef double double_IN_D0_D1;
-typedef int int_IN_D0_D2;
-typedef float float_IN_D0_D2;
-typedef double double_IN_D0_D2;
-typedef int int_IN_D1_D2;
-typedef float float_IN_D1_D2;
-typedef double double_IN_D1_D2;
-typedef int int_IN_D0_D1_D2;
-typedef float float_IN_D0_D1_D2;
-typedef double double_IN_D0_D1_D2;
-
-
-typedef int int_ARGOUT_D0;
-typedef float float_ARGOUT_D0;
-typedef double double_ARGOUT_D0;
-typedef int int_ARGOUT_D1;
-typedef float float_ARGOUT_D1;
-typedef double double_ARGOUT_D1;
-typedef int int_ARGOUT_D2;
-typedef float float_ARGOUT_D2;
-typedef double double_ARGOUT_D2;
-typedef int int_ARGOUT_D0_D1;
-typedef float float_ARGOUT_D0_D1;
-typedef double double_ARGOUT_D0_D1;
-typedef int int_ARGOUT_D0_D2;
-typedef float float_ARGOUT_D0_D2;
-typedef double double_ARGOUT_D0_D2;
-typedef int int_ARGOUT_D1_D2;
-typedef float float_ARGOUT_D1_D2;
-typedef double double_ARGOUT_D1_D2;
-typedef int int_ARGOUT_D0_D1_D2;
-typedef float float_ARGOUT_D0_D1_D2;
-typedef double double_ARGOUT_D0_D1_D2;
-
-
-typedef int int_ARGOUT_TUPLE_D0;
-typedef float float_ARGOUT_TUPLE_D0;
-typedef double double_ARGOUT_TUPLE_D0;
-typedef int int_ARGOUT_TUPLE_D1;
-typedef float float_ARGOUT_TUPLE_D1;
-typedef double double_ARGOUT_TUPLE_D1;
-typedef int int_ARGOUT_TUPLE_D2;
-typedef float float_ARGOUT_TUPLE_D2;
-typedef double double_ARGOUT_TUPLE_D2;
-typedef int int_ARGOUT_TUPLE_D0_D1;
-typedef float float_ARGOUT_TUPLE_D0_D1;
-typedef double double_ARGOUT_TUPLE_D0_D1;
-typedef int int_ARGOUT_TUPLE_D0_D2;
-typedef float float_ARGOUT_TUPLE_D0_D2;
-typedef double double_ARGOUT_TUPLE_D0_D2;
-typedef int int_ARGOUT_TUPLE_D1_D2;
-typedef float float_ARGOUT_TUPLE_D1_D2;
-typedef double double_ARGOUT_TUPLE_D1_D2;
-typedef int int_ARGOUT_TUPLE_D0_D1_D2;
-typedef float float_ARGOUT_TUPLE_D0_D1_D2;
-typedef double double_ARGOUT_TUPLE_D0_D1_D2;
-
-
-typedef int int_OUT_D0;
-typedef float float_OUT_D0;
-typedef double double_OUT_D0;
-typedef int int_OUT_D1;
-typedef float float_OUT_D1;
-typedef double double_OUT_D1;
-typedef int int_OUT_D2;
-typedef float float_OUT_D2;
-typedef double double_OUT_D2;
-typedef int int_OUT_D0_D1;
-typedef float float_OUT_D0_D1;
-typedef double double_OUT_D0_D1;
-typedef int int_OUT_D0_D2;
-typedef float float_OUT_D0_D2;
-typedef double double_OUT_D0_D2;
-typedef int int_OUT_D1_D2;
-typedef float float_OUT_D1_D2;
-typedef double double_OUT_D1_D2;
-typedef int int_OUT_D0_D1_D2;
-typedef float float_OUT_D0_D1_D2;
-typedef double double_OUT_D0_D1_D2;
-
-
-                                                                        
-PyArrayObject* IN_in(PyObject* source, char* basetype_string,             
-                             int** target_dims, int dims)                 
-{                                                                         
-    PyArrayObject *a_obj;                                                 
-    char ar_type = char_to_numtype(basetype_string[0]);                   
-    a_obj = (PyArrayObject*) PyArray_ContiguousFromObject(source,ar_type, 
-                                                             dims,dims);  
-    if (a_obj == NULL)                                                    
-    {                                                                     
-        //PyArray Contiguous From Object will set the error value.        
-    	return NULL;                                                      
-    }                                                                     
-    for(int i = 0; i < dims;i++)                                          
-    {                                                                     
-        *(target_dims[i]) = a_obj->dimensions[i];                         
-    }                                                                     
-    return a_obj;                                                         
-}                                                                         
-                                                                          
-
-char* ARGOUT_check(char* basetype_string,int* dims, int dim_len)
-{
-    char *rdata;
-    int element_size = char_to_size(basetype_string[0]);
-    int tot_length = 1;
-    for (int i = 0; i < dim_len; i++)
-        tot_length *= dims[i];        
-    rdata = (char*)malloc(tot_length*element_size);
-    if(rdata == NULL)
-	{
-	    PyErr_SetString(PyExc_MemoryError, "can't allocate memory for output array for arg$argnum");
-	    return NULL;
-	}	
-    return rdata;
-}
-
-PyObject* ARGOUT_argout(char* source, char* basetype_string, 
-                                int* dims, int dim_len)
-{
-    PyArrayObject *res;
-    char array_type = char_to_numtype(basetype_string[0]);
-    res = (PyArrayObject *)PyArray_FromDimsAndData(dim_len, dims, 
-                                                   array_type,source);
-    if(res == NULL)
-    {
-        //PyErr_SetString(PyExc_ValueError, "error converting internal data to array");
-    	return NULL;
-    }
-    res->flags |= NPY_OWNDATA; // we want the array to deallocate mem when it is finished.
-    // stick result in the output tuple (target).
-    // Need to think about generality of this one...
-    return (PyObject *) res;
-}
-#ifdef __cplusplus
-extern "C" {
-#endif
-static PyObject *_wrap_double_vq(PyObject *self, PyObject *args) {
-    PyObject *resultobj;
-    double_IN_D0_D2 *arg0 ;
-    double_IN_D1_D2 *arg1 ;
-    int arg2 ;
-    int arg3 ;
-    int arg4 ;
-    int_ARGOUT_TUPLE_D0 *arg5 ;
-    double_ARGOUT_TUPLE_D0 *arg6 ;
-    int *_d0 ;
-    int *_d1 ;
-    int *_d2 ;
-    PyObject * obj0  = 0 ;
-    PyObject * obj1  = 0 ;
-    
-    {
-        _d0 = &arg2; 
-    }
-    {
-        _d1 = &arg3; 
-    }
-    {
-        _d2 = &arg4; 
-    }
-    {
-    }
-    {
-    }
-    if(!PyArg_ParseTuple(args,"OO:double_vq",&obj0,&obj1)) return NULL;
-    {
-        int* targ_dims[2] = {
-            _d0,_d2
-        };
-        PyArrayObject* a_obj = IN_in(obj0,"double_IN_D0_D2",targ_dims,2);
-        if (a_obj == NULL) return NULL;
-        arg0 = (double_IN_D0_D2 *) a_obj->data;
-        obj0 = (PyObject*)a_obj;
-    }
-    {
-        int* targ_dims[2] = {
-            _d1,_d2
-        };
-        PyArrayObject* a_obj = IN_in(obj1,"double_IN_D1_D2",targ_dims,2);
-        if (a_obj == NULL) return NULL;
-        arg1 = (double_IN_D1_D2 *) a_obj->data;
-        obj1 = (PyObject*)a_obj;
-    }
-    {
-        int dim_len = 1;
-        int dims[1] = {
-            *_d0
-        };
-        arg5 = (int_ARGOUT_TUPLE_D0 *) ARGOUT_check("int_ARGOUT_TUPLE_D0",dims,dim_len);
-        if (arg5 == NULL) return NULL;
-    }
-    {
-        int dim_len = 1;
-        int dims[1] = {
-            *_d0
-        };
-        arg6 = (double_ARGOUT_TUPLE_D0 *) ARGOUT_check("double_ARGOUT_TUPLE_D0",dims,dim_len);
-        if (arg6 == NULL) return NULL;
-    }
-    double_vq(arg0,arg1,arg2,arg3,arg4,arg5,arg6);
-    Py_INCREF(Py_None);
-    resultobj = Py_None;
-    {
-        int dim_len = 1;
-        int dims[1] = {
-            *_d0
-        };
-        PyObject * res;
-        res = ARGOUT_argout( (char*) arg5,"int_ARGOUT_TUPLE_D0",
-        dims, dim_len);
-        if (resultobj == NULL) 
-        return NULL;
-        resultobj = t_output_helper(resultobj, res);
-    }
-    {
-        int dim_len = 1;
-        int dims[1] = {
-            *_d0
-        };
-        PyObject * res;
-        res = ARGOUT_argout( (char*) arg6,"double_ARGOUT_TUPLE_D0",
-        dims, dim_len);
-        if (resultobj == NULL) 
-        return NULL;
-        resultobj = t_output_helper(resultobj, res);
-    }
-    {
-        Py_XDECREF(obj0); 
-    }
-    {
-        Py_XDECREF(obj1); 
-    }
-    return resultobj;
-}
-
-
-static PyObject *_wrap_float_vq(PyObject *self, PyObject *args) {
-    PyObject *resultobj;
-    float_IN_D0_D2 *arg0 ;
-    float_IN_D1_D2 *arg1 ;
-    int arg2 ;
-    int arg3 ;
-    int arg4 ;
-    int_ARGOUT_TUPLE_D0 *arg5 ;
-    float_ARGOUT_TUPLE_D0 *arg6 ;
-    int *_d0 ;
-    int *_d1 ;
-    int *_d2 ;
-    PyObject * obj0  = 0 ;
-    PyObject * obj1  = 0 ;
-    
-    {
-        _d0 = &arg2; 
-    }
-    {
-        _d1 = &arg3; 
-    }
-    {
-        _d2 = &arg4; 
-    }
-    {
-    }
-    {
-    }
-    if(!PyArg_ParseTuple(args,"OO:float_vq",&obj0,&obj1)) return NULL;
-    {
-        int* targ_dims[2] = {
-            _d0,_d2
-        };
-        PyArrayObject* a_obj = IN_in(obj0,"float_IN_D0_D2",targ_dims,2);
-        if (a_obj == NULL) return NULL;
-        arg0 = (float_IN_D0_D2 *) a_obj->data;
-        obj0 = (PyObject*)a_obj;
-    }
-    {
-        int* targ_dims[2] = {
-            _d1,_d2
-        };
-        PyArrayObject* a_obj = IN_in(obj1,"float_IN_D1_D2",targ_dims,2);
-        if (a_obj == NULL) return NULL;
-        arg1 = (float_IN_D1_D2 *) a_obj->data;
-        obj1 = (PyObject*)a_obj;
-    }
-    {
-        int dim_len = 1;
-        int dims[1] = {
-            *_d0
-        };
-        arg5 = (int_ARGOUT_TUPLE_D0 *) ARGOUT_check("int_ARGOUT_TUPLE_D0",dims,dim_len);
-        if (arg5 == NULL) return NULL;
-    }
-    {
-        int dim_len = 1;
-        int dims[1] = {
-            *_d0
-        };
-        arg6 = (float_ARGOUT_TUPLE_D0 *) ARGOUT_check("float_ARGOUT_TUPLE_D0",dims,dim_len);
-        if (arg6 == NULL) return NULL;
-    }
-    float_vq(arg0,arg1,arg2,arg3,arg4,arg5,arg6);
-    Py_INCREF(Py_None);
-    resultobj = Py_None;
-    {
-        int dim_len = 1;
-        int dims[1] = {
-            *_d0
-        };
-        PyObject * res;
-        res = ARGOUT_argout( (char*) arg5,"int_ARGOUT_TUPLE_D0",
-        dims, dim_len);
-        if (resultobj == NULL) 
-        return NULL;
-        resultobj = t_output_helper(resultobj, res);
-    }
-    {
-        int dim_len = 1;
-        int dims[1] = {
-            *_d0
-        };
-        PyObject * res;
-        res = ARGOUT_argout( (char*) arg6,"float_ARGOUT_TUPLE_D0",
-        dims, dim_len);
-        if (resultobj == NULL) 
-        return NULL;
-        resultobj = t_output_helper(resultobj, res);
-    }
-    {
-        Py_XDECREF(obj0); 
-    }
-    {
-        Py_XDECREF(obj1); 
-    }
-    return resultobj;
-}
-
-
-static PyMethodDef _vqMethods[] = {
-	 { "double_vq", _wrap_double_vq, METH_VARARGS },
-	 { "float_vq", _wrap_float_vq, METH_VARARGS },
-	 { NULL, NULL }
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-/* -------- TYPE CONVERSION AND EQUIVALENCE RULES (BEGIN) -------- */
-
-
-static swig_type_info *swig_types_initial[] = {
-0
-};
-
-
-/* -------- TYPE CONVERSION AND EQUIVALENCE RULES (END) -------- */
-
-static swig_const_info swig_const_table[] = {
-{0}};
-
-static PyObject *SWIG_globals;
-#ifdef __cplusplus
-extern "C" 
-#endif
-SWIGEXPORT(void) init_vq(void) {
-    PyObject *m, *d;
-    int i;
-    SWIG_globals = SWIG_newvarlink();
-    m = Py_InitModule("_vq", _vqMethods);
-    d = PyModule_GetDict(m);
-    for (i = 0; swig_types_initial[i]; i++) {
-        swig_types[i] = SWIG_TypeRegister(swig_types_initial[i]);
-    }
-    
-    import_array();
-    SWIG_InstallConstants(d,swig_const_table);
-}
-

Modified: trunk/Lib/cluster/tests/test_vq.py
===================================================================
--- trunk/Lib/cluster/tests/test_vq.py	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/tests/test_vq.py	2007-06-19 15:07:48 UTC (rev 3110)
@@ -1,7 +1,7 @@
 #! /usr/bin/env python
 
 # David Cournapeau
-# Last Change: Fri Jun 08 12:00 PM 2007 J
+# Last Change: Tue Jun 19 10:00 PM 2007 J
 
 # For now, just copy the tests from sandbox.pyem, so we can check that
 # kmeans works OK for trivial examples.
@@ -12,7 +12,7 @@
 import numpy as N
 
 set_package_path()
-from cluster.vq import kmeans, kmeans2, py_vq, py_vq2, _py_vq_1d
+from cluster.vq import kmeans, kmeans2, py_vq, py_vq2, _py_vq_1d, vq
 try:
     from cluster import _vq
     TESTC=True
@@ -60,19 +60,36 @@
         initc = N.concatenate(([[X[0]], [X[1]], [X[2]]]))
         code = initc.copy()
         if TESTC:
-            label1 = _vq.double_vq(X, initc)[0]
+            label1, dist = _vq.vq(X, initc)
             assert_array_equal(label1, LABEL1)
+            tlabel1, tdist = vq(X, initc)
         else:
             print "== not testing C imp of vq =="
 
-    #def check_vq_1d(self, level=1):
+    #def check_py_vq_1d(self, level=1):
+    #    """Test special rank 1 vq algo, python implementation."""
     #    data = X[:, 0]
     #    initc = data[:3]
     #    code = initc.copy()
-    #    print _py_vq_1d(data, initc)
+    #    a, b = _py_vq_1d(data, initc)
+    #    ta, tb = py_vq(data[:, N.newaxis], initc[:, N.newaxis])
+    #    assert_array_equal(a, ta)
+    #    assert_array_equal(b, tb)
 
+    def check_vq_1d(self, level=1):
+        """Test special rank 1 vq algo, python implementation."""
+        data = X[:, 0]
+        initc = data[:3]
+        code = initc.copy()
+        if TESTC:
+            a, b = _vq.vq(data, initc)
+            ta, tb = py_vq(data[:, N.newaxis], initc[:, N.newaxis])
+            assert_array_equal(a, ta)
+            assert_array_equal(b, tb)
+        else:
+            print "== not testing C imp of vq (rank 1) =="
+
 class test_kmean(NumpyTestCase):
-    #def check_kmeans
     def check_kmeans_simple(self, level=1):
         initc = N.concatenate(([[X[0]], [X[1]], [X[2]]]))
         code = initc.copy()
@@ -100,18 +117,17 @@
         assert_array_almost_equal(code1, CODET1)
         assert_array_almost_equal(code2, CODET2)
 
-    #def check_kmeans2_rank1(self, level=1):
-    #    """Testing simple call to kmeans2 with rank 1 data."""
-    #    data = N.fromfile(open(DATAFILE1), sep = ", ")
-    #    data = data.reshape((200, 2))
-    #    data1 = data[:, 0]
-    #    data2 = data[:, 1]
+    def check_kmeans2_rank1(self, level=1):
+        """Testing simple call to kmeans2 with rank 1 data."""
+        data = N.fromfile(open(DATAFILE1), sep = ", ")
+        data = data.reshape((200, 2))
+        data1 = data[:, 0]
+        data2 = data[:, 1]
 
-    #    initc = data1[:3]
-    #    code = initc.copy()
-    #    print _py_vq_1d(data1, code)
-    #    code1 = kmeans2(data1, code, niter = 1)[0]
-    #    code2 = kmeans2(data1, code, niter = 2)[0]
+        initc = data1[:3]
+        code = initc.copy()
+        code1 = kmeans2(data1, code, iter = 1)[0]
+        code2 = kmeans2(data1, code, iter = 2)[0]
 
     def check_kmeans2_init(self, level = 1):
         """Testing that kmeans2 init methods work."""
@@ -126,6 +142,5 @@
         kmeans2(data, 3, minit = 'random')
         kmeans2(data, 3, minit = 'points')
 
-
 if __name__ == "__main__":
     NumpyTest().run()

Modified: trunk/Lib/cluster/vq.py
===================================================================
--- trunk/Lib/cluster/vq.py	2007-06-18 23:22:19 UTC (rev 3109)
+++ trunk/Lib/cluster/vq.py	2007-06-19 15:07:48 UTC (rev 3110)
@@ -139,9 +139,9 @@
         c_obs = obs.astype(ct)
         c_code_book = code_book.astype(ct)
         if ct is single:
-            results = _vq.float_vq(c_obs, c_code_book)
+            results = _vq.vq(c_obs, c_code_book)
         elif ct is double:
-            results = _vq.double_vq(c_obs, c_code_book)
+            results = _vq.vq(c_obs, c_code_book)
         else:
             results = py_vq(obs, code_book)
     except ImportError:
@@ -512,7 +512,7 @@
     nd  = N.ndim(data)
     if nd == 1:
         d = 1
-        raise ValueError("Input of rank 1 not supported yet")
+        #raise ValueError("Input of rank 1 not supported yet")
     elif nd == 2:
         d = data.shape[1]
     else:
@@ -560,9 +560,21 @@
         for j in range(nc):
             mbs = N.where(label==j)
             if mbs[0].size > 0:
-                code[j,:] = N.mean(data[mbs], axis=0)
+                code[j] = N.mean(data[mbs], axis=0)
             else:
                 warnings.warn("One of the clusters are empty. " \
                               "Re-run kmean with a different initialization.")
 
     return code, label
+
+if __name__  == '__main__':
+    import _vq
+    a = N.random.randn(4, 2)
+    b = N.random.randn(2, 2)
+
+    print _vq.vq(a, b)
+    print _vq.vq(N.array([[1], [2], [3], [4], [5], [6.]]), N.array([[2.], [5.]]))
+    print _vq.vq(N.array([1, 2, 3, 4, 5, 6.]), N.array([2., 5.]))
+    _vq.vq(a.astype(N.float32), b.astype(N.float32))
+    _vq.vq(a, b.astype(N.float32))
+    _vq.vq([0], b)


From scipy-svn at scipy.org  Wed Jun 20 06:23:38 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 20 Jun 2007 05:23:38 -0500 (CDT)
Subject: [Scipy-svn] r3111 - in trunk/Lib/cluster: . src
Message-ID: <20070620102338.6160339C12B@new.scipy.org>

Author: cdavid
Date: 2007-06-20 05:23:17 -0500 (Wed, 20 Jun 2007)
New Revision: 3111

Modified:
   trunk/Lib/cluster/setup.py
   trunk/Lib/cluster/src/vq.tpl
   trunk/Lib/cluster/src/vq_module.c
   trunk/Lib/cluster/vq.py
Log:
Change int to npy_intp for index array + cosmetic change in python code.

Modified: trunk/Lib/cluster/setup.py
===================================================================
--- trunk/Lib/cluster/setup.py	2007-06-19 15:07:48 UTC (rev 3110)
+++ trunk/Lib/cluster/setup.py	2007-06-20 10:23:17 UTC (rev 3111)
@@ -2,19 +2,15 @@
 
 from os.path import join
 
-def configuration(parent_package='',top_path=None):
+def configuration(parent_package = '', top_path = None):
     from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs
-    config = Configuration('cluster',parent_package,top_path)
+    config = Configuration('cluster', parent_package, top_path)
 
     config.add_data_dir('tests')
 
     config.add_extension('_vq',
         sources=[join('src', 'vq_module.c'), join('src', 'vq.c')],
         include_dirs = [get_numpy_include_dirs()])
-    #config.add_extension('_vq',
-    #    sources=[join('src', 'vq_wrap.cpp')])
-    #config.add_extension('_c_vq',
-    #    sources=[join('src', 'vq.c') ])
 
     return config
 

Modified: trunk/Lib/cluster/src/vq.tpl
===================================================================
--- trunk/Lib/cluster/src/vq.tpl	2007-06-19 15:07:48 UTC (rev 3110)
+++ trunk/Lib/cluster/src/vq.tpl	2007-06-20 10:23:17 UTC (rev 3111)
@@ -1,6 +1,10 @@
 [+ AutoGen5 template c +]
 /*
  * vim:syntax=c
+ *
+ * This file implements vq for float and double in C. It is a direct
+ * translation from the swig interface which could not be generated anymore
+ * with recent swig
  */
 #include <stddef.h>
 #include <math.h>

Modified: trunk/Lib/cluster/src/vq_module.c
===================================================================
--- trunk/Lib/cluster/src/vq_module.c	2007-06-19 15:07:48 UTC (rev 3110)
+++ trunk/Lib/cluster/src/vq_module.c	2007-06-20 10:23:17 UTC (rev 3111)
@@ -1,5 +1,5 @@
 /*
- * Last Change: Tue Jun 19 11:00 PM 2007 J
+ * Last Change: Wed Jun 20 04:00 PM 2007 J
  *
  */
 #include <Python.h>
@@ -97,24 +97,24 @@
             if (dist_a == NULL) {
                 goto clean_code_a;
             }
-            index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, NPY_INT, 0);
+            index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, PyArray_INTP, 0);
             if (index_a == NULL) {
                 goto clean_dist_a;
             }
             float_tvq((float*)obs_a->data, (float*)code_a->data, n, nc, d,
-                    (int*)index_a->data, (float*)dist_a->data);
+                    (npy_intp*)index_a->data, (float*)dist_a->data);
             break;
         case NPY_DOUBLE:
             dist_a = (PyArrayObject*)PyArray_EMPTY(1, &n, typenum1, 0);
             if (dist_a == NULL) {
                 goto clean_code_a;
             }
-            index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, NPY_INT, 0);
+            index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, PyArray_INTP, 0);
             if (index_a == NULL) {
                 goto clean_dist_a;
             }
             double_tvq((double*)obs_a->data, (double*)code_a->data, n, nc, d,
-                    (int*)index_a->data, (double*)dist_a->data);
+                    (npy_intp*)index_a->data, (double*)dist_a->data);
             break;
         default:
             PyErr_Format(PyExc_ValueError,
@@ -151,4 +151,3 @@
     Py_DECREF(obs_a);
     return NULL;
 }
-

Modified: trunk/Lib/cluster/vq.py
===================================================================
--- trunk/Lib/cluster/vq.py	2007-06-19 15:07:48 UTC (rev 3110)
+++ trunk/Lib/cluster/vq.py	2007-06-20 10:23:17 UTC (rev 3111)
@@ -181,7 +181,8 @@
     # d = number of features
     if N.ndim(obs) == 1:
         if not N.ndim(obs) == N.ndim(code_book):
-            raise ValueError("Observation and code_book should have the same rank")
+            raise ValueError(
+                    "Observation and code_book should have the same rank")
         else:
             return _py_vq_1d(obs, code_book)
     else:
@@ -192,7 +193,8 @@
         raise ValueError("Observation and code_book should have the same rank")
     elif not d == code_book.shape[1]:
         raise ValueError("Code book(%d) and obs(%d) should have the same " \
-                         "number of features (eg columns)""" % (code_book.shape[1], d))
+                         "number of features (eg columns)""" %
+                         (code_book.shape[1], d))
 
     code = zeros(n, dtype=int)
     min_dist = zeros(n)
@@ -547,11 +549,7 @@
 def _kmeans2(data, code, niter, nc):
     """ "raw" version of kmeans2. Do not use directly.
 
-    Run kmeans with a given initial codebook.
-
-    :undocumented
-
-    """
+    Run kmeans with a given initial codebook.  """
     for i in range(niter):
         # Compute the nearest neighbour for each obs
         # using the current code book
@@ -568,13 +566,15 @@
     return code, label
 
 if __name__  == '__main__':
-    import _vq
-    a = N.random.randn(4, 2)
-    b = N.random.randn(2, 2)
+    pass
+    #import _vq
+    #a = N.random.randn(4, 2)
+    #b = N.random.randn(2, 2)
 
-    print _vq.vq(a, b)
-    print _vq.vq(N.array([[1], [2], [3], [4], [5], [6.]]), N.array([[2.], [5.]]))
-    print _vq.vq(N.array([1, 2, 3, 4, 5, 6.]), N.array([2., 5.]))
-    _vq.vq(a.astype(N.float32), b.astype(N.float32))
-    _vq.vq(a, b.astype(N.float32))
-    _vq.vq([0], b)
+    #print _vq.vq(a, b)
+    #print _vq.vq(N.array([[1], [2], [3], [4], [5], [6.]]), 
+    #        N.array([[2.], [5.]]))
+    #print _vq.vq(N.array([1, 2, 3, 4, 5, 6.]), N.array([2., 5.]))
+    #_vq.vq(a.astype(N.float32), b.astype(N.float32))
+    #_vq.vq(a, b.astype(N.float32))
+    #_vq.vq([0], b)


From scipy-svn at scipy.org  Wed Jun 20 12:35:36 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Wed, 20 Jun 2007 11:35:36 -0500 (CDT)
Subject: [Scipy-svn] r3112 - trunk/Lib/cluster/src
Message-ID: <20070620163536.020C139C03A@new.scipy.org>

Author: cdavid
Date: 2007-06-20 11:35:16 -0500 (Wed, 20 Jun 2007)
New Revision: 3112

Modified:
   trunk/Lib/cluster/src/vq.c
   trunk/Lib/cluster/src/vq.h
   trunk/Lib/cluster/src/vq.tpl
   trunk/Lib/cluster/src/vq_module.c
Log:
Convert int to long long for index for correct support on 64 bits arch

Modified: trunk/Lib/cluster/src/vq.c
===================================================================
--- trunk/Lib/cluster/src/vq.c	2007-06-20 10:23:17 UTC (rev 3111)
+++ trunk/Lib/cluster/src/vq.c	2007-06-20 16:35:16 UTC (rev 3112)
@@ -1,5 +1,9 @@
 /*
  * vim:syntax=c
+ *
+ * This file implements vq for float and double in C. It is a direct
+ * translation from the swig interface which could not be generated anymore
+ * with recent swig
  */
 #include <stddef.h>
 #include <math.h>
@@ -14,7 +18,7 @@
 
 static int float_vq_1d(const float *in, int n, 
     const float *init, int ncode, 
-    int *code, float *mdist)
+    long long *code, float *mdist)
 {
     int i, j;
     float m, d;
@@ -37,7 +41,7 @@
 
 static int float_vq_obs(const float *obs,
     float *code_book, int Ncodes, int Nfeatures,
-       int* code, float *lowest_dist)
+       long long* code, float *lowest_dist)
 {
 	int i,j,k=0;
 	float dist, diff;
@@ -64,7 +68,7 @@
     float* obs,
     float* code_book, 
     int Nobs, int Ncodes, int Nfeatures,
-    int* codes, float* lowest_dist)
+    long long* codes, float* lowest_dist)
 {
     int i;
 	for( i = 0; i < Nobs; i++) {		
@@ -78,7 +82,7 @@
 
 static int double_vq_1d(const double *in, int n, 
     const double *init, int ncode, 
-    int *code, double *mdist)
+    long long *code, double *mdist)
 {
     int i, j;
     double m, d;
@@ -101,7 +105,7 @@
 
 static int double_vq_obs(const double *obs,
     double *code_book, int Ncodes, int Nfeatures,
-       int* code, double *lowest_dist)
+       long long* code, double *lowest_dist)
 {
 	int i,j,k=0;
 	double dist, diff;
@@ -128,7 +132,7 @@
     double* obs,
     double* code_book, 
     int Nobs, int Ncodes, int Nfeatures,
-    int* codes, double* lowest_dist)
+    long long* codes, double* lowest_dist)
 {
     int i;
 	for( i = 0; i < Nobs; i++) {		

Modified: trunk/Lib/cluster/src/vq.h
===================================================================
--- trunk/Lib/cluster/src/vq.h	2007-06-20 10:23:17 UTC (rev 3111)
+++ trunk/Lib/cluster/src/vq.h	2007-06-20 16:35:16 UTC (rev 3112)
@@ -2,9 +2,9 @@
 #define _VQ_H
 
 int double_tvq(double* obs, double* code_book, int Nobs, int Ncodes, 
-        int Nfeatures, int* codes, double* lowest_dist);
+        int Nfeatures, long long* codes, double* lowest_dist);
 
 int float_tvq(float* obs, float* code_book, int Nobs, int Ncodes, 
-        int Nfeatures, int* codes, float* lowest_dist);
+        int Nfeatures, long long* codes, float* lowest_dist);
 
 #endif

Modified: trunk/Lib/cluster/src/vq.tpl
===================================================================
--- trunk/Lib/cluster/src/vq.tpl	2007-06-20 10:23:17 UTC (rev 3111)
+++ trunk/Lib/cluster/src/vq.tpl	2007-06-20 16:35:16 UTC (rev 3112)
@@ -19,7 +19,7 @@
 [+ FOR data_type +]
 static int [+ (get "type_name") +]_vq_1d(const [+ (get "type_name") +] *in, int n, 
     const [+ (get "type_name") +] *init, int ncode, 
-    int *code, [+ (get "type_name") +] *mdist)
+    long long *code, [+ (get "type_name") +] *mdist)
 {
     int i, j;
     [+ (get "data_type") +] m, d;
@@ -42,7 +42,7 @@
 
 static int [+ (get "type_name") +]_vq_obs(const [+ (get "data_type") +] *obs,
     [+ (get "data_type") +] *code_book, int Ncodes, int Nfeatures,
-       int* code, [+ (get "data_type") +] *lowest_dist)
+       long long* code, [+ (get "data_type") +] *lowest_dist)
 {
 	int i,j,k=0;
 	[+ (get "data_type") +] dist, diff;
@@ -69,7 +69,7 @@
     [+ (get "data_type") +]* obs,
     [+ (get "data_type") +]* code_book, 
     int Nobs, int Ncodes, int Nfeatures,
-    int* codes, [+ (get "data_type") +]* lowest_dist)
+    long long* codes, [+ (get "data_type") +]* lowest_dist)
 {
     int i;
 	for( i = 0; i < Nobs; i++) {		

Modified: trunk/Lib/cluster/src/vq_module.c
===================================================================
--- trunk/Lib/cluster/src/vq_module.c	2007-06-20 10:23:17 UTC (rev 3111)
+++ trunk/Lib/cluster/src/vq_module.c	2007-06-20 16:35:16 UTC (rev 3112)
@@ -27,7 +27,8 @@
     PyArrayObject *obs_a, *code_a;
     PyArrayObject *index_a, *dist_a;
     int typenum1, typenum2;
-    int n, nc, d, nd;
+    int nc, nd;
+    npy_intp n, d;
 
     if ( !PyArg_ParseTuple(args, "OO", &obs, &code) ) {
         return NULL;


From scipy-svn at scipy.org  Fri Jun 22 04:37:26 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri, 22 Jun 2007 03:37:26 -0500 (CDT)
Subject: [Scipy-svn] r3113 - trunk/Lib/sandbox/pyem
Message-ID: <20070622083726.3311039C038@new.scipy.org>

Author: cdavid
Date: 2007-06-22 03:37:20 -0500 (Fri, 22 Jun 2007)
New Revision: 3113

Modified:
   trunk/Lib/sandbox/pyem/gmm_em.py
Log:
Refactor update step for EM (split diag and full case in subfunction)

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-20 16:35:16 UTC (rev 3112)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-22 08:37:20 UTC (rev 3113)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Tue Jun 12 08:00 PM 2007 J
+# Last Change: Thu Jun 21 03:00 PM 2007 J
 
 """Module implementing GMM, a class to estimate Gaussian mixture models using
 EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -11,7 +11,6 @@
 #   - which methods to avoid va shrinking to 0 ? There are several options, 
 #   not sure which ones are appropriates
 #   - improve EM trainer
-#   - online EM
 
 import numpy as N
 #import numpy.linalg as lin
@@ -186,61 +185,83 @@
         tgd	= densities.multiple_gauss_den(data, self.gm.mu, self.gm.va, log = True)
         # multiply by the weight
         tgd	+= N.log(self.gm.w)
-        # Normalize to get a pdf
+        # Normalize to get a (log) pdf
         gd	= tgd  - densities.logsumexp(tgd)[:, N.newaxis]
 
         return gd, tgd
 
-    def update_em(self, data, gamma):
-        """Computes update of the Gaussian Mixture Model (M step)
-        from the a posteriori pdf, computed by gmm_posterior
-        (E step).
-        """
+    def _update_em_diag(self, data, gamma, ngamma):
+        """Computes update of the Gaussian Mixture Model (M step) from the
+        responsabilities gamma and normalized responsabilities ngamma, for
+        diagonal models."""
+        #XXX: caching SS may decrease memory consumption
         k = self.gm.k
         d = self.gm.d
         n = data.shape[0]
         invn = 1.0/n
-        mGamma = N.sum(gamma, axis = 0)
 
-        if self.gm.mode == 'diag':
-            mu = N.zeros((k, d))
-            va = N.zeros((k, d))
-            gamma = gamma.T
-            for c in range(k):
-                x = N.dot(gamma[c:c+1, :], data)[0, :]
-                xx = N.dot(gamma[c:c+1, :], data ** 2)[0, :]
+        mu = N.zeros((k, d))
+        va = N.zeros((k, d))
 
-                mu[c, :] = x / mGamma[c]
-                va[c, :] = xx  / mGamma[c] - mu[c, :] ** 2
-            w   = invn * mGamma
+        for c in range(k):
+            x = N.dot(gamma.T[c:c+1, :], data)[0, :]
+            xx = N.dot(gamma.T[c:c+1, :], data ** 2)[0, :]
 
-        elif self.gm.mode == 'full':
-            # In full mode, this is the bottleneck: the triple loop
-            # kills performances. This is pretty straightforward
-            # algebra, so computing it in C should not be too difficult. The
-            # real problem is to have valid covariance matrices, and to keep
-            # them positive definite, maybe with special storage... Not sure
-            # it really worth the risk
-            mu  = N.zeros((k, d))
-            va  = N.zeros((k*d, d))
+            mu[c, :] = x / ngamma[c]
+            va[c, :] = xx  / ngamma[c] - mu[c, :] ** 2
+        w   = invn * ngamma
 
-            gamma = gamma.transpose()
-            for c in range(k):
-                #x   = N.sum(N.outer(gamma[:, c], 
-                #            N.ones((1, d))) * data, axis = 0)
-                x = N.dot(gamma[c:c+1, :], data)[0, :]
-                xx = N.zeros((d, d))
-                
-                # This should be much faster than recursing on n...
-                for i in range(d):
-                    for j in range(d):
-                        xx[i, j] = N.sum(data[:, i] * data[:, j] * gamma[c, :],
-                                axis = 0)
+        return w, mu, va
 
-                mu[c, :] = x / mGamma[c]
-                va[c*d:c*d+d, :] = xx  / mGamma[c] \
-                        - N.outer(mu[c, :], mu[c, :])
-            w   = invn * mGamma
+    def _update_em_full(self, data, gamma, ngamma):
+        """Computes update of the Gaussian Mixture Model (M step) from the
+        responsabilities gamma and normalized responsabilities ngamma, for
+        full models."""
+        k = self.gm.k
+        d = self.gm.d
+        n = data.shape[0]
+        invn = 1.0/n
+
+        # In full mode, this is the bottleneck: the triple loop
+        # kills performances. This is pretty straightforward
+        # algebra, so computing it in C should not be too difficult. The
+        # real problem is to have valid covariance matrices, and to keep
+        # them positive definite, maybe with special storage... Not sure
+        # it really worth the risk
+        mu  = N.zeros((k, d))
+        va  = N.zeros((k*d, d))
+
+        #XXX: caching SS may decrease memory consumption
+        for c in range(k):
+            #x   = N.sum(N.outer(gamma[:, c], 
+            #            N.ones((1, d))) * data, axis = 0)
+            x = N.dot(gamma.T[c:c+1, :], data)[0, :]
+            xx = N.zeros((d, d))
+            
+            # This should be much faster than recursing on n...
+            for i in range(d):
+                for j in range(d):
+                    xx[i, j] = N.sum(data[:, i] * data[:, j] * gamma.T[c, :],
+                            axis = 0)
+
+            mu[c, :] = x / ngamma[c]
+            va[c*d:c*d+d, :] = xx  / ngamma[c] \
+                    - N.outer(mu[c, :], mu[c, :])
+        w   = invn * ngamma
+
+        return w, mu, va
+
+    def update_em(self, data, gamma):
+        """Computes update of the Gaussian Mixture Model (M step)
+        from the a posteriori pdf, computed by gmm_posterior
+        (E step).
+        """
+        ngamma = N.sum(gamma, axis = 0)
+
+        if self.gm.mode == 'diag':
+            w, mu, va = self._update_em_diag(data, gamma, ngamma)
+        elif self.gm.mode == 'full':
+            w, mu, va = self._update_em_full(data, gamma, ngamma)
         else:
             raise GmmParamError("varmode not recognized")
 
@@ -344,12 +365,13 @@
         like    = N.zeros(maxiter)
 
         # Em computation, with computation of the likelihood
-        g, tgd      = model.compute_responsabilities(data)
-        like[0]     = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
+        g, tgd  = model.compute_responsabilities(data)
+        # TODO: do it in log domain instead
+        like[0] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
         model.update_em(data, g)
         for i in range(1, maxiter):
-            g, tgd      = model.compute_responsabilities(data)
-            like[i]     = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
+            g, tgd  = model.compute_responsabilities(data)
+            like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
             model.update_em(data, g)
             if has_em_converged(like[i], like[i-1], thresh):
                 return like[0:i]


From scipy-svn at scipy.org  Fri Jun 22 04:55:35 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri, 22 Jun 2007 03:55:35 -0500 (CDT)
Subject: [Scipy-svn] r3115 - in trunk/Lib/sandbox/pyem: . data
Message-ID: <20070622085535.2949139C078@new.scipy.org>

Author: cdavid
Date: 2007-06-22 03:55:20 -0500 (Fri, 22 Jun 2007)
New Revision: 3115

Modified:
   trunk/Lib/sandbox/pyem/TODO
   trunk/Lib/sandbox/pyem/data/setup.py
   trunk/Lib/sandbox/pyem/gmm_em.py
Log:
Add pendigits as a subpackage of data for distutils.

Modified: trunk/Lib/sandbox/pyem/TODO
===================================================================
--- trunk/Lib/sandbox/pyem/TODO	2007-06-22 08:39:13 UTC (rev 3114)
+++ trunk/Lib/sandbox/pyem/TODO	2007-06-22 08:55:20 UTC (rev 3115)
@@ -1,10 +1,9 @@
-# Last Change: Sat Jun 09 04:00 PM 2007 J
+# Last Change: Fri Jun 22 05:00 PM 2007 J
 
 Things which must be implemented for a 1.0 version (in importante order)
     - A classifier
     - handle rank 1 for 1d data
     - basic regularization
-    - docstrings
     - demo for pdf estimation, discriminant analysis and clustering
     - scaling of data: maybe something to handle scaling internally ?
 

Modified: trunk/Lib/sandbox/pyem/data/setup.py
===================================================================
--- trunk/Lib/sandbox/pyem/data/setup.py	2007-06-22 08:39:13 UTC (rev 3114)
+++ trunk/Lib/sandbox/pyem/data/setup.py	2007-06-22 08:55:20 UTC (rev 3115)
@@ -4,6 +4,7 @@
     from numpy.distutils.misc_util import Configuration
     config = Configuration('data',parent_package,top_path)
     config.add_subpackage('oldfaithful')
+    config.add_subpackage('pendigits')
     config.make_config_py() # installs __config__.py
     return config
 

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-22 08:39:13 UTC (rev 3114)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-22 08:55:20 UTC (rev 3115)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Thu Jun 21 03:00 PM 2007 J
+# Last Change: Fri Jun 22 05:00 PM 2007 J
 
 """Module implementing GMM, a class to estimate Gaussian mixture models using
 EM, and EM, a class which use GMM instances to estimate models parameters using


From scipy-svn at scipy.org  Fri Jun 22 05:10:00 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri, 22 Jun 2007 04:10:00 -0500 (CDT)
Subject: [Scipy-svn] r3116 - trunk/Lib/cluster/src
Message-ID: <20070622091000.6ABB039C128@new.scipy.org>

Author: cdavid
Date: 2007-06-22 04:09:20 -0500 (Fri, 22 Jun 2007)
New Revision: 3116

Modified:
   trunk/Lib/cluster/src/vq.c
   trunk/Lib/cluster/src/vq.h
   trunk/Lib/cluster/src/vq.tpl
Log:
Convert index to npy_intp to avoid problems between 32 and 64 bits.

Modified: trunk/Lib/cluster/src/vq.c
===================================================================
--- trunk/Lib/cluster/src/vq.c	2007-06-22 08:55:20 UTC (rev 3115)
+++ trunk/Lib/cluster/src/vq.c	2007-06-22 09:09:20 UTC (rev 3116)
@@ -8,6 +8,7 @@
 #include <stddef.h>
 #include <math.h>
 
+#include "vq.h"
 /*
  * results is put into code, which contains initially the initial code
  *
@@ -16,9 +17,10 @@
 const static double rbig = 1e100;
 
 
+#if 0
 static int float_vq_1d(const float *in, int n, 
     const float *init, int ncode, 
-    long long *code, float *mdist)
+    npy_intp *code, float *mdist)
 {
     int i, j;
     float m, d;
@@ -38,10 +40,11 @@
     }
     return 0;
 }
+#endif
 
 static int float_vq_obs(const float *obs,
     float *code_book, int Ncodes, int Nfeatures,
-       long long* code, float *lowest_dist)
+       npy_intp* code, float *lowest_dist)
 {
 	int i,j,k=0;
 	float dist, diff;
@@ -68,7 +71,7 @@
     float* obs,
     float* code_book, 
     int Nobs, int Ncodes, int Nfeatures,
-    long long* codes, float* lowest_dist)
+    npy_intp* codes, float* lowest_dist)
 {
     int i;
 	for( i = 0; i < Nobs; i++) {		
@@ -80,9 +83,10 @@
     return 0;
 }
 
+#if 0
 static int double_vq_1d(const double *in, int n, 
     const double *init, int ncode, 
-    long long *code, double *mdist)
+    npy_intp *code, double *mdist)
 {
     int i, j;
     double m, d;
@@ -102,10 +106,11 @@
     }
     return 0;
 }
+#endif
 
 static int double_vq_obs(const double *obs,
     double *code_book, int Ncodes, int Nfeatures,
-       long long* code, double *lowest_dist)
+       npy_intp* code, double *lowest_dist)
 {
 	int i,j,k=0;
 	double dist, diff;
@@ -132,7 +137,7 @@
     double* obs,
     double* code_book, 
     int Nobs, int Ncodes, int Nfeatures,
-    long long* codes, double* lowest_dist)
+    npy_intp* codes, double* lowest_dist)
 {
     int i;
 	for( i = 0; i < Nobs; i++) {		

Modified: trunk/Lib/cluster/src/vq.h
===================================================================
--- trunk/Lib/cluster/src/vq.h	2007-06-22 08:55:20 UTC (rev 3115)
+++ trunk/Lib/cluster/src/vq.h	2007-06-22 09:09:20 UTC (rev 3116)
@@ -1,10 +1,14 @@
 #ifndef _VQ_H_
 #define _VQ_H
 
+#include <Python.h>
+
+#include <numpy/arrayobject.h>
+
 int double_tvq(double* obs, double* code_book, int Nobs, int Ncodes, 
-        int Nfeatures, long long* codes, double* lowest_dist);
+        int Nfeatures, npy_intp* codes, double* lowest_dist);
 
 int float_tvq(float* obs, float* code_book, int Nobs, int Ncodes, 
-        int Nfeatures, long long* codes, float* lowest_dist);
+        int Nfeatures, npy_intp* codes, float* lowest_dist);
 
 #endif

Modified: trunk/Lib/cluster/src/vq.tpl
===================================================================
--- trunk/Lib/cluster/src/vq.tpl	2007-06-22 08:55:20 UTC (rev 3115)
+++ trunk/Lib/cluster/src/vq.tpl	2007-06-22 09:09:20 UTC (rev 3116)
@@ -9,6 +9,7 @@
 #include <stddef.h>
 #include <math.h>
 
+#include "vq.h"
 /*
  * results is put into code, which contains initially the initial code
  *
@@ -17,9 +18,10 @@
 const static double rbig = 1e100;
 
 [+ FOR data_type +]
+#if 0
 static int [+ (get "type_name") +]_vq_1d(const [+ (get "type_name") +] *in, int n, 
     const [+ (get "type_name") +] *init, int ncode, 
-    long long *code, [+ (get "type_name") +] *mdist)
+    npy_intp *code, [+ (get "type_name") +] *mdist)
 {
     int i, j;
     [+ (get "data_type") +] m, d;
@@ -39,10 +41,11 @@
     }
     return 0;
 }
+#endif
 
 static int [+ (get "type_name") +]_vq_obs(const [+ (get "data_type") +] *obs,
     [+ (get "data_type") +] *code_book, int Ncodes, int Nfeatures,
-       long long* code, [+ (get "data_type") +] *lowest_dist)
+       npy_intp* code, [+ (get "data_type") +] *lowest_dist)
 {
 	int i,j,k=0;
 	[+ (get "data_type") +] dist, diff;
@@ -69,7 +72,7 @@
     [+ (get "data_type") +]* obs,
     [+ (get "data_type") +]* code_book, 
     int Nobs, int Ncodes, int Nfeatures,
-    long long* codes, [+ (get "data_type") +]* lowest_dist)
+    npy_intp* codes, [+ (get "data_type") +]* lowest_dist)
 {
     int i;
 	for( i = 0; i < Nobs; i++) {		


From scipy-svn at scipy.org  Mon Jun 25 18:38:04 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Mon, 25 Jun 2007 17:38:04 -0500 (CDT)
Subject: [Scipy-svn] r3117 - in trunk/Lib/stats: . tests
Message-ID: <20070625223804.F233039C059@new.scipy.org>

Author: stefan
Date: 2007-06-25 17:37:46 -0500 (Mon, 25 Jun 2007)
New Revision: 3117

Modified:
   trunk/Lib/stats/distributions.py
   trunk/Lib/stats/tests/test_distributions.py
Log:
Fix geometric probability mass function. Add tests.


Modified: trunk/Lib/stats/distributions.py
===================================================================
--- trunk/Lib/stats/distributions.py	2007-06-22 09:09:20 UTC (rev 3116)
+++ trunk/Lib/stats/distributions.py	2007-06-25 22:37:46 UTC (rev 3117)
@@ -3913,7 +3913,7 @@
     def _argcheck(self, pr):
         return (pr<=1) & (pr >= 0)
     def _pmf(self, k, pr):
-        return (1-pr)**k * pr
+        return (1-pr)**(k-1) * pr
     def _cdf(self, x, pr):
         k = floor(x)
         return (1.0-(1.0-pr)**k)

Modified: trunk/Lib/stats/tests/test_distributions.py
===================================================================
--- trunk/Lib/stats/tests/test_distributions.py	2007-06-22 09:09:20 UTC (rev 3116)
+++ trunk/Lib/stats/tests/test_distributions.py	2007-06-25 22:37:46 UTC (rev 3117)
@@ -7,7 +7,7 @@
 
 set_package_path()
 import numpy
-from numpy import typecodes
+from numpy import typecodes, array
 import stats
 restore_path()
 
@@ -138,6 +138,18 @@
         assert(isinstance(val, numpy.ndarray))
         assert(val.dtype.char in typecodes['AllInteger'])
 
+    def check_pmf(self):
+        vals = stats.geom.pmf([1,2,3],0.5)
+        assert_array_almost_equal(vals,[0.5,0.25,0.125])
+
+    def check_cdf_sf(self):
+        vals = stats.geom.cdf([1,2,3],0.5)
+        vals_sf = stats.geom.sf([1,2,3],0.5)
+        expected = array([0.5,0.75,0.875])
+        assert_array_almost_equal(vals,expected)
+        assert_array_almost_equal(vals_sf,1-expected)
+
+
 class test_hypergeom(NumpyTestCase):
     def check_rvs(self):
         vals = stats.hypergeom.rvs(20, 10, 3, size=(2, 50))


From scipy-svn at scipy.org  Thu Jun 28 04:23:37 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu, 28 Jun 2007 03:23:37 -0500 (CDT)
Subject: [Scipy-svn] r3118 - trunk/Lib/special
Message-ID: <20070628082337.C4F9539C0B2@new.scipy.org>

Author: cookedm
Date: 2007-06-28 03:23:20 -0500 (Thu, 28 Jun 2007)
New Revision: 3118

Modified:
   trunk/Lib/special/__init__.py
Log:
Lib/special: replace some character codes with dtypes


Modified: trunk/Lib/special/__init__.py
===================================================================
--- trunk/Lib/special/__init__.py	2007-06-25 22:37:46 UTC (rev 3117)
+++ trunk/Lib/special/__init__.py	2007-06-28 08:23:20 UTC (rev 3118)
@@ -2,7 +2,7 @@
 # special - Special Functions
 #
 
-from info import __doc__
+from info import __doc__, __docformat__
 #from special_version import special_version as __version__
 
 from basic import *


From scipy-svn at scipy.org  Thu Jun 28 04:24:03 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu, 28 Jun 2007 03:24:03 -0500 (CDT)
Subject: [Scipy-svn] r3119 - trunk/Lib/special
Message-ID: <20070628082403.4384E39C0BB@new.scipy.org>

Author: cookedm
Date: 2007-06-28 03:23:59 -0500 (Thu, 28 Jun 2007)
New Revision: 3119

Modified:
   trunk/Lib/special/basic.py
Log:
Lib/special: Really replace some character codes with dtypes


Modified: trunk/Lib/special/basic.py
===================================================================
--- trunk/Lib/special/basic.py	2007-06-28 08:23:20 UTC (rev 3118)
+++ trunk/Lib/special/basic.py	2007-06-28 08:23:59 UTC (rev 3119)
@@ -25,10 +25,10 @@
     x,n = asarray(x), asarray(n)
     n = asarray(n + (x-x))
     x = asarray(x + (n-n))
-    if x.dtype.char in ['fFdD']:
-        ytype = x.dtype.char
+    if issubdtype(x.dtype, inexact):
+        ytype = x.dtype
     else:
-        ytype = 'd'
+        ytype = float
     y = zeros(x.shape,ytype)
 
     mask1 = (n <= 0) | (n <> floor(n))
@@ -406,7 +406,7 @@
     Limit as q->infinity of 1F1(q;a;z/q)
     """
     z = asarray(z)
-    if z.dtype.char in ['F', 'D']:
+    if issubdtype(z.dtype, complexfloating):
         arg = 2*sqrt(abs(z))
         num = where(z>=0, iv(v-1,arg), jv(v-1,arg))
         den = abs(z)**((v-1.0)/2)


From scipy-svn at scipy.org  Thu Jun 28 04:27:13 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu, 28 Jun 2007 03:27:13 -0500 (CDT)
Subject: [Scipy-svn] r3120 - trunk/Lib/special/cephes
Message-ID: <20070628082713.E002F39C0B2@new.scipy.org>

Author: cookedm
Date: 2007-06-28 03:24:55 -0500 (Thu, 28 Jun 2007)
New Revision: 3120

Modified:
   trunk/Lib/special/cephes/gamma.c
Log:
Lib/special: gamma(x) for x >~ 700 would return nan instead of inf. This fixes #53.


Modified: trunk/Lib/special/cephes/gamma.c
===================================================================
--- trunk/Lib/special/cephes/gamma.c	2007-06-28 08:23:59 UTC (rev 3119)
+++ trunk/Lib/special/cephes/gamma.c	2007-06-28 08:24:55 UTC (rev 3120)
@@ -291,6 +291,13 @@
 {
 double y, w, v;
 
+if (x >= MAXGAM) {
+#ifdef INFINITIES
+	return (INFINITY);
+#else
+	return (MAXNUM);
+#endif
+}
 w = 1.0/x;
 w = 1.0 + w * polevl( w, STIR, 4 );
 y = exp(x);


From scipy-svn at scipy.org  Fri Jun 29 00:25:19 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Thu, 28 Jun 2007 23:25:19 -0500 (CDT)
Subject: [Scipy-svn] r3121 - trunk/Lib/sparse
Message-ID: <20070629042519.C6F2D39C040@new.scipy.org>

Author: wnbell
Date: 2007-06-28 23:23:55 -0500 (Thu, 28 Jun 2007)
New Revision: 3121

Modified:
   trunk/Lib/sparse/sparse.py
Log:
small edit to CSR/CSC transpose and conj


Modified: trunk/Lib/sparse/sparse.py
===================================================================
--- trunk/Lib/sparse/sparse.py	2007-06-28 08:24:55 UTC (rev 3120)
+++ trunk/Lib/sparse/sparse.py	2007-06-29 04:23:55 UTC (rev 3121)
@@ -667,29 +667,11 @@
 
     def _transpose(self, cls, copy=False):
         M, N = self.shape
-        if copy:
-            data   = self.data.copy()
-            index = self.indices.copy()
-            indptr = self.indptr.copy()
-        else:
-            data   = self.data
-            index = self.indices
-            indptr = self.indptr
-        return cls((data,index,indptr),(N,M))
+        return cls((self.data,self.indices,self.indptr),(N,M),copy=copy)
         
 
     def conj(self, copy=False):
-        new = self.__class__(self.shape, nzmax=self.nzmax, dtype=self.dtype)
-        if copy:
-            new.data = self.data.conj().copy()
-            new.indices = self.indices.conj().copy()
-            new.indptr = self.indptr.conj().copy()
-        else:
-            new.data = self.data.conj()
-            new.indices = self.indices.conj()
-            new.indptr = self.indptr.conj()
-        new._check()
-        return new
+        return self.__class__((self.data.conj(),self.indices,self.indptr),self.shape,copy=copy)
 
     def _ensure_sorted_indices(self, shape0, shape1, inplace=False):
         """Return a copy of this matrix where the row indices are sorted


From scipy-svn at scipy.org  Fri Jun 29 02:59:14 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri, 29 Jun 2007 01:59:14 -0500 (CDT)
Subject: [Scipy-svn] r3122 - trunk/Lib/sparse
Message-ID: <20070629065914.3203339C0A1@new.scipy.org>

Author: wnbell
Date: 2007-06-29 01:59:02 -0500 (Fri, 29 Jun 2007)
New Revision: 3122

Modified:
   trunk/Lib/sparse/sparse.py
Log:
Made CSR/CSC format _check() more comprehensive by default.
Added constructor arg to (optionally) avoid expensive format check.
This makes transpose() an O(1) operation instead of O(N) as before.


Modified: trunk/Lib/sparse/sparse.py
===================================================================
--- trunk/Lib/sparse/sparse.py	2007-06-29 04:23:55 UTC (rev 3121)
+++ trunk/Lib/sparse/sparse.py	2007-06-29 06:59:02 UTC (rev 3122)
@@ -498,7 +498,7 @@
                                          self.indptr, self.indices, \
                                          self.data, other.indptr, \
                                          other.indices, other.data)
-            return self.__class__((data, ind, indptr), self.shape)
+            return self.__class__((data, ind, indptr), self.shape, check=False)
         elif isdense(other):
             # Convert this matrix to a dense matrix and add them
             return other + self.todense()
@@ -558,7 +558,7 @@
                                                self.indptr, self.indices, \
                                                self.data, other.indptr, \
                                                other.indices, other.data)
-            return self.__class__((data, ind, indptr), self.shape)
+            return self.__class__((data, ind, indptr), self.shape, check=False)
         else:
             raise TypeError, "unsupported type for sparse matrix power"
 
@@ -573,7 +573,7 @@
             indptr, ind, data = fn(M, N, self.indptr, self.indices, \
                                    self.data, other.indptr, \
                                    other.indices, other.data)
-            return self.__class__((data, ind, indptr), (M, N))      
+            return self.__class__((data, ind, indptr), (M, N),check=False)      
         elif isdense(other):
             # This is SLOW!  We need a more efficient implementation
             # of sparse * dense matrix multiplication!
@@ -636,12 +636,8 @@
 
 
     def copy(self):
-        new = self.__class__(self.shape, nzmax=self.nzmax, dtype=self.dtype)
-        new.data = self.data.copy()
-        new.indices = self.indices.copy()
-        new.indptr = self.indptr.copy()
-        new._check()
-        return new
+        return self.__class__((self.data.copy(),self.indices.copy(),self.indptr.copy()), \
+                              self.shape, dtype=self.dtype, check=False)
 
 
     def _get_slice(self, i, start, stop, stride, dims):
@@ -667,11 +663,11 @@
 
     def _transpose(self, cls, copy=False):
         M, N = self.shape
-        return cls((self.data,self.indices,self.indptr),(N,M),copy=copy)
+        return cls((self.data,self.indices,self.indptr),(N,M),copy=copy,check=False)
         
 
     def conj(self, copy=False):
-        return self.__class__((self.data.conj(),self.indices,self.indptr),self.shape,copy=copy)
+        return self.__class__((self.data.conj(),self.indices,self.indptr),self.shape,copy=copy,check=False)
 
     def _ensure_sorted_indices(self, shape0, shape1, inplace=False):
         """Return a copy of this matrix where the row indices are sorted
@@ -706,7 +702,7 @@
           - csc_matrix((data, row, ptr), [(M, N)])
             standard CSC representation
     """
-    def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False):
+    def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False, check=True):
         _cs_matrix.__init__(self)
         if isdense(arg1):
             self.dtype = getdtype(dtype, arg1)
@@ -776,11 +772,11 @@
                         self.dtype = getdtype(dtype, s)
                         if copy:
                             self.data = array(s)
-                            self.indices = array(rowind)
+                            self.indices = array(rowind, dtype=intc)
                             self.indptr = array(indptr, dtype=intc)
                         else:
                             self.data = asarray(s)
-                            self.indices = asarray(rowind)
+                            self.indices = asarray(rowind, dtype=intc)
                             self.indptr = asarray(indptr, dtype=intc)
                     except:
                         raise ValueError, "unrecognized form for csc_matrix constructor"
@@ -797,29 +793,37 @@
         else:
             raise ValueError, "unrecognized form for csc_matrix constructor"
 
-        # Read existing matrix dimensions
-        try:
-            (oldM, oldN) = self.shape
-        except:
-            oldM = oldN = None
+
+
         # Read matrix dimensions given, if any
         if dims is not None:
             try:
                 (M, N) = dims
+                M,N = int(M),int(N)
             except (TypeError, ValueError), e:
                 raise TypeError, "dimensions not understood"
         else:
+            # Read existing matrix dimensions
+            try:
+                (oldM, oldN) = self.shape
+            except:
+                oldM = oldN = None
+
+            # Expand if necessary
             M = N = None
-        if len(self.indices) > 0:
-            M = max(oldM, M, int(amax(self.indices)) + 1)
-        else:
-            # Matrix is completely empty
-            M = max(oldM, M)
-        N = max(0, oldN, N, len(self.indptr) - 1)
+            N = max(0, oldN, N, len(self.indptr) - 1)
+            if len(self.indices) > 0:
+                M = max(oldM, M, int(amax(self.indices)) + 1)
+            else:
+                # Matrix is completely empty
+                M = max(oldM, M)
+                
         self.shape = (M, N)
-        self._check()
 
-    def _check(self):
+        self._check(check)
+
+
+    def _check(self,full_check=True):
         # some functions pass floats
         self.shape = tuple([int(x) for x in self.shape])
 
@@ -832,15 +836,23 @@
                   "should be rank 1"
         if (len(self.data) != nzmax):
             raise ValueError, "data and row list should have same length"
+        if (self.indptr[0] != 0):
+            raise ValueError,"index pointer should start with 0"
         if (len(self.indptr) != N+1):
             raise ValueError, "index pointer should be of of size N+1"
         if (nzmax < nnz):
             raise ValueError, "nzmax must not be less than nnz"
-        if (nnz>0) and (amax(self.indices[:nnz]) >= M):
-            raise ValueError, "row values must be < M"
-        if (nnz>0) and (amin(self.indices[:nnz]) < 0):
-            raise ValueError, "row values must be >= 0"
 
+        if full_check:
+            #check format validity (more expensive)
+            if nnz > 0:
+                if amax(self.indices[:nnz]) >= M:
+                    raise ValueError, "row values must be < M"
+                if amin(self.indices[:nnz]) < 0:
+                    raise ValueError, "row values must be >= 0"
+            if numpy.diff(self.indptr).min() < 0:
+                raise ValueError,'indptr values must form a non-decreasing sequence'
+
         if (self.indptr[-1] > len(self.indices)):
             raise ValueError, \
                   "Last value of index list should be less than "\
@@ -883,7 +895,7 @@
                                             self.indptr, self.indices, \
                                             self.data, ocs.indptr, \
                                             ocs.indices, ocs.data)
-            return csc_matrix((data, rowind, indptr), self.shape)
+            return csc_matrix((data, rowind, indptr), self.shape, check=False)
         elif isdense(other):
             # Convert this matrix to a dense matrix and add them.
             return self.todense() + other
@@ -1040,7 +1052,7 @@
     def tocsr(self):
         indptr, colind, data = csctocsr(self.shape[0], self.shape[1], \
                                         self.indptr, self.indices, self.data)
-        return csr_matrix((data, colind, indptr), self.shape)
+        return csr_matrix((data, colind, indptr), self.shape, check=False)
 
     def _toother(self):
         return self.tocsr()
@@ -1092,7 +1104,7 @@
           - csr_matrix((data, col, ptr), [dims=(M, N)])
             standard CSR representation
     """
-    def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False):
+    def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False, check=True):
         _cs_matrix.__init__(self)
         if isdense(arg1):
             self.dtype = getdtype(dtype, arg1)
@@ -1157,11 +1169,11 @@
                         self.dtype = getdtype(dtype, s)
                         if copy:
                             self.data = array(s, dtype=self.dtype)
-                            self.indices = array(colind)
+                            self.indices = array(colind, dtype=intc)
                             self.indptr = array(indptr, dtype=intc)
                         else:
                             self.data = asarray(s, dtype=self.dtype)
-                            self.indices = asarray(colind)
+                            self.indices = asarray(colind, dtype=intc)
                             self.indptr = asarray(indptr, dtype=intc)
                 else:
                     # (data, ij) format
@@ -1176,11 +1188,7 @@
         else:
             raise ValueError, "unrecognized form for csr_matrix constructor"
 
-        # Read existing matrix dimensions
-        try:
-            (oldM, oldN) = self.shape
-        except:
-            oldM = oldN = None
+            
         # Read matrix dimensions given, if any
         if dims is not None:
             try:
@@ -1188,17 +1196,25 @@
             except (TypeError, ValueError), e:
                 raise TypeError, "dimensions not understood"
         else:
+            # Read existing matrix dimensions
+            try:
+                (oldM, oldN) = self.shape
+            except:
+                oldM = oldN = None
+
             M = N = None
-        M = max(0, oldM, M, len(self.indptr) - 1)
-        if len(self.indices) > 0:
-            N = max(oldN, N, int(amax(self.indices)) + 1)
-        else:
-            # Matrix is completely empty
-            N = max(oldN, N)
+            M = max(0, oldM, M, len(self.indptr) - 1)
+            if len(self.indices) > 0:
+                N = max(oldN, N, int(amax(self.indices)) + 1)
+            else:
+                # Matrix is completely empty
+                N = max(oldN, N)
+
         self.shape = (M, N)
-        self._check()
+        
+        self._check(check)
 
-    def _check(self):
+    def _check(self,full_check=True):
         # some functions pass floats
         self.shape = tuple([int(x) for x in self.shape])
 
@@ -1211,12 +1227,22 @@
                   "should be rank 1"
         if (len(self.data) != nzmax):
             raise ValueError, "data and row list should have same length"
+        if (self.indptr[0] != 0):
+            raise ValueError,"index pointer should start with 0"
         if (len(self.indptr) != M+1):
             raise ValueError, "index pointer should be of length #rows + 1"
-        if (nnz>0) and (amax(self.indices[:nnz]) >= N):
-            raise ValueError, "column values must be < N"
-        if (nnz>0) and (amin(self.indices[:nnz]) < 0):
-            raise ValueError, "column values must be >= 0"
+
+
+        if full_check:
+            #check format validity (more expensive)
+            if nnz > 0:
+                if amax(self.indices[:nnz]) >= N:
+                    raise ValueError, "column values must be < N"
+                if amin(self.indices[:nnz]) < 0:
+                    raise ValueError, "column values must be >= 0"
+            if numpy.diff(self.indptr).min() < 0:
+                raise ValueError,'indptr values must form a non-decreasing sequence'
+
         if (nnz > nzmax):
             raise ValueError, \
                   "last value of index list should be less than "\
@@ -1388,7 +1414,7 @@
     def tocsc(self):
         indptr, rowind, data = csrtocsc(self.shape[0], self.shape[1], \
                                         self.indptr, self.indices, self.data)
-        return csc_matrix((data, rowind, indptr), self.shape)
+        return csc_matrix((data, rowind, indptr), self.shape, check=False)
 
     def _toother(self):
         return self.tocsc()


From scipy-svn at scipy.org  Fri Jun 29 03:00:58 2007
From: scipy-svn at scipy.org (scipy-svn at scipy.org)
Date: Fri, 29 Jun 2007 02:00:58 -0500 (CDT)
Subject: [Scipy-svn] r3123 - in trunk/Lib: linalg/tests sparse/tests
	special/tests
Message-ID: <20070629070058.C854B39C1AB@new.scipy.org>

Author: wnbell
Date: 2007-06-29 02:00:29 -0500 (Fri, 29 Jun 2007)
New Revision: 3123

Modified:
   trunk/Lib/linalg/tests/test_iterative.py
   trunk/Lib/sparse/tests/test_sparse.py
   trunk/Lib/special/tests/test_basic.py
Log:
commented out some print statements in unittests


Modified: trunk/Lib/linalg/tests/test_iterative.py
===================================================================
--- trunk/Lib/linalg/tests/test_iterative.py	2007-06-29 06:59:02 UTC (rev 3122)
+++ trunk/Lib/linalg/tests/test_iterative.py	2007-06-29 07:00:29 UTC (rev 3123)
@@ -27,7 +27,7 @@
 def callback(x):
     global A, b
     res = b-dot(A,x)
-    print "||A.x - b|| = " + str(norm(dot(A,x)-b))
+    #print "||A.x - b|| = " + str(norm(dot(A,x)-b))
 
 class test_iterative_solvers(NumpyTestCase):
     def __init__(self, *args, **kwds):

Modified: trunk/Lib/sparse/tests/test_sparse.py
===================================================================
--- trunk/Lib/sparse/tests/test_sparse.py	2007-06-29 06:59:02 UTC (rev 3122)
+++ trunk/Lib/sparse/tests/test_sparse.py	2007-06-29 07:00:29 UTC (rev 3123)
@@ -516,15 +516,15 @@
             assert(e.A.dtype.type == mytype)
 
     def check_ensure_sorted_indices(self):
-        print 'sorting CSR indices'
+        #print 'sorting CSR indices'
         data = arange( 5 )
         col = array( [7, 2, 1, 5, 4] )
         ptr = [0, 3, 5]
         asp = csr_matrix( (data, col, ptr), dims = (2,10) )
         bsp = asp.copy()
-        print 'in\n', asp
+        #print 'in\n', asp
         asp.ensure_sorted_indices( inplace = True )
-        print 'out\n', asp
+        #print 'out\n', asp
         assert_array_equal(asp.indices,[1, 2, 7, 4, 5])
         for ir in range( asp.shape[0] ):
             for ic in range( asp.shape[1] ):
@@ -575,15 +575,15 @@
             assert(e.A.dtype.type == mytype)
 
     def check_ensure_sorted_indices(self):
-        print 'sorting CSC indices'
+        #print 'sorting CSC indices'
         data = arange( 5 )
         row = array( [7, 2, 1, 5, 4] )
         ptr = [0, 3, 5]
         asp = csc_matrix( (data, row, ptr), dims = (10,2) )
         bsp = asp.copy()
-        print 'in\n', asp
+        #print 'in\n', asp
         asp.ensure_sorted_indices( inplace = True )
-        print 'out\n', asp
+        #print 'out\n', asp
         assert_array_equal(asp.indices,[1, 2, 7, 4, 5])
         for ir in range( asp.shape[0] ):
             for ic in range( asp.shape[1] ):

Modified: trunk/Lib/special/tests/test_basic.py
===================================================================
--- trunk/Lib/special/tests/test_basic.py	2007-06-29 06:59:02 UTC (rev 3122)
+++ trunk/Lib/special/tests/test_basic.py	2007-06-29 07:00:29 UTC (rev 3123)
@@ -1814,7 +1814,7 @@
         psub = poly1d([2,-1])
         q = 4*rand()
         p = q-1 + 2*rand()
-        print "shifted jacobi p,q = ", p, q
+        #print "shifted jacobi p,q = ", p, q
         G0 = sh_jacobi(0,p,q)
         G1 = sh_jacobi(1,p,q)
         G2 = sh_jacobi(2,p,q)