[Scipy-svn] r3066 - in trunk/Lib/sandbox/pyem: . tests

scipy-svn at scipy.org scipy-svn at scipy.org
Fri Jun 1 04:22:13 EDT 2007


Author: cdavid
Date: 2007-06-01 03:21:52 -0500 (Fri, 01 Jun 2007)
New Revision: 3066

Removed:
   trunk/Lib/sandbox/pyem/kmean.py
   trunk/Lib/sandbox/pyem/tests/test_kmean.py
Modified:
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/online_em.py
   trunk/Lib/sandbox/pyem/setup.py
Log:
Remove kmean, since scipy.cluster.vq.kmeans2 now does everything we need

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-05-31 15:25:26 UTC (rev 3065)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-01 08:21:52 UTC (rev 3066)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Thu Nov 16 02:00 PM 2006 J
+# Last Change: Fri Jun 01 05:00 PM 2007 J
 
 # TODO:
 #   - which methods to avoid va shrinking to 0 ? There are several options, 
@@ -12,7 +12,8 @@
 from numpy.random import randn
 #import _c_densities as densities
 import densities
-from kmean import kmean
+#from kmean import kmean
+from scipy.cluster.vq import kmeans2 as kmean
 from gauss_mix import GM
 
 from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND

Deleted: trunk/Lib/sandbox/pyem/kmean.py
===================================================================
--- trunk/Lib/sandbox/pyem/kmean.py	2007-05-31 15:25:26 UTC (rev 3065)
+++ trunk/Lib/sandbox/pyem/kmean.py	2007-06-01 08:21:52 UTC (rev 3066)
@@ -1,76 +0,0 @@
-# /usr/bin/python
-# Last Change: Thu Sep 28 01:00 PM 2006 J
-
-#TODO:
-#   - a demo for kmeans
-
-import numpy as N
-
-def _py_vq(data, code):
-    """ Please do not use directly. Use kmean instead"""
-    # No attempt to be efficient has been made...
-    (n, d)  = data.shape
-    (k, d)  = code.shape
-
-    label   = N.zeros(n, int)
-    for i in range(n):
-        d           = N.sum((data[i, :] - code) ** 2, 1)
-        label[i]    = N.argmin(d)
-
-    return label
-    
-# Try to import pyrex function for vector quantization. If not available,
-# falls back on pure python implementation.
-#%KMEANIMPORT%
-#try:
-#    from scipy.cluster.vq import kmeans as kmean
-#except ImportError:
-#    try:
-#        from c_gmm import _vq
-#    except:
-#        print """c_gmm._vq not found, using pure python implementation instead. 
-#        Kmean will be REALLY slow"""
-#        _vq = _py_vq
-try:
-    from scipy.cluster.vq import vq
-    print "using scipy.cluster.vq"
-    def _vq(*args, **kw): return vq(*args, **kw)[0]
-except ImportError:
-    try:
-        from c_gmm import _vq
-        print "using pyrex vq"
-    except ImportError:
-        print """c_gmm._vq not found, using pure python implementation instead. 
-        Kmean will be REALLY slow"""
-        _vq = _py_vq
-
-def kmean(data, init, iter = 10):
-    """Simple kmean implementation for EM. Runs iter iterations.
-    
-    returns a tuple (code, label), where code are the final
-    centroids, and label are the class label indec for each
-    frame (ie row) of data"""
-
-    data    = N.atleast_2d(data)
-    init    = N.atleast_2d(init)
-
-    (n, d)  = data.shape
-    (k, d1) = init.shape
-
-    if not d == d1:
-        msg = "data and init centers do not have same dimensions..."
-        raise GmmParamError(msg)
-    
-    code    = N.asarray(init.copy())
-    for i in range(iter):
-        # Compute the nearest neighbour for each obs
-        # using the current code book
-        label   = _vq(data, code)
-        # Update the code by computing centroids using the new code book
-        for j in range(k):
-            code[j,:] = N.mean(data[N.where(label==j)], axis=0) 
-
-    return code, label
-
-if __name__ == "__main__":
-    pass

Modified: trunk/Lib/sandbox/pyem/online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/online_em.py	2007-05-31 15:25:26 UTC (rev 3065)
+++ trunk/Lib/sandbox/pyem/online_em.py	2007-06-01 08:21:52 UTC (rev 3066)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Wed Dec 06 09:00 PM 2006 J
+# Last Change: Fri Jun 01 05:00 PM 2007 J
 
 #---------------------------------------------
 # This is not meant to be used yet !!!! I am 
@@ -23,7 +23,7 @@
 
 from gmm_em import ExpMixtureModel, GMM, EM
 from gauss_mix import GM
-from kmean import kmean
+from scipy.cluster.vq import kmeans2 as kmean
 import densities2 as D
 
 import copy

Modified: trunk/Lib/sandbox/pyem/setup.py
===================================================================
--- trunk/Lib/sandbox/pyem/setup.py	2007-05-31 15:25:26 UTC (rev 3065)
+++ trunk/Lib/sandbox/pyem/setup.py	2007-06-01 08:21:52 UTC (rev 3066)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Wed Dec 06 08:00 PM 2006 J
+# Last Change: Fri Jun 01 05:00 PM 2007 J
 # TODO:
 #   - check how to handle cmd line build options with distutils and use
 #   it in the building process
@@ -15,7 +15,6 @@
 for estimating meta parameters of mixtures. """
 
 from os.path import join
-# This import from __init__ looks strange, should check whether there is no other way
 from info import version as pyem_version
 
 DISTNAME    = 'pyem' 
@@ -32,12 +31,8 @@
     config.add_data_dir('tests')
     config.add_data_dir('profile_data')
     config.add_extension('c_gden',
-                         #define_macros=[('LIBSVM_EXPORTS', None),
-                         #               ('LIBSVM_DLL', None)],
                          sources=[join('src', 'c_gden.c')])
     config.add_extension('_rawden',
-                         #define_macros=[('LIBSVM_EXPORTS', None),
-                         #               ('LIBSVM_DLL', None)],
                          sources=[join('src', 'pure_den.c')])
 
     return config
@@ -47,108 +42,3 @@
     #setup(**configuration(top_path='').todict())
     #setup(**configuration(top_path=''))
     setup(configuration=configuration)
-# from distutils.core import setup, Extension
-# from pyem import version as pyem_version
-# 
-# # distutils does not update MANIFEST correctly, removes it
-# import os
-# if os.path.exists('MANIFEST'): os.remove('MANIFEST')
-# from os.path import join
-# 
-# import re
-# 
-# from numpy.distutils.misc_util import get_numpy_include_dirs
-# NUMPYINC    = get_numpy_include_dirs()[0]
-# 
-# # General variables:
-# #   - DISTNAME: name of the distributed package
-# #   - VERSION: the version reference is in pyem/__init__.py file
-# #   - other upper cased variables are the same than the corresponding 
-# #   keywords in setup call
-# DISTNAME    = 'pyem' 
-# VERSION     = pyem_version
-# DESCRIPTION ='A python module for Expectation Maximization learning of mixtures pdf',
-# AUTHOR      ='David Cournapeau',
-# AUTHOR_EMAIL='david at ar.media.kyoto-u.ac.jp',
-# URL         ='http://ar.media.kyoto-u.ac.jp/members/david',
-# 
-# # Source files for extensions
-# 
-# # Functions used to substitute values in File.
-# # Mainly use to replace config.h capabilities
-# def do_subst_in_file(sourcefile, targetfile, dict):
-#     """Replace all instances of the keys of dict with their values.
-#     For example, if dict is {'%VERSION%': '1.2345', '%BASE%': 'MyProg'},
-#     then all instances of %VERSION% in the file will be replaced with 1.2345 etc.
-#     """
-#     try:
-#         f = open(sourcefile, 'rb')
-#         contents = f.read()
-#         f.close()
-#     except:
-#         raise IOError, "Can't read source file %s"%sourcefile
-# 
-#     for (k,v) in dict.items():
-#         contents = re.sub(k, v, contents)
-#     try:
-#         f = open(targetfile, 'wb')
-#         f.write(contents)
-#         f.close()
-#     except:
-#         raise IOError, "Can't read source file %s"%sourcefile
-#     return 0 # success
-#  
-# class SetupOption:
-#     def __init__(self):
-#         self.kmean      = 'py'
-#         self.ext_modules= [Extension(join('pyem', 'c_gden'),
-#                               sources=[join('pyem', 'src', 'c_gden.c')]) ]
-#         self.cmdclass   = {}
-#         self.subsdic     = {'%KMEANIMPORT%': []}
-# 
-#     def _config_kmean(self):
-#         # Check in this order:
-#         #   - kmean in scipy.cluster,
-#         #   - custom vq with pyrex 
-#         #   - custom pure python vq
-#         #try:
-#         #    from scipy.cluster.vq import kmeans
-#         #    self.kmean  = 'scipy'
-#         #    #self.subsdic['%KMEANIMPORT%']   = scipy_kmean
-#         #except ImportError:
-#         #    try:
-#         #        from Pyrex.Distutils import build_ext
-#         #        self.kmean  = 'pyrex'
-#         #        self.ext_modules.append(Extension('pyem/c_gmm', 
-#         #            ['pyem/src/c_gmm.pyx'], include_dirs=[NUMPYINC]))
-#         #        self.cmdclass['build_ext']  = build_ext
-#         #        #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
-#         #    except ImportError:
-#         #        self.kmean  = 'py'
-#         #        #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
-#         try:
-#             from Pyrex.Distutils import build_ext
-#             self.kmean  = 'pyrex'
-#             self.ext_modules.append(Extension('pyem/c_gmm', 
-#                 ['pyem/src/c_gmm.pyx'], include_dirs=[NUMPYINC]))
-#             self.cmdclass['build_ext']  = build_ext
-#             #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
-#         except ImportError:
-#             self.kmean  = 'py'
-#             #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
-#     def setup(self):
-#         self._config_kmean()
-#         #import time
-#         #do_subst_in_file('pyem/kmean.py.in', 'pyem/kmean.py', self.subsdic)
-#         setup(name      = DISTNAME,
-#             version     = VERSION,
-#             description = DESCRIPTION,
-#             author      = AUTHOR,
-#             author_email= AUTHOR_EMAIL,
-#             url         = URL,
-#             packages    = ['pyem', 'pyem.tests', 'pyem.profile_data'],
-#             ext_modules = self.ext_modules,
-#             cmdclass    = self.cmdclass)
-# 
-# stpobj  = SetupOption()
-# stpobj.setup()

Deleted: trunk/Lib/sandbox/pyem/tests/test_kmean.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_kmean.py	2007-05-31 15:25:26 UTC (rev 3065)
+++ trunk/Lib/sandbox/pyem/tests/test_kmean.py	2007-06-01 08:21:52 UTC (rev 3066)
@@ -1,46 +0,0 @@
-#! /usr/bin/env python
-# Last Change: Thu Sep 28 01:00 PM 2006 J
-
-import sys
-from numpy.testing import *
-
-import numpy as N
-
-set_package_path()
-from pyem.kmean import kmean
-restore_path()
-
-#Optional:
-set_local_path()
-# import modules that are located in the same directory as this file.
-restore_path()
-
-# Global data
-X   = N.array([[3.0, 3], [4, 3], [4, 2],
-        [9, 2], [5, 1], [6, 2], [9, 4], 
-        [5, 2], [5, 4], [7, 4], [6, 5]])
-
-codet1  = N.array([[3.0000, 3.0000],
-        [6.2000, 4.0000], 
-        [5.8000, 1.8000]])
-        
-codet2  = N.array([[11.0/3, 8.0/3], 
-        [6.7500, 4.2500],
-        [6.2500, 1.7500]])
-
-class test_kmean(NumpyTestCase):
-    def check_iter1(self, level=1):
-        initc   = N.concatenate(([[X[0]], [X[1]], [X[2]]])) 
-        code    = initc.copy()
-        code1   = kmean(X, code, 1)[0]
-
-        assert_array_almost_equal(code1, codet1)
-    def check_iter2(self, level=1):
-        initc   = N.concatenate(([[X[0]], [X[1]], [X[2]]])) 
-        code    = initc.copy()
-        code2   = kmean(X, code, 2)[0]
-
-        assert_array_almost_equal(code2, codet2)
-
-if __name__ == "__main__":
-    NumpyTest().run()




More information about the Scipy-svn mailing list