[Scipy-svn] r2051 - in trunk/Lib/sandbox/svm: . tests

Thu Jul 6 22:33:37 EDT 2006

Author: fullung
Date: 2006-07-06 21:33:26 -0500 (Thu, 06 Jul 2006)
New Revision: 2051

Modified:
   trunk/Lib/sandbox/svm/__init__.py
   trunk/Lib/sandbox/svm/model.py
   trunk/Lib/sandbox/svm/regression.py
   trunk/Lib/sandbox/svm/tests/test_regression.py
Log:
Training of regression model works.


Modified: trunk/Lib/sandbox/svm/__init__.py
===================================================================

--- trunk/Lib/sandbox/svm/__init__.py	2006-07-07 02:00:33 UTC (rev 2050)
+++ trunk/Lib/sandbox/svm/__init__.py	2006-07-07 02:33:26 UTC (rev 2051)
@@ -18,4 +18,5 @@
 from classification import *
 from regression import *
 from oneclass import *
-from data import *
+from dataset import *
+from kernel import *

Modified: trunk/Lib/sandbox/svm/model.py
===================================================================
--- trunk/Lib/sandbox/svm/model.py	2006-07-07 02:00:33 UTC (rev 2050)
+++ trunk/Lib/sandbox/svm/model.py	2006-07-07 02:33:26 UTC (rev 2051)
@@ -1,81 +1,72 @@
 __all__ = [
-    'Model'
+    'LibSvmModel'
     ]
 
+from ctypes import *
+
+from kernel import *
 import libsvm
-import utils
 
-import numpy as N
-from ctypes import *
+class LibSvmModel:
+    def __init__(self, svm_type, kernel,
+                 tolerance=0.001, shrinking=True, cache_size=40):
+        """
+        Parameters:
 
-class Model:
-    def __init__(self, dtype, shrinking=True, cache_size=40, tol=0.001):
-        self.dtype = dtype
+        - `svm_type`: libsvm SVM type constant (e.g. libsvm.EPSILON_SVR)
+        - `kernel`: Linear-, Polynomial-, RBF- or SigmoidKernel instance
+        - `tolerance`: tolerance of termination criterion
+        - `shrinking`: whether to use the shrinking heuristics
+        - `cache_size`: kernel evaluation cache size (MB)
+        """
+        self.svm_type = svm_type
+        self.kernel = kernel
+        self.tolerance = tolerance
         self.shrinking = shrinking
         self.cache_size = cache_size
-        self.tol = tol
 
-    def fit(self, data):
-        svm_data = self.dtype.convert_train_data(data)
-        # libsvm requires data to be sorted by label
-        svm_data.sort(cmp=lambda x, y: cmp(x[0], y[0]))
-        param = self.setup_svm_parameter(svm_data)
+        param = libsvm.svm_parameter()
 
-        # XXX find better way to keep x and y references
-        problem, x, y = self.setup_svm_problem(svm_data)
+        if isinstance(kernel, LinearKernel):
+            param.kernel_type = libsvm.LINEAR
+        elif isinstance(kernel, PolynomialKernel):
+            param.kernel_type = libsvm.POLY
+            param.degree = kernel.degree
+            param.gamma = kernel.gamma
+            param.coef0 = kernel.coef0
+        elif isinstance(kernel, RBFKernel):
+            param.kernel_type = libsvm.RBF
+            param.gamma = kernel.gamma
+        elif isinstance(kernel, SigmoidKernel):
+            param.kernel_type = libsvm.SIGMOID
+            param.gamma = kernel.gamma
+            param.coef0 = kernel.coef0
+        else:
+            raise ValueError, 'unknown kernel type'
 
-        self.check_problem_param(problem, param)
-        model = libsvm.svm_train(problem, param)
-        self.results = self.Results(self.dtype, model)
+        param.svm_type = svm_type
+        param.eps = tolerance
+        param.shrinking = shrinking
+        param.cache_size = cache_size
 
-        # XXX find better way to keep svm_data reference
-        self.results.svm_data = svm_data
+        self.param = param
 
-        return self.results
-
-    def predict(self, x):
-        return self.results.predict(svm_data)
-
-    def setup_svm_parameter(self, svm_data):
-        param = libsvm.svm_parameter()
-        param.svm_type = getattr(self, 'svm_type')
-        param.kernel_type = getattr(self.dtype, 'kernel_type')
-        param.degree = getattr(self.dtype, 'degree', 0)
-        if hasattr(self.dtype, 'gamma') and self.dtype.gamma is None:
-            maxlen = 0
-            for x in svm_data:
-                maxlen = max(maxlen, x[1]['index'][:-1].max())
-            param.gamma = 1.0/maxlen
-        else:
-            param.gamma = getattr(self.dtype, 'gamma', 0.0)
-        param.coef0 = getattr(self.dtype, 'coef0', 0)
-        param.cache_size = getattr(self, 'cache_size')
-        param.eps = getattr(self, 'tol')
-        param.C = getattr(self, 'cost', 0.0)
-        # XXX nr_weight, weight_label, weight
-        param.nr_weight = 0
-        # XXX setting these to None zeros svm_type
-        ###param.weight_label = None
-        ###param.weight = None
-        param.nu = getattr(self, 'nu', 0.0)
-        param.p = getattr(self, 'epsilon', 0.0)
-        param.shrinking = getattr(self, 'shrinking')
-        param.probability = 0
-        return param
-
-    def setup_svm_problem(self, svm_data):
+    def fit(self, dataset):
+        # XXX don't poke around in dataset's internals
         problem = libsvm.svm_problem()
-        problem.l = len(svm_data)
+        problem.l = len(dataset.data)
         y = (c_double*problem.l)()
         x = (POINTER(libsvm.svm_node)*problem.l)()
-        for i, (label, node) in enumerate(svm_data):
-            y[i] = label
-            x[i] = utils.array_as_ctype(node, libsvm.svm_node)
+        for i, (yi, xi) in enumerate(dataset.data):
+            y[i] = yi
+            x[i] = cast(xi.ctypes.data, POINTER(libsvm.svm_node))
         problem.x = cast(addressof(x), POINTER(POINTER(libsvm.svm_node)))
         problem.y = cast(addressof(y), POINTER(c_double))
-        return problem, x, y
 
-    def check_problem_param(self, problem, param):
+        self._check_problem_param(problem, self.param)
+        model = libsvm.svm_train(problem, self.param)
+
+    def _check_problem_param(self, problem, param):
         error_msg = libsvm.svm_check_parameter(problem, param)
         if error_msg:
             raise ValueError, error_msg

Modified: trunk/Lib/sandbox/svm/regression.py
===================================================================
--- trunk/Lib/sandbox/svm/regression.py	2006-07-07 02:00:33 UTC (rev 2050)
+++ trunk/Lib/sandbox/svm/regression.py	2006-07-07 02:33:26 UTC (rev 2051)
@@ -1,51 +1,40 @@
-from model import Model
-from results import Results
+__all__ = [
+    'LibSvmEpsilonRegressionModel',
+    'LibSvmNuRegressionModel'
+    ]
+
+from model import LibSvmModel
 import libsvm
-import utils
 
+"""
 class RegressionResults(Results):
-    def __init__(self, dtype, model):
-        Results.__init__(self, dtype, model)
+    def __init__(self, model):
+        Results.__init__(self, model)
         model = model.contents
         self.rho = model.rho[0]
         self.sv_coef = model.sv_coef[0][:model.l]
 
-    def predict(self, x):
-        x = self.dtype.convert_test_data(x)
-        xptr = utils.array_as_ctype(x, libsvm.svm_node)
-        return libsvm.svm_predict(self.model, xptr)
+    def predict(self, dataset):
+        #x = self.dtype.convert_test_data(x)
+        #xptr = utils.array_as_ctype(x, libsvm.svm_node)
+        #return libsvm.svm_predict(self.model, xptr)
+        raise NotImplementedError
+"""
 
-class EpsilonSVRModel(Model):
-    """
-    A model for epsilon-SV regression.
-
-    See also:
-
-    - Smola, Scholkopf: A Tutorial on Support Vector Regression
-    - Gunn: Support Vector Machines for Classification and Regression
-    - Muller, Vapnik: Using Support Vector Machines for Time Series
-      Prediction
-    """
-
-    Results = RegressionResults
-
-    def __init__(self, dtype, cost=1.0, epsilon=0.1, **kwargs):
-        Model.__init__(self, dtype, **kwargs)
-        self.svm_type = libsvm.EPSILON_SVR
-        self.cost = cost
+class LibSvmEpsilonRegressionModel(LibSvmModel):
+    def __init__(self, kernel, epsilon=0.1, cost=1.0, **kwargs):
+        LibSvmModel.__init__(self, libsvm.EPSILON_SVR, kernel, **kwargs)
         self.epsilon = epsilon
-
-class NuSVRModel(Model):
-    """
-    A model for nu-SV regression.
-
-    See also: Scholkopf, et al.: New Support Vector Algorithms
-    """
-
-    Results = RegressionResults
-
-    def __init__(self, dtype, cost=1.0, nu=0.5, **kwargs):
-        Model.__init__(self, dtype, **kwargs)
-        self.svm_type = libsvm.NU_SVR
         self.cost = cost
+        self.param.p = epsilon
+        self.param.C = cost
+        self.param.probability = 1
+
+class LibSvmNuRegressionModel(LibSvmModel):
+    def __init__(self, kernel, nu=0.5, cost=1.0, **kwargs):
+        LibSvmModel.__init__(self, libsvm.NU_SVR, kernel, **kwargs)
         self.nu = nu
+        self.cost = cost
+        self.param.nu = nu
+        self.param.C = cost
+        self.param.probability = 1

Modified: trunk/Lib/sandbox/svm/tests/test_regression.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_regression.py	2006-07-07 02:00:33 UTC (rev 2050)
+++ trunk/Lib/sandbox/svm/tests/test_regression.py	2006-07-07 02:33:26 UTC (rev 2051)
@@ -1,22 +1,31 @@
 from numpy.testing import *
+import numpy as N
 
-# XXX remove this
-import os, sys
-sys.path.insert(0, os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..')))
+from svm.regression import *
+from svm.dataset import LibSvmRegressionDataSet
+from svm.kernel import LinearKernel
 
-import svm
-import numpy as N
+class test_regression(NumpyTestCase):
+    def check_basics(self):
+        Model = LibSvmEpsilonRegressionModel
+        Kernel = LinearKernel()
+        Model(Kernel)
+        Model(Kernel, epsilon=0.1)
+        Model(Kernel, cost=1.0)
+        model = Model(Kernel, shrinking=False)
+        self.assert_(not model.shrinking)
 
-class test_regression(NumpyTestCase):
-    def check_epsilon_svr(self):
+    def check_epsilon(self):
         y = [10., 20., 30., 40.]
-        x = [[0, 0], [0, 1], [1, 0], [1, 1]]
-        data = zip(y, x)
-        dtype = svm.LinearData()
-        model = svm.EpsilonSVRModel(dtype, cost=10.0, epsilon=0.1)
-        results = model.fit(data)
-        for label, sample in data:
-            print results.predict(sample)
+        x = [N.array([0, 0]),
+             N.array([0, 1]),
+             N.array([1, 0]),
+             N.array([1, 1])]
+        dataset = LibSvmRegressionDataSet(zip(y, x))
 
+        Model = LibSvmEpsilonRegressionModel
+        model = Model(LinearKernel())
+        model.fit(dataset)
+
 if __name__ == '__main__':
     NumpyTest().run()