[Scipy-svn] r2051 - in trunk/Lib/sandbox/svm: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Thu Jul 6 22:33:37 EDT 2006
Author: fullung
Date: 2006-07-06 21:33:26 -0500 (Thu, 06 Jul 2006)
New Revision: 2051
Modified:
trunk/Lib/sandbox/svm/__init__.py
trunk/Lib/sandbox/svm/model.py
trunk/Lib/sandbox/svm/regression.py
trunk/Lib/sandbox/svm/tests/test_regression.py
Log:
Training of regression model works.
Modified: trunk/Lib/sandbox/svm/__init__.py
===================================================================
--- trunk/Lib/sandbox/svm/__init__.py 2006-07-07 02:00:33 UTC (rev 2050)
+++ trunk/Lib/sandbox/svm/__init__.py 2006-07-07 02:33:26 UTC (rev 2051)
@@ -18,4 +18,5 @@
from classification import *
from regression import *
from oneclass import *
-from data import *
+from dataset import *
+from kernel import *
Modified: trunk/Lib/sandbox/svm/model.py
===================================================================
--- trunk/Lib/sandbox/svm/model.py 2006-07-07 02:00:33 UTC (rev 2050)
+++ trunk/Lib/sandbox/svm/model.py 2006-07-07 02:33:26 UTC (rev 2051)
@@ -1,81 +1,72 @@
__all__ = [
- 'Model'
+ 'LibSvmModel'
]
+from ctypes import *
+
+from kernel import *
import libsvm
-import utils
-import numpy as N
-from ctypes import *
+class LibSvmModel:
+ def __init__(self, svm_type, kernel,
+ tolerance=0.001, shrinking=True, cache_size=40):
+ """
+ Parameters:
-class Model:
- def __init__(self, dtype, shrinking=True, cache_size=40, tol=0.001):
- self.dtype = dtype
+ - `svm_type`: XXX
+ - `kernel`: XXX
+ - `tolerance`: tolerance of termination criterion
+ - `shrinking`: whether to use the shrinking heuristics
+ - `cache_size` kernel evaluation cache size (MB)
+ """
+ self.svm_type = svm_type
+ self.kernel = kernel
+ self.tolerance = tolerance
self.shrinking = shrinking
self.cache_size = cache_size
- self.tol = tol
- def fit(self, data):
- svm_data = self.dtype.convert_train_data(data)
- # libsvm requires data to be sorted by label
- svm_data.sort(cmp=lambda x, y: cmp(x[0], y[0]))
- param = self.setup_svm_parameter(svm_data)
+ param = libsvm.svm_parameter()
- # XXX find better way to keep x and y references
- problem, x, y = self.setup_svm_problem(svm_data)
+ if isinstance(kernel, LinearKernel):
+ param.kernel_type = libsvm.LINEAR
+ elif isinstance(kernel, PolynomialKernel):
+ param.kernel_type = libsvm.POLY
+ param.degree = kernel.degree
+ param.gamma = kernel.gamma
+ param.coef0 = kernel.coef0
+ elif isinstance(kernel, RBFKernel):
+ param.kernel_type = libsvm.RBF
+ param.gamma = kernel.gamma
+ elif isinstance(kernel, SigmoidKernel):
+ param.kernel_type = libsvm.SIGMOID
+ param.gamma = kernel.gamma
+ param.coef0 = kernel.coef0
+ else:
+ raise ValueError, 'unknown kernel type'
- self.check_problem_param(problem, param)
- model = libsvm.svm_train(problem, param)
- self.results = self.Results(self.dtype, model)
+ param.svm_type = svm_type
+ param.eps = tolerance
+ param.shrinking = shrinking
+ param.cache_size = cache_size
- # XXX find better way to keep svm_data reference
- self.results.svm_data = svm_data
+ self.param = param
- return self.results
-
- def predict(self, x):
- return self.results.predict(svm_data)
-
- def setup_svm_parameter(self, svm_data):
- param = libsvm.svm_parameter()
- param.svm_type = getattr(self, 'svm_type')
- param.kernel_type = getattr(self.dtype, 'kernel_type')
- param.degree = getattr(self.dtype, 'degree', 0)
- if hasattr(self.dtype, 'gamma') and self.dtype.gamma is None:
- maxlen = 0
- for x in svm_data:
- maxlen = max(maxlen, x[1]['index'][:-1].max())
- param.gamma = 1.0/maxlen
- else:
- param.gamma = getattr(self.dtype, 'gamma', 0.0)
- param.coef0 = getattr(self.dtype, 'coef0', 0)
- param.cache_size = getattr(self, 'cache_size')
- param.eps = getattr(self, 'tol')
- param.C = getattr(self, 'cost', 0.0)
- # XXX nr_weight, weight_label, weight
- param.nr_weight = 0
- # XXX setting these to None zeros svm_type
- ###param.weight_label = None
- ###param.weight = None
- param.nu = getattr(self, 'nu', 0.0)
- param.p = getattr(self, 'epsilon', 0.0)
- param.shrinking = getattr(self, 'shrinking')
- param.probability = 0
- return param
-
- def setup_svm_problem(self, svm_data):
+ def fit(self, dataset):
+ # XXX don't poke around in dataset's internals
problem = libsvm.svm_problem()
- problem.l = len(svm_data)
+ problem.l = len(dataset.data)
y = (c_double*problem.l)()
x = (POINTER(libsvm.svm_node)*problem.l)()
- for i, (label, node) in enumerate(svm_data):
- y[i] = label
- x[i] = utils.array_as_ctype(node, libsvm.svm_node)
+ for i, (yi, xi) in enumerate(dataset.data):
+ y[i] = yi
+ x[i] = cast(xi.ctypes.data, POINTER(libsvm.svm_node))
problem.x = cast(addressof(x), POINTER(POINTER(libsvm.svm_node)))
problem.y = cast(addressof(y), POINTER(c_double))
- return problem, x, y
- def check_problem_param(self, problem, param):
+ self._check_problem_param(problem, self.param)
+ model = libsvm.svm_train(problem, self.param)
+
+ def _check_problem_param(self, problem, param):
error_msg = libsvm.svm_check_parameter(problem, param)
if error_msg:
raise ValueError, error_msg
Modified: trunk/Lib/sandbox/svm/regression.py
===================================================================
--- trunk/Lib/sandbox/svm/regression.py 2006-07-07 02:00:33 UTC (rev 2050)
+++ trunk/Lib/sandbox/svm/regression.py 2006-07-07 02:33:26 UTC (rev 2051)
@@ -1,51 +1,40 @@
-from model import Model
-from results import Results
+__all__ = [
+ 'LibSvmEpsilonRegressionModel',
+ 'LibSvmNuRegressionModel'
+ ]
+
+from model import LibSvmModel
import libsvm
-import utils
+"""
class RegressionResults(Results):
- def __init__(self, dtype, model):
- Results.__init__(self, dtype, model)
+ def __init__(self, model):
+ Results.__init__(self, model)
model = model.contents
self.rho = model.rho[0]
self.sv_coef = model.sv_coef[0][:model.l]
- def predict(self, x):
- x = self.dtype.convert_test_data(x)
- xptr = utils.array_as_ctype(x, libsvm.svm_node)
- return libsvm.svm_predict(self.model, xptr)
+ def predict(self, dataset):
+ #x = self.dtype.convert_test_data(x)
+ #xptr = utils.array_as_ctype(x, libsvm.svm_node)
+ #return libsvm.svm_predict(self.model, xptr)
+ raise NotImplementedError
+"""
-class EpsilonSVRModel(Model):
- """
- A model for epsilon-SV regression.
-
- See also:
-
- - Smola, Scholkopf: A Tutorial on Support Vector Regression
- - Gunn: Support Vector Machines for Classification and Regression
- - Muller, Vapnik: Using Support Vector Machines for Time Series
- Prediction
- """
-
- Results = RegressionResults
-
- def __init__(self, dtype, cost=1.0, epsilon=0.1, **kwargs):
- Model.__init__(self, dtype, **kwargs)
- self.svm_type = libsvm.EPSILON_SVR
- self.cost = cost
+class LibSvmEpsilonRegressionModel(LibSvmModel):
+ def __init__(self, kernel, epsilon=0.1, cost=1.0, **kwargs):
+ LibSvmModel.__init__(self, libsvm.EPSILON_SVR, kernel, **kwargs)
self.epsilon = epsilon
-
-class NuSVRModel(Model):
- """
- A model for nu-SV regression.
-
- See also: Scholkopf, et al.: New Support Vector Algorithms
- """
-
- Results = RegressionResults
-
- def __init__(self, dtype, cost=1.0, nu=0.5, **kwargs):
- Model.__init__(self, dtype, **kwargs)
- self.svm_type = libsvm.NU_SVR
self.cost = cost
+ self.param.p = epsilon
+ self.param.C = cost
+ self.param.probability = 1
+
+class LibSvmNuRegressionModel(LibSvmModel):
+ def __init__(self, kernel, nu=0.5, cost=1.0, **kwargs):
+ LibSvmModel.__init__(self, libsvm.NU_SVR, kernel, **kwargs)
self.nu = nu
+ self.cost = cost
+ self.param.nu = nu
+ self.param.C = cost
+ self.param.probability = 1
Modified: trunk/Lib/sandbox/svm/tests/test_regression.py
===================================================================
--- trunk/Lib/sandbox/svm/tests/test_regression.py 2006-07-07 02:00:33 UTC (rev 2050)
+++ trunk/Lib/sandbox/svm/tests/test_regression.py 2006-07-07 02:33:26 UTC (rev 2051)
@@ -1,22 +1,31 @@
from numpy.testing import *
+import numpy as N
-# XXX remove this
-import os, sys
-sys.path.insert(0, os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..')))
+from svm.regression import *
+from svm.dataset import LibSvmRegressionDataSet
+from svm.kernel import LinearKernel
-import svm
-import numpy as N
+class test_regression(NumpyTestCase):
+ def check_basics(self):
+ Model = LibSvmEpsilonRegressionModel
+ Kernel = LinearKernel()
+ Model(Kernel)
+ Model(Kernel, epsilon=0.1)
+ Model(Kernel, cost=1.0)
+ model = Model(Kernel, shrinking=False)
+ self.assert_(not model.shrinking)
-class test_regression(NumpyTestCase):
- def check_epsilon_svr(self):
+ def check_epsilon(self):
y = [10., 20., 30., 40.]
- x = [[0, 0], [0, 1], [1, 0], [1, 1]]
- data = zip(y, x)
- dtype = svm.LinearData()
- model = svm.EpsilonSVRModel(dtype, cost=10.0, epsilon=0.1)
- results = model.fit(data)
- for label, sample in data:
- print results.predict(sample)
+ x = [N.array([0, 0]),
+ N.array([0, 1]),
+ N.array([1, 0]),
+ N.array([1, 1])]
+ dataset = LibSvmRegressionDataSet(zip(y, x))
+ Model = LibSvmEpsilonRegressionModel
+ model = Model(LinearKernel())
+ model.fit(dataset)
+
if __name__ == '__main__':
NumpyTest().run()
More information about the Scipy-svn mailing list