[Scipy-svn] r2308 - in trunk/Lib/sandbox/models: . family tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Tue Nov 7 16:51:34 EST 2006
Author: jonathan.taylor
Date: 2006-11-07 15:50:20 -0600 (Tue, 07 Nov 2006)
New Revision: 2308
Modified:
trunk/Lib/sandbox/models/cox.py
trunk/Lib/sandbox/models/family/links.py
trunk/Lib/sandbox/models/formula.py
trunk/Lib/sandbox/models/tests/test_regression.py
trunk/Lib/sandbox/models/tests/test_robust.py
Log:
some name changes: changing CamelCase to lower_case
Modified: trunk/Lib/sandbox/models/cox.py
===================================================================
--- trunk/Lib/sandbox/models/cox.py 2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/cox.py 2006-11-07 21:50:20 UTC (rev 2308)
@@ -3,7 +3,7 @@
import numpy as N
from scipy.sandbox.models import survival, model
-class DiscreteRV:
+class discrete:
"""
A simple little class for working with discrete random vectors.
@@ -15,13 +15,13 @@
self.x = N.array([self.x])
self.n = self.x.shape[0]
if w is None:
- w = N.ones(self.n, N.float64)
+ w = N.ones(self.n, N.float64)
else:
if w.shape[0] != self.n:
raise ValueError, 'incompatible shape for weights w'
if N.any(N.less(w, 0)):
raise ValueError, 'weights should be non-negative'
- self.w = w / w.sum()
+ self.w = w / w.sum()
def mean(self, f=None):
if f is None:
@@ -31,11 +31,11 @@
return (fx * self.w).sum()
def cov(self):
- mu = self.moment()
+ mu = self.mean()
dx = self.x - N.multiply.outer(mu, self.x.shape[1])
return N.dot(dx, N.transpose(dx))
-class Observation(survival.RightCensored):
+class observation(survival.right_censored):
def __getitem__(self, item):
if self.namespace is not None:
@@ -45,18 +45,17 @@
def __init__(self, time, delta, namespace=None):
self.namespace = namespace
- survival.RightCensored.__init__(self, time, delta)
+ survival.right_censored.__init__(self, time, delta)
def __call__(self, formula, time=None, **extra):
return formula(namespace=self, time=time, **extra)
-class ProportionalHazards(model.LikelihoodModel):
+class coxph(model.likelihood_model):
def __init__(self, subjects, formula, time_dependent=False):
self.subjects, self.formula = subjects, formula
self.time_dependent = time_dependent
self.initialize(self.subjects)
-
def initialize(self, subjects):
@@ -142,7 +141,7 @@
if ties == 'breslow':
w = N.exp(N.dot(Z, b))
- rv = DiscreteRV(Z[risk], w=w[risk])
+ rv = discrete(Z[risk], w=w[risk])
score -= rv.mean() * d
elif ties == 'efron':
w = N.exp(N.dot(Z, b))
@@ -150,7 +149,7 @@
for j in range(d):
efron_w = w
efron_w[fail] -= i * w[fail] / d
- rv = DiscreteRV(Z[risk], w=efron_w[risk])
+ rv = discrete(Z[risk], w=efron_w[risk])
score -= rv.mean()
elif ties == 'cox':
raise NotImplementedError, 'Cox tie breaking method not implemented'
@@ -175,7 +174,7 @@
if ties == 'breslow':
w = N.exp(N.dot(Z, b))
- rv = DiscreteRV(Z[risk], w=w[risk])
+ rv = discrete(Z[risk], w=w[risk])
info += rv.cov()
elif ties == 'efron':
w = N.exp(N.dot(Z, b))
@@ -183,7 +182,7 @@
for j in range(d):
efron_w = w
efron_w[fail] -= i * w[fail] / d
- rv = DiscreteRV(Z[risk], w=efron_w[risk])
+ rv = discrete(Z[risk], w=efron_w[risk])
info += rv.cov()
elif ties == 'cox':
raise NotImplementedError, 'Cox tie breaking method not implemented'
@@ -200,7 +199,7 @@
Y = R.standard_exponential((2*n,)) / lin
delta = R.binomial(1, 0.9, size=(2*n,))
- subjects = [Observation(Y[i], delta[i]) for i in range(2*n)]
+ subjects = [observation(Y[i], delta[i]) for i in range(2*n)]
for i in range(2*n):
subjects[i].X = X[i]
@@ -208,7 +207,7 @@
x = F.Quantitative('X')
f = F.Formula(x)
- c = ProportionalHazards(subjects, f)
+ c = coxph(subjects, f)
c.cache()
c.newton([0.4])
Modified: trunk/Lib/sandbox/models/family/links.py
===================================================================
--- trunk/Lib/sandbox/models/family/links.py 2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/family/links.py 2006-11-07 21:50:20 UTC (rev 2308)
@@ -3,11 +3,9 @@
class Link:
- def __init__(self):
- pass
+ def initialize(self, Y):
+ return N.asarray(Y).mean() * N.ones(Y.shape)
- pass
-
class Logit(Link):
"""
@@ -17,26 +15,16 @@
"""
tol = 1.0e-10
- init = 1.0e-03
- def clean(self, p, inverse=False, initialize=False):
- if initialize:
- tol = Logit.tol
- else:
- tol = Logit.init
+ def clean(self, p):
+ return N.clip(p, Logit.tol, 1. - Logit.tol)
- if not inverse:
- return N.clip(p, tol, 1. - tol)
- else:
- l = self(tol); u = self(1 - tol)
- return N.clip(p, l, u)
-
- def __call__(self, p, initialize=True, **extra):
- p = self.clean(p, **extra)
+ def __call__(self, p):
+ p = self.clean(p)
return N.log(p / (1. - p))
def inverse(self, z):
- t = N.exp(self.clean(z, inverse=True))
+ t = N.exp(z)
return t / (1. + t)
def deriv(self, p):
@@ -57,7 +45,7 @@
def __init__(self, power=1.):
self.power = power
- def __call__(self, x, **extra):
+ def __call__(self, x):
return N.power(x, self.power)
def inverse(self, x):
@@ -108,16 +96,10 @@
"""
tol = 1.0e-10
- init = 1.0e-03
def clean(self, x):
- if initialize:
- tol = Logit.tol
- else:
- tol = Logit.init
+ return N.clip(x, Logit.tol, N.inf)
- return N.clip(x, tol, N.inf)
-
def __call__(self, x, **extra):
x = self.clean(x)
return N.log(x)
@@ -143,7 +125,7 @@
def __init__(self, dbn=scipy.stats.norm):
self.dbn = dbn
- def __call__(self, p, **extra):
+ def __call__(self, p):
p = self.clean(p)
return self.dbn.ppf(p)
@@ -181,7 +163,7 @@
"""
- def __call__(self, p, **extra):
+ def __call__(self, p):
p = self.clean(p)
return N.log(-N.log(p))
Modified: trunk/Lib/sandbox/models/formula.py
===================================================================
--- trunk/Lib/sandbox/models/formula.py 2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/formula.py 2006-11-07 21:50:20 UTC (rev 2308)
@@ -4,7 +4,7 @@
terms = {}
-class Term:
+class term:
"""
This class is very simple: it is just a named term in a model formula.
@@ -15,7 +15,7 @@
"""
- def __init__(self, name, func=None, termname=None, namespace=terms):
+ def __init__(self, name, func=None, termname=None, namespace={}):
self.name = name
@@ -39,22 +39,22 @@
def __add__(self, other):
"""
- Formula(self) + Formula(other)
+ formula(self) + formula(other)
"""
- other = Formula(other)
+ other = formula(other)
return other + self
def __mul__(self, other):
"""
- Formula(self) * Formula(other)
+ formula(self) * formula(other)
"""
if other.name is 'intercept':
- return Formula(self)
+ return formula(self)
elif self.name is 'intercept':
- return Formula(other)
+ return formula(other)
- other = Formula(other)
+ other = formula(other)
return other * self
def names(self):
@@ -67,28 +67,32 @@
else:
return list(self.name)
- def __call__(self, namespace=terms, usefn=True, **extra):
+ def __call__(self, namespace=terms, usefn=True, args=(), kw={}):
"""
Return the columns associated to self in a design matrix.
The default behaviour is to return namespace[self.termname]
where namespace defaults to globals().
If usefn, and self.func exists then return
- self.func(namespace=namespace, **extra)
+
+ self.func(*args, **kw)
+
+ with kw['namespace'] = namespace.
"""
+ kw['namespace'] = namespace
if not hasattr(self, 'func') or not usefn:
val = namespace[self.termname]
- if isinstance(val, Formula):
- val = val(namespace=namespace, **extra)
+ if isinstance(val, formula):
+ val = val(*args, **kw)
elif callable(val):
- val = val(**extra)
+ val = val(*args, **kw)
else:
- val = self.func(namespace=namespace, **extra)
+ val = self.func(*args, **kw)
val = N.asarray(val)
return N.squeeze(val)
-class Factor(Term):
+class factor(term):
"""
A categorical factor.
@@ -96,7 +100,7 @@
def __init__(self, termname, keys, ordinal=False):
"""
- Factor is initialized with keys, representing all valid
+ factor is initialized with keys, representing all valid
levels of the factor.
"""
@@ -114,7 +118,7 @@
# FIXME: n is not defined here
col = [float(self.keys.index(v[i])) for i in range(n)]
return N.array(col)
- Term.__init__(self, self.name, func=func)
+ term.__init__(self, self.name, func=func)
else:
def func(namespace=terms):
@@ -124,9 +128,9 @@
col = [float((v[i] == key)) for i in range(len(v))]
value.append(col)
return N.array(value)
- Term.__init__(self, ['(%s==%s)' % (self.termname, str(key)) for key in self.keys], func=func, termname=self.termname)
+ term.__init__(self, ['(%s==%s)' % (self.termname, str(key)) for key in self.keys], func=func, termname=self.termname)
- def __call__(self, namespace=terms, values=False, **extra):
+ def __call__(self, namespace=terms, values=False, args=(), kw={}):
"""
Return either the columns in the design matrix, or the
actual values of the factor, if values==True.
@@ -135,9 +139,9 @@
if namespace is None:
namespace = globals()
if not values:
- return Term.__call__(self, namespace=namespace, usefn=True, **extra)
+ return term.__call__(self, namespace=namespace, usefn=True, args=args, kw=kw)
else:
- return Term.__call__(self, namespace=namespace, usefn=False, **extra)
+ return term.__call__(self, namespace=namespace, usefn=False, args=args, kw=kw)
def verify(self, values):
"""
@@ -149,19 +153,19 @@
def __add__(self, other):
"""
- Formula(self) + Formula(other)
+ formula(self) + formula(other)
- When adding \'intercept\' to a Factor, this just returns self.
+ When adding \'intercept\' to a factor, this just returns self.
"""
if other.name is 'intercept':
- return Formula(self)
+ return formula(self)
else:
- return Term.__add__(self, other)
+ return term.__add__(self, other)
def main_effect(self, reference=None):
"""
- Return the 'main effect' columns of a Factor, choosing
+ Return the 'main effect' columns of a factor, choosing
a reference column number to remove.
"""
@@ -181,12 +185,12 @@
keep.pop(reference)
__names = self.names()
_names = ['%s-%s' % (__names[keep[i]], __names[reference]) for i in range(len(keep))]
- return Term(_names, func=func, termname='%s:maineffect' % self.termname)
+ return term(_names, func=func, termname='%s:maineffect' % self.termname)
-class Quantitative(Term):
+class quantitative(term):
"""
- A subclass of Term that presumes namespace[self.termname] is
+ A subclass of term that presumes namespace[self.termname] is
an ndarray.
Basically used for __pow__ method and (looking forward) for splines.
@@ -195,7 +199,7 @@
def __pow__(self, power):
"""
- Raise the quantitative Term's values to an integer power, i.e.
+ Raise the quantitative term's values to an integer power, i.e.
polynomial.
"""
try:
@@ -211,13 +215,13 @@
def func(obj=self, namespace=terms, power=power, **extra):
x = N.asarray(obj(namespace=namespace, **extra))
return N.power(x, power)
- value = Term(name, func=func)
+ value = term(name, func=func)
value.power = power
return value
-class FuncQuant(Quantitative):
+class func_quant(quantitative):
"""
- A Term for a quantitative function of a Term.
+ A term for a quantitative function of a term.
"""
counter = 0
@@ -238,18 +242,18 @@
except:
termname = 'f%d(%s)' % (FuncQuant.counter, quant.name)
FuncQuant.counter += 1
- Term.__init__(self, termname, func=func)
+ term.__init__(self, termname, func=func)
-class Formula:
+class formula:
"""
- A Formula object for manipulating design matrices in regression models,
- essentially consisting of a list of Term instances.
+ A formula object for manipulating design matrices in regression models,
+ essentially consisting of a list of term instances.
The object supports addition and multiplication which correspond
to concatenation and pairwise multiplication, respectively,
- of the columns of the two Formulas.
+ of the columns of the two formulas.
"""
def _terms_changed(self):
@@ -258,19 +262,19 @@
def __init__(self, terms):
"""
- Create a Formula from either:
+ Create a formula from either:
- i) a Formula object
- ii) a sequence of Term instances
- iii) one Term
+ i) a formula object
+ ii) a sequence of term instances
+ iii) one term
"""
- if isinstance(terms, Formula):
+ if isinstance(terms, formula):
self.terms = copy.copy(list(terms.terms))
elif type(terms) is types.ListType:
self.terms = terms
- elif isinstance(terms, Term):
+ elif isinstance(terms, term):
self.terms = [terms]
else:
raise ValueError
@@ -279,18 +283,18 @@
def __str__(self):
"""
- String representation of list of termnames of a Formula.
+ String representation of list of termnames of a formula.
"""
value = []
for term in self.terms:
value += [term.termname]
return '<formula: %s>' % ' + '.join(value)
- def __call__(self, namespace=terms, nrow=-1, **extra):
+ def __call__(self, namespace=terms, nrow=-1, args=(), kw={}):
"""
- Create (transpose) of the design matrix of the Formula within
- namespace. Extra arguments are passed to each Term instance. If
- the Formula just contains an intercept, then the keyword
+ Create (transpose) of the design matrix of the formula within
+ namespace. Extra arguments are passed to each term instance. If
+ the formula just contains an intercept, then the keyword
argument 'n' indicates the number of rows (observations).
"""
@@ -299,7 +303,7 @@
allvals = []
intercept = False
for term in self.terms:
- val = term(namespace=namespace, **extra)
+ val = term(namespace=namespace, args=args, kw=kw)
if term.termname == 'intercept':
intercept = True
elif val.ndim == 1:
@@ -330,7 +334,7 @@
Determine whether a given term is in a formula.
"""
- if not isinstance(term, Formula):
+ if not isinstance(term, formula):
return term.termname in self.termnames()
elif len(term.terms) == 1:
term = term.terms[0]
@@ -365,7 +369,7 @@
def names(self):
"""
- Return a list of the names in the Formula. The order of the
+ Return a list of the names in the formula. The order of the
names corresponds to the order of the columns when self
is evaluated.
"""
@@ -378,7 +382,7 @@
def termnames(self):
"""
Return a list of the term names in the formula. These
- are the names of each Term instance in self.
+ are the names of each term instance in self.
"""
names = []
@@ -386,21 +390,21 @@
names += [term.termname]
return names
- def design(self, namespace=terms, **keywords):
+ def design(self, namespace=terms, args=(), kw={}):
"""
transpose(self(namespace=namespace, **keywords))
"""
- return N.transpose(self(namespace=namespace, **keywords))
+ return self(namespace=namespace, args=args, kw=kw).T
def __mul__(self, other, nested=False):
"""
- This returns a Formula whose columns are the pairwise
+ This returns a formula whose columns are the pairwise
product of the columns of self and other.
TO DO: check for nesting relationship. Should not be too difficult.
"""
- other = Formula(other)
+ other = formula(other)
selftermnames = self.termnames()
othertermnames = other.termnames()
@@ -423,9 +427,9 @@
othernames = other.terms[j].names()
if self.terms[i].name is 'intercept':
- term = other.terms[j]
+ _term = other.terms[j]
elif other.terms[j].name is 'intercept':
- term = self.terms[i]
+ _term = self.terms[i]
else:
names = []
@@ -451,36 +455,36 @@
value.append(selfval[r] * otherval[s])
return N.array(value)
- term = Term(names, func=func, termname=termname)
- terms.append(term)
+ _term = term(names, func=func, termname=termname)
+ terms.append(_term)
- return Formula(terms)
+ return formula(terms)
def __add__(self, other):
"""
- Return a Formula whose columns are the
+ Return a formula whose columns are the
concatenation of the columns of self and other.
- Terms in the formula are sorted alphabetically.
+ terms in the formula are sorted alphabetically.
"""
- other = Formula(other)
+ other = formula(other)
terms = self.terms + other.terms
pieces = [(term.name, term) for term in terms]
pieces.sort()
terms = [piece[1] for piece in pieces]
- return Formula(terms)
+ return formula(terms)
def __sub__(self, other):
"""
- Return a Formula with all terms in other removed from self.
- If other contains Term instances not in Formula, this
+ Return a formula with all terms in other removed from self.
+ If other contains term instances not in formula, this
function does not raise an exception.
"""
- other = Formula(other)
+ other = formula(other)
terms = copy.copy(self.terms)
for term in other.terms:
@@ -488,7 +492,7 @@
if terms[i].termname == term.termname:
terms.pop(i)
break
- return Formula(terms)
+ return formula(terms)
def isnested(A, B, namespace=globals()):
"""
@@ -524,9 +528,9 @@
def _intercept_fn(nrow=1, **extra):
return N.ones((1,nrow))
-I = Term('intercept', func=_intercept_fn)
+I = term('intercept', func=_intercept_fn)
I.__doc__ = """
-Intercept term in a Formula. If intercept is the
+Intercept term in a formula. If intercept is the
only term in the formula, then a keywords argument
\'nrow\' is needed.
@@ -536,7 +540,7 @@
>>> I(nrow=5)
array([1, 1, 1, 1, 1])
->>> f=Formula(I)
+>>> f=formula(I)
>>> f(nrow=5)
array([1, 1, 1, 1, 1])
Modified: trunk/Lib/sandbox/models/tests/test_regression.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_regression.py 2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/tests/test_regression.py 2006-11-07 21:50:20 UTC (rev 2308)
@@ -1,6 +1,6 @@
import unittest
from numpy.random import standard_normal
-from scipy.sandbox.models.regression import OLSModel, ARModel
+from scipy.sandbox.models.regression import ols_model, ar_model
from numpy.testing import *
W = standard_normal
@@ -10,14 +10,14 @@
def testOLS(self):
X = W((40,10))
Y = W((40,))
- model = OLSModel(design=X)
+ model = ols_model(design=X)
results = model.fit(Y)
self.assertEquals(results.df_resid, 30)
def testAR(self):
X = W((40,10))
Y = W((40,))
- model = ARModel(design=X, rho=0.4)
+ model = ar_model(design=X, rho=0.4)
results = model.fit(Y)
self.assertEquals(results.df_resid, 30)
@@ -25,7 +25,7 @@
X = W((40,10))
X[:,0] = X[:,1] + X[:,2]
Y = W((40,))
- model = OLSModel(design=X)
+ model = ols_model(design=X)
results = model.fit(Y)
self.assertEquals(results.df_resid, 31)
@@ -33,14 +33,10 @@
X = W((40,10))
X[:,0] = X[:,1] + X[:,2]
Y = W((40,))
- model = ARModel(design=X, rho=0.9)
+ model = ar_model(design=X, rho=0.9)
results = model.fit(Y)
self.assertEquals(results.df_resid, 31)
-def suite():
- suite = unittest.makeSuite(RegressionTest)
- return suite
-
if __name__ == '__main__':
ScipyTest.run()
Modified: trunk/Lib/sandbox/models/tests/test_robust.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_robust.py 2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/tests/test_robust.py 2006-11-07 21:50:20 UTC (rev 2308)
@@ -1,4 +1,4 @@
-import models as S
+import scipy.sandbox.models as S
import unittest
import numpy.random as R
import numpy as N
@@ -10,7 +10,7 @@
def testRobust(self):
X = W((40,10))
Y = W((40,))
- model = S.rlm.RobustLinearModel(design=X)
+ model = S.rlm(design=X)
results = model.fit(Y)
self.assertEquals(results.df_resid, 30)
@@ -18,15 +18,10 @@
X = W((40,10))
X[:,0] = X[:,1] + X[:,2]
Y = W((40,))
- model = S.rlm.RobustLinearModel(design=X)
+ model = S.rlm(design=X)
results = model.fit(Y)
self.assertEquals(results.df_resid, 31)
-def suite():
- suite = unittest.makeSuite(RegressionTest)
- return suite
-
-
if __name__ == '__main__':
unittest.main()
More information about the Scipy-svn
mailing list