[Scipy-svn] r2308 - in trunk/Lib/sandbox/models: . family tests

Tue Nov 7 16:51:34 EST 2006

Author: jonathan.taylor
Date: 2006-11-07 15:50:20 -0600 (Tue, 07 Nov 2006)
New Revision: 2308

Modified:
   trunk/Lib/sandbox/models/cox.py
   trunk/Lib/sandbox/models/family/links.py
   trunk/Lib/sandbox/models/formula.py
   trunk/Lib/sandbox/models/tests/test_regression.py
   trunk/Lib/sandbox/models/tests/test_robust.py
Log:
some name changes: changing CamelCase to lower_case


Modified: trunk/Lib/sandbox/models/cox.py
===================================================================

--- trunk/Lib/sandbox/models/cox.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/cox.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -3,7 +3,7 @@
 import numpy as N
 from scipy.sandbox.models import survival, model
 
-class DiscreteRV:
+class discrete:
 
     """
     A simple little class for working with discrete random vectors.
@@ -15,13 +15,13 @@
             self.x = N.array([self.x])
         self.n = self.x.shape[0]
         if w is None:
-            w = N.ones(self.n, N.float64)
+            w = N.ones(self.n, N.float64) 
         else:
             if w.shape[0] != self.n:
                 raise ValueError, 'incompatible shape for weights w'
             if N.any(N.less(w, 0)):
                 raise ValueError, 'weights should be non-negative'
-            self.w = w / w.sum()
+        self.w = w / w.sum()
 
     def mean(self, f=None):
         if f is None:
@@ -31,11 +31,11 @@
         return (fx * self.w).sum()
 
     def cov(self):
-        mu = self.moment()
+        mu = self.mean()
         dx = self.x - N.multiply.outer(mu, self.x.shape[1])
         return N.dot(dx, N.transpose(dx))
 
-class Observation(survival.RightCensored):
+class observation(survival.right_censored):
 
     def __getitem__(self, item):
         if self.namespace is not None:
@@ -45,18 +45,17 @@
 
     def __init__(self, time, delta, namespace=None):
         self.namespace = namespace
-        survival.RightCensored.__init__(self, time, delta)
+        survival.right_censored.__init__(self, time, delta)
 
     def __call__(self, formula, time=None, **extra):
         return formula(namespace=self, time=time, **extra)
 
-class ProportionalHazards(model.LikelihoodModel):
+class coxph(model.likelihood_model):
 
     def __init__(self, subjects, formula, time_dependent=False):
         self.subjects, self.formula = subjects, formula
         self.time_dependent = time_dependent
         self.initialize(self.subjects)
-        
 
     def initialize(self, subjects):
 
@@ -142,7 +141,7 @@
 
             if ties == 'breslow':
                 w = N.exp(N.dot(Z, b))
-                rv = DiscreteRV(Z[risk], w=w[risk])
+                rv = discrete(Z[risk], w=w[risk])
                 score -= rv.mean() * d
             elif ties == 'efron':
                 w = N.exp(N.dot(Z, b))
@@ -150,7 +149,7 @@
                 for j in range(d):
                     efron_w = w
                     efron_w[fail] -= i * w[fail] / d
-                    rv = DiscreteRV(Z[risk], w=efron_w[risk])
+                    rv = discrete(Z[risk], w=efron_w[risk])
                     score -= rv.mean()
             elif ties == 'cox':
                 raise NotImplementedError, 'Cox tie breaking method not implemented'
@@ -175,7 +174,7 @@
 
             if ties == 'breslow':
                 w = N.exp(N.dot(Z, b))
-                rv = DiscreteRV(Z[risk], w=w[risk])
+                rv = discrete(Z[risk], w=w[risk])
                 info += rv.cov()
             elif ties == 'efron':
                 w = N.exp(N.dot(Z, b))
@@ -183,7 +182,7 @@
                 for j in range(d):
                     efron_w = w
                     efron_w[fail] -= i * w[fail] / d
-                    rv = DiscreteRV(Z[risk], w=efron_w[risk])
+                    rv = discrete(Z[risk], w=efron_w[risk])
                     info += rv.cov()
             elif ties == 'cox':
                 raise NotImplementedError, 'Cox tie breaking method not implemented'
@@ -200,7 +199,7 @@
     Y = R.standard_exponential((2*n,)) / lin
     delta = R.binomial(1, 0.9, size=(2*n,))
 
-    subjects = [Observation(Y[i], delta[i]) for i in range(2*n)]
+    subjects = [observation(Y[i], delta[i]) for i in range(2*n)]
     for i in range(2*n):
         subjects[i].X = X[i]
 
@@ -208,7 +207,7 @@
     x = F.Quantitative('X')
     f = F.Formula(x)
 
-    c = ProportionalHazards(subjects, f)
+    c = coxph(subjects, f)
 
     c.cache()
     c.newton([0.4])

Modified: trunk/Lib/sandbox/models/family/links.py
===================================================================
--- trunk/Lib/sandbox/models/family/links.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/family/links.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -3,11 +3,9 @@
 
 class Link:
 
-    def __init__(self):
-        pass
+    def initialize(self, Y):
+        return N.asarray(Y).mean() * N.ones(Y.shape)
 
-    pass
-
 class Logit(Link):
 
     """
@@ -17,26 +15,16 @@
     """
 
     tol = 1.0e-10
-    init = 1.0e-03
 
-    def clean(self, p, inverse=False, initialize=False):
-        if initialize:
-            tol = Logit.tol
-        else:
-            tol = Logit.init
+    def clean(self, p):
+        return N.clip(p, Logit.tol, 1. - Logit.tol)
 
-        if not inverse:
-            return N.clip(p, tol, 1. - tol)
-        else:
-            l = self(tol); u = self(1 - tol)
-            return N.clip(p, l, u)
-
-    def __call__(self, p, initialize=True, **extra):
-        p = self.clean(p, **extra)
+    def __call__(self, p):
+        p = self.clean(p)
         return N.log(p / (1. - p))
 
     def inverse(self, z):
-        t = N.exp(self.clean(z, inverse=True))
+        t = N.exp(z)
         return t / (1. + t)
 
     def deriv(self, p):
@@ -57,7 +45,7 @@
     def __init__(self, power=1.):
         self.power = power
 
-    def __call__(self, x, **extra):
+    def __call__(self, x):
         return N.power(x, self.power)
 
     def inverse(self, x):
@@ -108,16 +96,10 @@
     """
 
     tol = 1.0e-10
-    init = 1.0e-03
 
     def clean(self, x):
-        if initialize:
-            tol = Logit.tol
-        else:
-            tol = Logit.init
+        return N.clip(x, Logit.tol, N.inf)
 
-        return N.clip(x, tol, N.inf)
-
     def __call__(self, x, **extra):
         x = self.clean(x)
         return N.log(x)
@@ -143,7 +125,7 @@
     def __init__(self, dbn=scipy.stats.norm):
         self.dbn = dbn
 
-    def __call__(self, p, **extra):
+    def __call__(self, p):
         p = self.clean(p)
         return self.dbn.ppf(p)
 
@@ -181,7 +163,7 @@
 
     """
 
-    def __call__(self, p, **extra):
+    def __call__(self, p):
         p = self.clean(p)
         return N.log(-N.log(p))
 

Modified: trunk/Lib/sandbox/models/formula.py
===================================================================
--- trunk/Lib/sandbox/models/formula.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/formula.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -4,7 +4,7 @@
 
 terms = {}
 
-class Term:
+class term:
 
     """
     This class is very simple: it is just a named term in a model formula.
@@ -15,7 +15,7 @@
     
     """
 
-    def __init__(self, name, func=None, termname=None, namespace=terms):
+    def __init__(self, name, func=None, termname=None, namespace={}):
         
         self.name = name
 
@@ -39,22 +39,22 @@
 
     def __add__(self, other):
         """
-        Formula(self) + Formula(other)
+        formula(self) + formula(other)
         """
-        other = Formula(other)
+        other = formula(other)
         return other + self
 
     def __mul__(self, other):
         """
-        Formula(self) * Formula(other)
+        formula(self) * formula(other)
         """
 
         if other.name is 'intercept':
-            return Formula(self)
+            return formula(self)
         elif self.name is 'intercept':
-            return Formula(other)
+            return formula(other)
         
-        other = Formula(other)
+        other = formula(other)
         return other * self
 
     def names(self):
@@ -67,28 +67,32 @@
         else:
             return list(self.name)
 
-    def __call__(self, namespace=terms, usefn=True, **extra):
+    def __call__(self, namespace=terms, usefn=True, args=(), kw={}):
         """
         Return the columns associated to self in a design matrix.
         The default behaviour is to return namespace[self.termname]
         where namespace defaults to globals().
 
         If usefn, and self.func exists then return
-            self.func(namespace=namespace, **extra)
+
+            self.func(*args, **kw)
+
+        with kw['namespace'] = namespace.
         """
         
+        kw['namespace'] = namespace
         if not hasattr(self, 'func') or not usefn:
             val = namespace[self.termname]
-            if isinstance(val, Formula):
-                val = val(namespace=namespace, **extra)
+            if isinstance(val, formula):
+                val = val(*args, **kw)
             elif callable(val):
-                val = val(**extra)
+                val = val(*args, **kw)
         else:
-            val = self.func(namespace=namespace, **extra)
+            val = self.func(*args, **kw)
         val = N.asarray(val)
         return N.squeeze(val)
 
-class Factor(Term):
+class factor(term):
 
     """
     A categorical factor.
@@ -96,7 +100,7 @@
 
     def __init__(self, termname, keys, ordinal=False):
         """
-        Factor is initialized with keys, representing all valid
+        factor is initialized with keys, representing all valid
         levels of the factor.
         """
         
@@ -114,7 +118,7 @@
                 # FIXME: n is not defined here
                 col = [float(self.keys.index(v[i])) for i in range(n)]
                 return N.array(col)
-            Term.__init__(self, self.name, func=func)
+            term.__init__(self, self.name, func=func)
 
         else:
             def func(namespace=terms):
@@ -124,9 +128,9 @@
                     col = [float((v[i] == key)) for i in range(len(v))]
                     value.append(col)
                 return N.array(value)
-            Term.__init__(self, ['(%s==%s)' % (self.termname, str(key)) for key in self.keys], func=func, termname=self.termname)
+            term.__init__(self, ['(%s==%s)' % (self.termname, str(key)) for key in self.keys], func=func, termname=self.termname)
 
-    def __call__(self, namespace=terms, values=False, **extra):
+    def __call__(self, namespace=terms, values=False, args=(), kw={}):
         """
         Return either the columns in the design matrix, or the
         actual values of the factor, if values==True.
@@ -135,9 +139,9 @@
         if namespace is None:
             namespace = globals()
         if not values:
-            return Term.__call__(self, namespace=namespace, usefn=True, **extra)
+            return term.__call__(self, namespace=namespace, usefn=True, args=args, kw=kw)
         else:
-            return Term.__call__(self, namespace=namespace, usefn=False, **extra)
+            return term.__call__(self, namespace=namespace, usefn=False, args=args, kw=kw)
 
     def verify(self, values):
         """
@@ -149,19 +153,19 @@
 
     def __add__(self, other):
         """
-        Formula(self) + Formula(other)
+        formula(self) + formula(other)
 
-        When adding \'intercept\' to a Factor, this just returns self.
+        When adding \'intercept\' to a factor, this just returns self.
         """
         
         if other.name is 'intercept':
-            return Formula(self)
+            return formula(self)
         else:
-            return Term.__add__(self, other)
+            return term.__add__(self, other)
 
     def main_effect(self, reference=None):
         """
-        Return the 'main effect' columns of a Factor, choosing
+        Return the 'main effect' columns of a factor, choosing
         a reference column number to remove.
         """
 
@@ -181,12 +185,12 @@
         keep.pop(reference)
         __names = self.names()
         _names = ['%s-%s' % (__names[keep[i]], __names[reference]) for i in range(len(keep))]
-        return Term(_names, func=func, termname='%s:maineffect' % self.termname)
+        return term(_names, func=func, termname='%s:maineffect' % self.termname)
 
-class Quantitative(Term):
+class quantitative(term):
 
     """
-    A subclass of Term that presumes namespace[self.termname] is
+    A subclass of term that presumes namespace[self.termname] is
     an ndarray.
 
     Basically used for __pow__ method and (looking forward) for splines.
@@ -195,7 +199,7 @@
 
     def __pow__(self, power):
         """
-        Raise the quantitative Term's values to an integer power, i.e.
+        Raise the quantitative term's values to an integer power, i.e.
         polynomial.
         """
         try:
@@ -211,13 +215,13 @@
         def func(obj=self, namespace=terms, power=power, **extra):
             x = N.asarray(obj(namespace=namespace, **extra))
             return N.power(x, power)
-        value = Term(name, func=func)
+        value = term(name, func=func)
         value.power = power
         return value
 
-class FuncQuant(Quantitative):
+class func_quant(quantitative):
     """
-    A Term for a quantitative function of a Term.
+    A term for a quantitative function of a term.
     """
 
     counter = 0
@@ -238,18 +242,18 @@
         except:
             termname = 'f%d(%s)' % (FuncQuant.counter, quant.name)
             FuncQuant.counter += 1
-        Term.__init__(self, termname, func=func)
+        term.__init__(self, termname, func=func)
 
-class Formula:
+class formula:
 
     """
 
-    A Formula object for manipulating design matrices in regression models,
-    essentially consisting of a list of Term instances.
+    A formula object for manipulating design matrices in regression models,
+    essentially consisting of a list of term instances.
 
     The object supports addition and multiplication which correspond
     to concatenation and pairwise multiplication, respectively,
-    of the columns of the two Formulas.
+    of the columns of the two formulas.
     """
     
     def _terms_changed(self):
@@ -258,19 +262,19 @@
 
     def __init__(self, terms):
         """
-        Create a Formula from either:
+        Create a formula from either:
 
-        i) a Formula object
-        ii) a sequence of Term instances
-        iii) one Term
+        i) a formula object
+        ii) a sequence of term instances
+        iii) one term
 
         """
 
-        if isinstance(terms, Formula):
+        if isinstance(terms, formula):
             self.terms = copy.copy(list(terms.terms))
         elif type(terms) is types.ListType:
             self.terms = terms
-        elif isinstance(terms, Term):
+        elif isinstance(terms, term):
             self.terms = [terms]
         else: 
             raise ValueError
@@ -279,18 +283,18 @@
 
     def __str__(self):
         """
-        String representation of list of termnames of a Formula.
+        String representation of list of termnames of a formula.
         """
         value = []
         for term in self.terms:
             value += [term.termname]
         return '<formula: %s>' % ' + '.join(value)
 
-    def __call__(self, namespace=terms, nrow=-1, **extra):
+    def __call__(self, namespace=terms, nrow=-1, args=(), kw={}):
         """
-        Create (transpose) of the design matrix of the Formula within
-        namespace. Extra arguments are passed to each Term instance. If
-        the Formula just contains an intercept, then the keyword
+        Create (transpose) of the design matrix of the formula within
+        namespace. Extra arguments are passed to each term instance. If
+        the formula just contains an intercept, then the keyword
         argument 'n' indicates the number of rows (observations).
         """
         
@@ -299,7 +303,7 @@
         allvals = []
         intercept = False
         for term in self.terms:
-            val = term(namespace=namespace, **extra)
+            val = term(namespace=namespace, args=args, kw=kw)
             if term.termname == 'intercept':
                 intercept = True
             elif val.ndim == 1:
@@ -330,7 +334,7 @@
         Determine whether a given term is in a formula.
         """
 
-        if not isinstance(term, Formula):
+        if not isinstance(term, formula):
             return term.termname in self.termnames()
         elif len(term.terms) == 1:
             term = term.terms[0]
@@ -365,7 +369,7 @@
 
     def names(self):
         """
-        Return a list of the names in the Formula. The order of the
+        Return a list of the names in the formula. The order of the
         names corresponds to the order of the columns when self
         is evaluated.
         """
@@ -378,7 +382,7 @@
     def termnames(self):
         """
         Return a list of the term names in the formula. These
-        are the names of each Term instance in self.
+        are the names of each term instance in self.
         """
 
         names = []
@@ -386,21 +390,21 @@
             names += [term.termname]
         return names
 
-    def design(self, namespace=terms, **keywords):
+    def design(self, namespace=terms, args=(), kw={}):
         """
         transpose(self(namespace=namespace, **keywords))
         """
-        return N.transpose(self(namespace=namespace, **keywords))
+        return self(namespace=namespace, args=args, kw=kw).T
 
     def __mul__(self, other, nested=False):
         """
-        This returns a Formula whose columns are the pairwise
+        This returns a formula whose columns are the pairwise
         product of the columns of self and other.
 
         TO DO: check for nesting relationship. Should not be too difficult.
         """
 
-        other = Formula(other)
+        other = formula(other)
 
         selftermnames = self.termnames()
         othertermnames = other.termnames()
@@ -423,9 +427,9 @@
                 othernames = other.terms[j].names()
 
                 if self.terms[i].name is 'intercept':
-                    term = other.terms[j]
+                    _term = other.terms[j]
                 elif other.terms[j].name is 'intercept':
-                    term = self.terms[i]
+                    _term = self.terms[i]
 
                 else:
                     names = []
@@ -451,36 +455,36 @@
                                 value.append(selfval[r] * otherval[s])
 
                         return N.array(value)
-                    term = Term(names, func=func, termname=termname)
-                terms.append(term)
+                    _term = term(names, func=func, termname=termname)
+                terms.append(_term)
 
-        return Formula(terms)
+        return formula(terms)
     
     def __add__(self, other):
 
         """
-        Return a Formula whose columns are the
+        Return a formula whose columns are the
         concatenation of the columns of self and other.
 
-        Terms in the formula are sorted alphabetically.
+        terms in the formula are sorted alphabetically.
         """
 
-        other = Formula(other)
+        other = formula(other)
         terms = self.terms + other.terms
         pieces = [(term.name, term) for term in terms]
         pieces.sort()
         terms = [piece[1] for piece in pieces]
-        return Formula(terms)
+        return formula(terms)
 
     def __sub__(self, other):
 
         """
-        Return a Formula with all terms in other removed from self.
-        If other contains Term instances not in Formula, this
+        Return a formula with all terms in other removed from self.
+        If other contains term instances not in formula, this
         function does not raise an exception.
         """
 
-        other = Formula(other)
+        other = formula(other)
         terms = copy.copy(self.terms)
 
         for term in other.terms:
@@ -488,7 +492,7 @@
                 if terms[i].termname == term.termname:
                     terms.pop(i)
                     break 
-        return Formula(terms)
+        return formula(terms)
 
 def isnested(A, B, namespace=globals()):
     """
@@ -524,9 +528,9 @@
 
 def _intercept_fn(nrow=1, **extra):
     return N.ones((1,nrow))
-I = Term('intercept', func=_intercept_fn)
+I = term('intercept', func=_intercept_fn)
 I.__doc__ = """
-Intercept term in a Formula. If intercept is the
+Intercept term in a formula. If intercept is the
 only term in the formula, then a keywords argument
 \'nrow\' is needed.
 
@@ -536,7 +540,7 @@
 >>> I(nrow=5)
 array([1, 1, 1, 1, 1])
 
->>> f=Formula(I)
+>>> f=formula(I)
 >>> f(nrow=5)
 array([1, 1, 1, 1, 1])
 

Modified: trunk/Lib/sandbox/models/tests/test_regression.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_regression.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/tests/test_regression.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -1,6 +1,6 @@
 import unittest
 from numpy.random import standard_normal
-from scipy.sandbox.models.regression import OLSModel, ARModel
+from scipy.sandbox.models.regression import ols_model, ar_model
 from numpy.testing import *
 
 W = standard_normal
@@ -10,14 +10,14 @@
     def testOLS(self):
         X = W((40,10))
         Y = W((40,))
-        model = OLSModel(design=X)
+        model = ols_model(design=X)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 30)
 
     def testAR(self):
         X = W((40,10))
         Y = W((40,))
-        model = ARModel(design=X, rho=0.4)
+        model = ar_model(design=X, rho=0.4)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 30)
 
@@ -25,7 +25,7 @@
         X = W((40,10))
         X[:,0] = X[:,1] + X[:,2]
         Y = W((40,))
-        model = OLSModel(design=X)
+        model = ols_model(design=X)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 31)
 
@@ -33,14 +33,10 @@
         X = W((40,10))
         X[:,0] = X[:,1] + X[:,2]
         Y = W((40,))
-        model = ARModel(design=X, rho=0.9)
+        model = ar_model(design=X, rho=0.9)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 31)
 
-def suite():
-    suite = unittest.makeSuite(RegressionTest)
-    return suite
-        
 
 if __name__ == '__main__':
     ScipyTest.run()

Modified: trunk/Lib/sandbox/models/tests/test_robust.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_robust.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/tests/test_robust.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -1,4 +1,4 @@
-import models as S
+import scipy.sandbox.models as S
 import unittest
 import numpy.random as R
 import numpy as N
@@ -10,7 +10,7 @@
     def testRobust(self):
         X = W((40,10))
         Y = W((40,))
-        model = S.rlm.RobustLinearModel(design=X)
+        model = S.rlm(design=X)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 30)
 
@@ -18,15 +18,10 @@
         X = W((40,10))
         X[:,0] = X[:,1] + X[:,2]
         Y = W((40,))
-        model = S.rlm.RobustLinearModel(design=X)
+        model = S.rlm(design=X)
         results = model.fit(Y)
         self.assertEquals(results.df_resid, 31)
 
 
-def suite():
-    suite = unittest.makeSuite(RegressionTest)
-    return suite
-        
-
 if __name__ == '__main__':
     unittest.main()