[Scipy-svn] r3275 - trunk/scipy/sandbox/models/family

Wed Aug 29 00:05:44 EDT 2007

Author: jonathan.taylor
Date: 2007-08-28 23:05:33 -0500 (Tue, 28 Aug 2007)
New Revision: 3275

Modified:
   trunk/scipy/sandbox/models/family/__init__.py
   trunk/scipy/sandbox/models/family/family.py
   trunk/scipy/sandbox/models/family/links.py
   trunk/scipy/sandbox/models/family/varfuncs.py
Log:
docstrings for scipy.sandbox.models.family

Modified: trunk/scipy/sandbox/models/family/__init__.py
===================================================================

--- trunk/scipy/sandbox/models/family/__init__.py	2007-08-28 19:23:59 UTC (rev 3274)
+++ trunk/scipy/sandbox/models/family/__init__.py	2007-08-29 04:05:33 UTC (rev 3275)
@@ -1,3 +1,15 @@
+'''
+This module contains the one-parameter exponential families used
+for fitting GLMs and GAMs.
+
+These families are described in
+
+   P. McCullagh and J. A. Nelder.  "Generalized linear models."
+   Monographs on Statistics and Applied Probability.
+   Chapman & Hall, London, 1983.
+
+'''
+
 from scipy.sandbox.models.family.family import Gaussian, Family, \
      Poisson, Gamma, InverseGaussian, Binomial
 

Modified: trunk/scipy/sandbox/models/family/family.py
===================================================================
--- trunk/scipy/sandbox/models/family/family.py	2007-08-28 19:23:59 UTC (rev 3274)
+++ trunk/scipy/sandbox/models/family/family.py	2007-08-29 04:05:33 UTC (rev 3275)
@@ -2,13 +2,36 @@
 from scipy.sandbox.models.family import links as L
 from scipy.sandbox.models.family import varfuncs as V
 
-class Family:
+class Family(object):
 
+    """
+    A class to model one-parameter exponential
+    families.
+
+    INPUTS:
+       link      -- a Link instance
+       variance  -- a variance function (models means as a function
+                    of mean)
+
+    """
+
     valid = [-N.inf, N.inf]
 
     tol = 1.0e-05
 
+    def _setlink(self, link):
+        self._link = link
+        if hasattr(self, "links"):
+            if link not in links:
+                raise ValueError, 'invalid link for family, should be in %s' % `self.links`
+
+    def _getlink(self):
+        return self._link
+
+    link = property(_getlink, _setlink)
+
     def __init__(self, link, variance):
+        
         self.link = link
         self.variance = variance
 
@@ -16,32 +39,93 @@
         
         """
         Weights for IRLS step.
+
+        w = 1 / (link'(mu)**2 * variance(mu))
+        
+        INPUTS:
+           mu  -- mean parameter in exponential family
+
+        OUTPUTS:
+           w   -- weights used in WLS step of GLM/GAM fit
+                  
         """
 
         return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
 
     def deviance(self, Y, mu, scale=1.):
+        """
+        Deviance of (Y,mu) pair. Deviance is usually defined
+        as the difference
+
+        DEV = (SUM_i -2 log Likelihood(Y_i,mu_i) + 2 log Likelihood(mu_i,mu_i)) / scale
+    
+        INPUTS:
+           Y     -- response variable
+           mu    -- mean parameter
+           scale -- optional scale in denominator of deviance
+
+        OUTPUTS: dev
+           dev   -- DEV, as described aboce
+
+        """   
+        
         return N.power(self.devresid(Y, mu), 2).sum() / scale
 
     def devresid(self, Y, mu):
+        """
+        The deviance residuals, defined as the residuals
+        in the deviance.
+
+        Without knowing the link, they default to Pearson residuals
+
+        resid_P = (Y - mu) * sqrt(weight(mu))
+
+        INPUTS:
+           Y     -- response variable
+           mu    -- mean parameter
+
+        OUTPUTS: resid
+           resid -- deviance residuals
+        """
+        
         return (Y - mu) * N.sqrt(self.weights(mu))
 
     def fitted(self, eta):
         """
         Fitted values based on linear predictors eta.
+
+        INPUTS:
+           eta  -- values of linear predictors, say,
+                   X beta in a generalized linear model.
+
+        OUTPUTS: mu
+           mu   -- link.inverse(eta), mean parameter based on eta
+           
         """
         return self.link.inverse(eta)
 
     def predict(self, mu):
         """
         Linear predictors based on given mu values.
+
+        INPUTS:
+           mu   -- mean parameter of one-parameter exponential family
+
+        OUTPUTS: eta
+           eta  -- link(mu), linear predictors, based on
+                   mean parameters mu
+           
         """
         return self.link(mu)
 
 class Poisson(Family):
 
     """
-    Poisson exponential family in glm context.
+    Poisson exponential family.
+
+    INPUTS:
+       link      -- a Link instance
+
     """
 
     links = [L.log, L.identity, L.sqrt]
@@ -49,81 +133,125 @@
     valid = [0, N.inf]
 
     def __init__(self, link=L.log):
-        if link not in Poisson.links:
-            raise ValueError, 'invalid link for Poisson family'
         self.variance = Poisson.variance
         self.link = link
 
     def devresid(self, Y, mu):
+        """
+        Poisson deviance residual
+
+        INPUTS:
+           Y     -- response variable
+           mu    -- mean parameter
+
+        OUTPUTS: resid
+           resid -- deviance residuals
+
+        """
         return N.sign(Y - mu) * N.sqrt(2 * Y * N.log(Y / mu) - 2 * (Y - mu))
 
 class Gaussian(Family):
 
     """
-    Gaussian exponential family in glm context.
+    Gaussian exponential family.
+
+    INPUTS:
+       link      -- a Link instance
+
     """
 
     links = [L.log, L.identity, L.inverse]
     variance = V.constant
 
     def __init__(self, link=L.identity):
-        if link not in Gaussian.links:
-            raise ValueError, 'invalid link for Gaussian family'
         self.variance = Gaussian.variance
         self.link = link
 
     def devresid(self, Y, mu, scale=1.):
+        """
+        Gaussian deviance residual
+
+        INPUTS:
+           Y     -- response variable
+           mu    -- mean parameter
+           scale -- optional scale in denominator (after taking sqrt)
+
+        OUTPUTS: resid
+           resid -- deviance residuals
+        """
+
         return (Y - mu) / N.sqrt(self.variance(mu) * scale)
 
 class Gamma(Family):
 
     """
-    Gaussian exponential family in glm context.
+    Gamma exponential family.
+
+    INPUTS:
+       link      -- a Link instance
+
+    BUGS:
+       no deviance residuals?
+
     """
 
     links = [L.log, L.identity, L.inverse]
     variance = V.mu_squared
 
     def __init__(self, link=L.identity):
-        if link not in Gamma.links:
-            raise ValueError, 'invalid link for Gamma family'
         self.variance = Gamma.variance
         self.link = link
-
     
 
 class Binomial(Family):
 
     """
-    Binomial exponential family in glm context.
+    Binomial exponential family.
+
+    INPUTS:
+       link      -- a Link instance
+       n         -- number of trials for Binomial
     """
 
     links = [L.logit, L.probit, L.cauchy, L.log, L.cloglog]
     variance = V.binary
 
     def __init__(self, link=L.logit, n=1):
-        if link not in Binomial.links:
-            raise ValueError, 'invalid link for Binomial family'
         self.n = n
         self.variance = V.Binomial(n=self.n)
         self.link = link
 
-    def devresid(self, Y, mu, scale=1.):
+    def devresid(self, Y, mu):
+        """
+        Binomial deviance residual
+
+        INPUTS:
+           Y     -- response variable
+           mu    -- mean parameter
+
+        OUTPUTS: resid
+           resid -- deviance residuals
+
+        """
+
         mu = self.link.clean(mu)
         return N.sign(Y - mu) * N.sqrt(-2 * (Y * N.log(mu / self.n) + (self.n - Y) * N.log(1 - mu / self.n)))
 
 class InverseGaussian(Family):
 
     """
-    Gaussian exponential family in glm context.
+    InverseGaussian exponential family.
+
+    INPUTS:
+       link      -- a Link instance
+       n         -- number of trials for Binomial
+
     """
 
     links = [L.inverse_squared, L.inverse, L.identity, L.log]
     variance = V.mu_cubed
 
-    def __init__(self, link=L.identity, n=1):
-        if link not in InverseGaussian.links:
-            raise ValueError, 'invalid link for InverseGaussian family'
+    def __init__(self, link=L.identity):
         self.n = n
         self.variance = InverseGaussian.variance
         self.link = link

Modified: trunk/scipy/sandbox/models/family/links.py
===================================================================
--- trunk/scipy/sandbox/models/family/links.py	2007-08-28 19:23:59 UTC (rev 3274)
+++ trunk/scipy/sandbox/models/family/links.py	2007-08-29 04:05:33 UTC (rev 3275)
@@ -3,31 +3,97 @@
 
 class Link:
 
+    """
+    A generic link function for one-parameter exponential
+    family, with call, inverse and deriv methods.
+
+    """
+
     def initialize(self, Y):
         return N.asarray(Y).mean() * N.ones(Y.shape)
 
+    def __call__(self, p):
+        return NotImplementedError
+
+    def inverse(self, z):
+        return NotImplementedError
+
+    def deriv(self, p):
+        return NotImplementedError
+
+        
 class Logit(Link):
 
     """
     The logit transform as a link function:
 
-    g(x) = log(x / (1 - x))
+    g'(x) = 1 / (x * (1 - x))
+    g^(-1)(x) = exp(x)/(1 + exp(x))
+
     """
 
     tol = 1.0e-10
 
     def clean(self, p):
+        """
+        Clip logistic values to range (tol, 1-tol)
+
+        INPUTS:
+           p     -- probabilities
+
+        OUTPUTS: pclip
+           pclip -- clipped probabilities
+        """
+        
         return N.clip(p, Logit.tol, 1. - Logit.tol)
 
     def __call__(self, p):
+        """
+        Logit transform
+        
+        g(p) = log(p / (1 - p))
+
+        INPUTS:
+           p   -- probabilities
+
+        OUTPUTS: z
+           z   -- logit transform of p
+
+        """
+
         p = self.clean(p)
         return N.log(p / (1. - p))
 
     def inverse(self, z):
+        """
+        Inverse logit transform
+        
+        h(z) = exp(z)/(1+exp(z))
+
+        INPUTS:
+           z -- logit transform of p
+
+        OUTPUTS: p
+           p   -- probabilities
+
+        """
         t = N.exp(z)
         return t / (1. + t)
 
     def deriv(self, p):
+
+        """
+        Derivative of logit transform
+        
+        g(p) = 1 / (p * (1 - p))
+
+        INPUTS:
+           p   -- probabilities
+
+        OUTPUTS: y
+           y   -- derivative of logit transform of p
+
+        """
         p = self.clean(p)
         return 1. / (p * (1 - p))
 
@@ -46,12 +112,50 @@
         self.power = power
 
     def __call__(self, x):
+        """
+        Power transform
+        
+        g(x) = x**self.power
+
+        INPUTS:
+           x   -- mean parameters
+
+        OUTPUTS: z
+           z   -- power transform of x
+
+        """
+
         return N.power(x, self.power)
 
-    def inverse(self, x):
-        return N.power(x, 1. / self.power)
+    def inverse(self, z):
+        """
+        Inverse of power transform
+        
+        g(x) = x**(1/self.power)
 
+        INPUTS:
+           z   -- linear predictors in glm
+
+        OUTPUTS: x
+           x   -- mean parameters
+
+        """
+        return N.power(z, 1. / self.power)
+
     def deriv(self, x):
+        """
+        Derivative of power transform
+        
+        g(x) = self.power * x**(self.power - 1)
+
+        INPUTS:
+           x   -- mean parameters
+
+        OUTPUTS: z
+           z   -- derivative of power transform of x
+
+        """
+
         return self.power * N.power(x, self.power - 1)
 
 inverse = Power(power=-1.)
@@ -101,13 +205,50 @@
         return N.clip(x, Logit.tol, N.inf)
 
     def __call__(self, x, **extra):
+        """
+        Log transform
+        
+        g(x) = log(x)
+
+        INPUTS:
+           x   -- mean parameters
+
+        OUTPUTS: z
+           z   -- log(x)
+
+        """
         x = self.clean(x)
         return N.log(x)
 
     def inverse(self, z):
+        """
+        Inverse of log transform
+        
+        g(x) = exp(x)
+
+        INPUTS:
+           z   -- linear predictors in glm
+
+        OUTPUTS: x
+           x   -- exp(z)
+
+        """
         return N.exp(z)
 
     def deriv(self, x):
+        """
+        Derivative of log transform
+        
+        g(x) = 1/x
+
+        INPUTS:
+           x   -- mean parameters
+
+        OUTPUTS: z
+           z   -- derivative of log transform of x
+
+        """
+
         x = self.clean(x)
         return 1. / x
 
@@ -126,13 +267,49 @@
         self.dbn = dbn
 
     def __call__(self, p):
+        """
+        CDF link
+        
+        g(p) = self.dbn.pdf(p)
+
+        INPUTS:
+           p   -- mean parameters
+
+        OUTPUTS: z
+           z   -- derivative of CDF transform of p
+
+        """
         p = self.clean(p)
         return self.dbn.ppf(p)
 
     def inverse(self, z):
+        """
+        Derivative of CDF link
+        
+        g(z) = self.dbn.cdf(z)
+
+        INPUTS:
+           z   -- linear predictors in glm
+
+        OUTPUTS: p
+           p   -- inverse of CDF link of z
+
+        """
         return self.dbn.cdf(z)
 
     def deriv(self, p):
+        """
+        Derivative of CDF link
+        
+        g(p) = 1/self.dbn.pdf(self.dbn.ppf(p))
+
+        INPUTS:
+           x   -- mean parameters
+
+        OUTPUTS: z
+           z   -- derivative of CDF transform of x
+
+        """
         p = self.clean(p)
         return 1. / self.dbn.pdf(self(p))
 
@@ -164,13 +341,49 @@
     """
 
     def __call__(self, p):
+        """
+        C-Log-Log transform
+        
+        g(p) = log(-log(p))
+
+        INPUTS:
+           p   -- mean parameters
+
+        OUTPUTS: z
+           z   -- log(-log(p))
+
+        """
         p = self.clean(p)
         return N.log(-N.log(p))
 
     def inverse(self, z):
+        """
+        Inverse of C-Log-Log transform
+        
+        g(z) = exp(-exp(z))
+
+        INPUTS:
+           z   -- linear predictor scale
+
+        OUTPUTS: p
+           p   -- mean parameters
+
+        """
         return N.exp(-N.exp(z))
 
     def deriv(self, p):
+        """
+        Derivatve of C-Log-Log transform
+        
+        g(p) = - 1 / (log(p) * p)
+
+        INPUTS:
+           p   -- mean parameters
+
+        OUTPUTS: z
+           z   --  - 1 / (log(p) * p)
+
+        """
         p = self.clean(p)
         return -1. / (N.log(p) * p)
 

Modified: trunk/scipy/sandbox/models/family/varfuncs.py
===================================================================
--- trunk/scipy/sandbox/models/family/varfuncs.py	2007-08-28 19:23:59 UTC (rev 3274)
+++ trunk/scipy/sandbox/models/family/varfuncs.py	2007-08-29 04:05:33 UTC (rev 3275)
@@ -9,21 +9,45 @@
     """
 
     def __call__(self, mu):
+        """
+        Default variance function
+
+        INPUTS:
+           mu  -- mean parameters
+
+        OUTPUTS: v
+           v   -- ones(mu.shape)
+        """   
+         
         return N.ones(mu.shape, N.float64)
 
 constant = VarianceFunction()
 
 class Power:
     """
-    Variance function:
+    Power variance function:
 
     V(mu) = fabs(mu)**power
+    
+    INPUTS:
+       power -- exponent used in power variance function
+
     """
 
     def __init__(self, power=1.):
         self.power = power
 
     def __call__(self, mu):
+        
+        """
+        Power variance function
+
+        INPUTS:
+           mu  -- mean parameters
+
+        OUTPUTS: v
+           v   -- fabs(mu)**self.power
+        """   
         return N.power(N.fabs(mu), self.power)
 
 class Binomial:
@@ -31,6 +55,9 @@
     Binomial variance function
 
     p = mu / n; V(mu) = p * (1 - p) * n
+
+    INPUTS:
+       n -- number of trials in Binomial 
     """
 
     tol = 1.0e-10
@@ -42,6 +69,15 @@
         return N.clip(p, Binomial.tol, 1 - Binomial.tol)
 
     def __call__(self, mu):
+        """
+        Binomial variance function
+
+        INPUTS:
+           mu  -- mean parameters
+
+        OUTPUTS: v
+           v   -- mu / self.n * (1 - mu / self.n) * self.n
+        """   
         p = self.clean(mu / self.n)
         return p * (1 - p) * self.n