[Scipy-svn] r3275 - trunk/scipy/sandbox/models/family
scipy-svn at scipy.org
scipy-svn at scipy.org
Wed Aug 29 00:05:44 EDT 2007
Author: jonathan.taylor
Date: 2007-08-28 23:05:33 -0500 (Tue, 28 Aug 2007)
New Revision: 3275
Modified:
trunk/scipy/sandbox/models/family/__init__.py
trunk/scipy/sandbox/models/family/family.py
trunk/scipy/sandbox/models/family/links.py
trunk/scipy/sandbox/models/family/varfuncs.py
Log:
docstrings for scipy.sandbox.models.family
Modified: trunk/scipy/sandbox/models/family/__init__.py
===================================================================
--- trunk/scipy/sandbox/models/family/__init__.py 2007-08-28 19:23:59 UTC (rev 3274)
+++ trunk/scipy/sandbox/models/family/__init__.py 2007-08-29 04:05:33 UTC (rev 3275)
@@ -1,3 +1,15 @@
+'''
+This module contains the one-parameter exponential families used
+for fitting GLMs and GAMs.
+
+These families are described in
+
+ P. McCullagh and J. A. Nelder. "Generalized linear models."
+ Monographs on Statistics and Applied Probability.
+ Chapman & Hall, London, 1983.
+
+'''
+
from scipy.sandbox.models.family.family import Gaussian, Family, \
Poisson, Gamma, InverseGaussian, Binomial
Modified: trunk/scipy/sandbox/models/family/family.py
===================================================================
--- trunk/scipy/sandbox/models/family/family.py 2007-08-28 19:23:59 UTC (rev 3274)
+++ trunk/scipy/sandbox/models/family/family.py 2007-08-29 04:05:33 UTC (rev 3275)
@@ -2,13 +2,36 @@
from scipy.sandbox.models.family import links as L
from scipy.sandbox.models.family import varfuncs as V
-class Family:
+class Family(object):
+ """
+ A class to model one-parameter exponential
+ families.
+
+ INPUTS:
+ link -- a Link instance
+ variance -- a variance function (models means as a function
+ of mean)
+
+ """
+
valid = [-N.inf, N.inf]
tol = 1.0e-05
+ def _setlink(self, link):
+ self._link = link
+ if hasattr(self, "links"):
+ if link not in links:
+ raise ValueError, 'invalid link for family, should be in %s' % `self.links`
+
+ def _getlink(self):
+ return self._link
+
+ link = property(_getlink, _setlink)
+
def __init__(self, link, variance):
+
self.link = link
self.variance = variance
@@ -16,32 +39,93 @@
"""
Weights for IRLS step.
+
+ w = 1 / (link'(mu)**2 * variance(mu))
+
+ INPUTS:
+ mu -- mean parameter in exponential family
+
+ OUTPUTS:
+ w -- weights used in WLS step of GLM/GAM fit
+
"""
return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
def deviance(self, Y, mu, scale=1.):
+ """
+ Deviance of (Y,mu) pair. Deviance is usually defined
+ as the difference
+
+ DEV = (SUM_i -2 log Likelihood(Y_i,mu_i) + 2 log Likelihood(mu_i,mu_i)) / scale
+
+ INPUTS:
+ Y -- response variable
+ mu -- mean parameter
+ scale -- optional scale in denominator of deviance
+
+ OUTPUTS: dev
+ dev -- DEV, as described aboce
+
+ """
+
return N.power(self.devresid(Y, mu), 2).sum() / scale
def devresid(self, Y, mu):
+ """
+ The deviance residuals, defined as the residuals
+ in the deviance.
+
+ Without knowing the link, they default to Pearson residuals
+
+ resid_P = (Y - mu) * sqrt(weight(mu))
+
+ INPUTS:
+ Y -- response variable
+ mu -- mean parameter
+
+ OUTPUTS: resid
+ resid -- deviance residuals
+ """
+
return (Y - mu) * N.sqrt(self.weights(mu))
def fitted(self, eta):
"""
Fitted values based on linear predictors eta.
+
+ INPUTS:
+ eta -- values of linear predictors, say,
+ X beta in a generalized linear model.
+
+ OUTPUTS: mu
+ mu -- link.inverse(eta), mean parameter based on eta
+
"""
return self.link.inverse(eta)
def predict(self, mu):
"""
Linear predictors based on given mu values.
+
+ INPUTS:
+ mu -- mean parameter of one-parameter exponential family
+
+ OUTPUTS: eta
+ eta -- link(mu), linear predictors, based on
+ mean parameters mu
+
"""
return self.link(mu)
class Poisson(Family):
"""
- Poisson exponential family in glm context.
+ Poisson exponential family.
+
+ INPUTS:
+ link -- a Link instance
+
"""
links = [L.log, L.identity, L.sqrt]
@@ -49,81 +133,125 @@
valid = [0, N.inf]
def __init__(self, link=L.log):
- if link not in Poisson.links:
- raise ValueError, 'invalid link for Poisson family'
self.variance = Poisson.variance
self.link = link
def devresid(self, Y, mu):
+ """
+ Poisson deviance residual
+
+ INPUTS:
+ Y -- response variable
+ mu -- mean parameter
+
+ OUTPUTS: resid
+ resid -- deviance residuals
+
+ """
return N.sign(Y - mu) * N.sqrt(2 * Y * N.log(Y / mu) - 2 * (Y - mu))
class Gaussian(Family):
"""
- Gaussian exponential family in glm context.
+ Gaussian exponential family.
+
+ INPUTS:
+ link -- a Link instance
+
"""
links = [L.log, L.identity, L.inverse]
variance = V.constant
def __init__(self, link=L.identity):
- if link not in Gaussian.links:
- raise ValueError, 'invalid link for Gaussian family'
self.variance = Gaussian.variance
self.link = link
def devresid(self, Y, mu, scale=1.):
+ """
+ Gaussian deviance residual
+
+ INPUTS:
+ Y -- response variable
+ mu -- mean parameter
+ scale -- optional scale in denominator (after taking sqrt)
+
+ OUTPUTS: resid
+ resid -- deviance residuals
+ """
+
return (Y - mu) / N.sqrt(self.variance(mu) * scale)
class Gamma(Family):
"""
- Gaussian exponential family in glm context.
+ Gamma exponential family.
+
+ INPUTS:
+ link -- a Link instance
+
+ BUGS:
+ no deviance residuals?
+
"""
links = [L.log, L.identity, L.inverse]
variance = V.mu_squared
def __init__(self, link=L.identity):
- if link not in Gamma.links:
- raise ValueError, 'invalid link for Gamma family'
self.variance = Gamma.variance
self.link = link
-
class Binomial(Family):
"""
- Binomial exponential family in glm context.
+ Binomial exponential family.
+
+ INPUTS:
+ link -- a Link instance
+ n -- number of trials for Binomial
"""
links = [L.logit, L.probit, L.cauchy, L.log, L.cloglog]
variance = V.binary
def __init__(self, link=L.logit, n=1):
- if link not in Binomial.links:
- raise ValueError, 'invalid link for Binomial family'
self.n = n
self.variance = V.Binomial(n=self.n)
self.link = link
- def devresid(self, Y, mu, scale=1.):
+ def devresid(self, Y, mu):
+ """
+ Binomial deviance residual
+
+ INPUTS:
+ Y -- response variable
+ mu -- mean parameter
+
+ OUTPUTS: resid
+ resid -- deviance residuals
+
+ """
+
mu = self.link.clean(mu)
return N.sign(Y - mu) * N.sqrt(-2 * (Y * N.log(mu / self.n) + (self.n - Y) * N.log(1 - mu / self.n)))
class InverseGaussian(Family):
"""
- Gaussian exponential family in glm context.
+ InverseGaussian exponential family.
+
+ INPUTS:
+ link -- a Link instance
+ n -- number of trials for Binomial
+
"""
links = [L.inverse_squared, L.inverse, L.identity, L.log]
variance = V.mu_cubed
- def __init__(self, link=L.identity, n=1):
- if link not in InverseGaussian.links:
- raise ValueError, 'invalid link for InverseGaussian family'
+ def __init__(self, link=L.identity):
self.n = n
self.variance = InverseGaussian.variance
self.link = link
Modified: trunk/scipy/sandbox/models/family/links.py
===================================================================
--- trunk/scipy/sandbox/models/family/links.py 2007-08-28 19:23:59 UTC (rev 3274)
+++ trunk/scipy/sandbox/models/family/links.py 2007-08-29 04:05:33 UTC (rev 3275)
@@ -3,31 +3,97 @@
class Link:
+ """
+ A generic link function for one-parameter exponential
+ family, with call, inverse and deriv methods.
+
+ """
+
def initialize(self, Y):
return N.asarray(Y).mean() * N.ones(Y.shape)
+ def __call__(self, p):
+ return NotImplementedError
+
+ def inverse(self, z):
+ return NotImplementedError
+
+ def deriv(self, p):
+ return NotImplementedError
+
+
class Logit(Link):
"""
The logit transform as a link function:
- g(x) = log(x / (1 - x))
+ g'(x) = 1 / (x * (1 - x))
+ g^(-1)(x) = exp(x)/(1 + exp(x))
+
"""
tol = 1.0e-10
def clean(self, p):
+ """
+ Clip logistic values to range (tol, 1-tol)
+
+ INPUTS:
+ p -- probabilities
+
+ OUTPUTS: pclip
+ pclip -- clipped probabilities
+ """
+
return N.clip(p, Logit.tol, 1. - Logit.tol)
def __call__(self, p):
+ """
+ Logit transform
+
+ g(p) = log(p / (1 - p))
+
+ INPUTS:
+ p -- probabilities
+
+ OUTPUTS: z
+ z -- logit transform of p
+
+ """
+
p = self.clean(p)
return N.log(p / (1. - p))
def inverse(self, z):
+ """
+ Inverse logit transform
+
+ h(z) = exp(z)/(1+exp(z))
+
+ INPUTS:
+ z -- logit transform of p
+
+ OUTPUTS: p
+ p -- probabilities
+
+ """
t = N.exp(z)
return t / (1. + t)
def deriv(self, p):
+
+ """
+ Derivative of logit transform
+
+ g(p) = 1 / (p * (1 - p))
+
+ INPUTS:
+ p -- probabilities
+
+ OUTPUTS: y
+ y -- derivative of logit transform of p
+
+ """
p = self.clean(p)
return 1. / (p * (1 - p))
@@ -46,12 +112,50 @@
self.power = power
def __call__(self, x):
+ """
+ Power transform
+
+ g(x) = x**self.power
+
+ INPUTS:
+ x -- mean parameters
+
+ OUTPUTS: z
+ z -- power transform of x
+
+ """
+
return N.power(x, self.power)
- def inverse(self, x):
- return N.power(x, 1. / self.power)
+ def inverse(self, z):
+ """
+ Inverse of power transform
+
+ g(x) = x**(1/self.power)
+ INPUTS:
+ z -- linear predictors in glm
+
+ OUTPUTS: x
+ x -- mean parameters
+
+ """
+ return N.power(z, 1. / self.power)
+
def deriv(self, x):
+ """
+ Derivative of power transform
+
+ g(x) = self.power * x**(self.power - 1)
+
+ INPUTS:
+ x -- mean parameters
+
+ OUTPUTS: z
+ z -- derivative of power transform of x
+
+ """
+
return self.power * N.power(x, self.power - 1)
inverse = Power(power=-1.)
@@ -101,13 +205,50 @@
return N.clip(x, Logit.tol, N.inf)
def __call__(self, x, **extra):
+ """
+ Log transform
+
+ g(x) = log(x)
+
+ INPUTS:
+ x -- mean parameters
+
+ OUTPUTS: z
+ z -- log(x)
+
+ """
x = self.clean(x)
return N.log(x)
def inverse(self, z):
+ """
+ Inverse of log transform
+
+ g(x) = exp(x)
+
+ INPUTS:
+ z -- linear predictors in glm
+
+ OUTPUTS: x
+ x -- exp(z)
+
+ """
return N.exp(z)
def deriv(self, x):
+ """
+ Derivative of log transform
+
+ g(x) = 1/x
+
+ INPUTS:
+ x -- mean parameters
+
+ OUTPUTS: z
+ z -- derivative of log transform of x
+
+ """
+
x = self.clean(x)
return 1. / x
@@ -126,13 +267,49 @@
self.dbn = dbn
def __call__(self, p):
+ """
+ CDF link
+
+ g(p) = self.dbn.pdf(p)
+
+ INPUTS:
+ p -- mean parameters
+
+ OUTPUTS: z
+ z -- derivative of CDF transform of p
+
+ """
p = self.clean(p)
return self.dbn.ppf(p)
def inverse(self, z):
+ """
+ Derivative of CDF link
+
+ g(z) = self.dbn.cdf(z)
+
+ INPUTS:
+ z -- linear predictors in glm
+
+ OUTPUTS: p
+ p -- inverse of CDF link of z
+
+ """
return self.dbn.cdf(z)
def deriv(self, p):
+ """
+ Derivative of CDF link
+
+ g(p) = 1/self.dbn.pdf(self.dbn.ppf(p))
+
+ INPUTS:
+ x -- mean parameters
+
+ OUTPUTS: z
+ z -- derivative of CDF transform of x
+
+ """
p = self.clean(p)
return 1. / self.dbn.pdf(self(p))
@@ -164,13 +341,49 @@
"""
def __call__(self, p):
+ """
+ C-Log-Log transform
+
+ g(p) = log(-log(p))
+
+ INPUTS:
+ p -- mean parameters
+
+ OUTPUTS: z
+ z -- log(-log(p))
+
+ """
p = self.clean(p)
return N.log(-N.log(p))
def inverse(self, z):
+ """
+ Inverse of C-Log-Log transform
+
+ g(z) = exp(-exp(z))
+
+ INPUTS:
+ z -- linear predictor scale
+
+ OUTPUTS: p
+ p -- mean parameters
+
+ """
return N.exp(-N.exp(z))
def deriv(self, p):
+ """
+ Derivatve of C-Log-Log transform
+
+ g(p) = - 1 / (log(p) * p)
+
+ INPUTS:
+ p -- mean parameters
+
+ OUTPUTS: z
+ z -- - 1 / (log(p) * p)
+
+ """
p = self.clean(p)
return -1. / (N.log(p) * p)
Modified: trunk/scipy/sandbox/models/family/varfuncs.py
===================================================================
--- trunk/scipy/sandbox/models/family/varfuncs.py 2007-08-28 19:23:59 UTC (rev 3274)
+++ trunk/scipy/sandbox/models/family/varfuncs.py 2007-08-29 04:05:33 UTC (rev 3275)
@@ -9,21 +9,45 @@
"""
def __call__(self, mu):
+ """
+ Default variance function
+
+ INPUTS:
+ mu -- mean parameters
+
+ OUTPUTS: v
+ v -- ones(mu.shape)
+ """
+
return N.ones(mu.shape, N.float64)
constant = VarianceFunction()
class Power:
"""
- Variance function:
+ Power variance function:
V(mu) = fabs(mu)**power
+
+ INPUTS:
+ power -- exponent used in power variance function
+
"""
def __init__(self, power=1.):
self.power = power
def __call__(self, mu):
+
+ """
+ Power variance function
+
+ INPUTS:
+ mu -- mean parameters
+
+ OUTPUTS: v
+ v -- fabs(mu)**self.power
+ """
return N.power(N.fabs(mu), self.power)
class Binomial:
@@ -31,6 +55,9 @@
Binomial variance function
p = mu / n; V(mu) = p * (1 - p) * n
+
+ INPUTS:
+ n -- number of trials in Binomial
"""
tol = 1.0e-10
@@ -42,6 +69,15 @@
return N.clip(p, Binomial.tol, 1 - Binomial.tol)
def __call__(self, mu):
+ """
+ Binomial variance function
+
+ INPUTS:
+ mu -- mean parameters
+
+ OUTPUTS: v
+ v -- mu / self.n * (1 - mu / self.n) * self.n
+ """
p = self.clean(mu / self.n)
return p * (1 - p) * self.n
More information about the Scipy-svn
mailing list