[SciPy-User] expectation function for stats.distributions
josef.pktd at gmail.com
josef.pktd at gmail.com
Mon Aug 3 16:50:40 EDT 2009
On Sat, Aug 1, 2009 at 5:39 PM, <josef.pktd at gmail.com> wrote:
> On Sat, Aug 1, 2009 at 4:44 PM, Pierre GM<pgmdevlist at gmail.com> wrote:
>>
>> On Jul 31, 2009, at 4:10 PM, josef.pktd at gmail.com wrote:
>>
>>> On Fri, Jul 31, 2009 at 3:10 PM, nicky van foreest<vanforeest at gmail.com
>>> > wrote:
>>>> Hi Josef,
>>>>
>>>> I would like such a function.
>>>
>>> Thanks for the interest.
>>
>> Count me in as well
>>
>>> To the name: I'm up for ideas
>>
>> rv_generic.expect ?
>
> I think that's my favorite so far.
> It will be attached to continuous and discrete separately (not to
> generic) because of the difference in implementation.
> I added loc and scale to the expectation function.
> but for discrete distributions following the pattern of _drv2_moment
> for discrete might not be the best implementation.
>
Here's an updated version, one function for continuous, one for
discrete distributions
includes loc and scale,
expect_discrete is tested with all discrete distribution in stats for
mean and variance and is very accurate except for zipf (only 1e-5)
can be used as function or attached to the distributions as method.
Josef
-------------- next part --------------
'''
copy from webpage, where is the original?
Author jpktd
'''
import numpy as np
from scipy import stats, integrate
def expect(self, fn=None, args=(), loc=0, scale=1, lb=None, ub=None, conditional=False):
'''calculate expected value of a function with respect to the distribution
location and scale only tested on a few examples
Parameters
----------
all parameters are keyword parameters
fn : function (default: identity mapping)
Function for which integral is calculated. Takes only one argument.
args : tuple
argument (parameters) of the distribution
lb, ub : numbers
lower and upper bound for integration, default is set to the support
of the distribution
conditional : boolean (False)
If true then the integral is corrected by the conditional probability
of the integration interval. The return value is the expectation
of the function, conditional on being in the given interval.
Returns
-------
expected value : float
'''
if fn is None:
def fun(x, *args):
return x*self.pdf(x, loc=loc, scale=scale,*args)
else:
def fun(x, *args):
return fn(x)*self.pdf(x, loc=loc, scale=scale, *args)
if lb is None:
lb = (self.a - loc)/(1.0*scale)
if ub is None:
ub = (self.b - loc)/(1.0*scale)
if conditional:
invfac = self.sf(lb,*args) - self.sf(ub,*args)
else:
invfac = 1.0
return integrate.quad(fun, lb, ub,
args=args)[0]/invfac
print expect(stats.norm, lambda(x): (x)**4)
print expect(stats.norm, lambda(x): min((x)**2,1.5**2))
#Jonathans version
from scipy.stats import norm as Gaussian
c = 1.5 # our "cutoff" point
c = 0.5 # try another value
tmp = 2 * Gaussian.cdf(c) - 1
gamma = tmp + c**2 * (1 - tmp) - 2 * c * Gaussian.pdf(c)
print gamma
print expect(stats.norm, lambda(x): min((x)**2,c**2))
print 'check loc and scale - continuous'
m = expect(stats.norm, lambda(x): (x), loc=2, scale=2)
mnc2 = expect(stats.norm, lambda(x): (x)**2, loc=2, scale=2)
v = mnc2 - m**2
print m, v
print stats.norm.stats(loc=2, scale=2)
print expect(stats.norm, lambda(x): (x)**2, scale=2)
print expect(stats.norm, lambda(x): (x), loc=2)
### for discrete distributions
#based on _drv2_moment(self, n, *args), but streamlined
def expect_discrete(self, fn=None, args=(), loc=0, lb=None, ub=None, conditional=False):
'''calculate expected value of a function with respect to the distribution
for discrete distribution
Parameters
----------
(self : distribution instance as defined in scipy stats)
fn : function (default: identity mapping)
Function for which integral is calculated. Takes only one argument.
args : tuple
argument (parameters) of the distribution
optional keyword parameters
lb, ub : numbers
lower and upper bound for integration, default is set to the support
of the distribution, lb and ub are inclusive (ul<=k<=ub)
conditional : boolean (False)
If true then the expectation is corrected by the conditional
probability of the integration interval. The return value is the
expectation of the function, conditional on being in the given
interval (k such that ul<=k<=ub).
Returns
-------
expected value : float
Notes
-----
* function is not vectorized
* accuracy: uses self.moment_tol as stop criteria
for heavy tailed distribution e.g. zipf(4), accuracy for
mean, var example is only 1e-5,
increasing precision (moment_tol) makes zipf very slow
* suppnmin=100 internal parameter for minimum number of points to evaluate
could be added as keyword parameter, to evaluate functions with
non-monotonic shapes, points include integers in (-suppnmin, suppnmin)
* uses maxcount=1000 limits the number of points that are evaluated
to break loop for infinite sums
(maximum of suppnmin+1000 positive plus suppnmin+1000 negative integers
are evaluated)
'''
#moment_tol = 1e-12 # increase compared to self.moment_tol,
# too slow for only small gain in precision for zipf
#avoid endless loop with unbound integral, eg. var of zipf(2)
maxcount = 1000
suppnmin = 100 #minimum number of points to evaluate (+ and -)
if fn is None:
def fun(x):
#loc and args from outer scope
return (x+loc)*self._pmf(x, *args)
else:
def fun(x):
#loc and args from outer scope
return fn(x+loc)*self._pmf(x, *args)
# used pmf because _pmf does not check support in randint
# and there might be problems(?) with correct self.a, self.b at this stage
# maybe not anymore, seems to work now with _pmf
self._argcheck(*args) # (re)generate scalar self.a and self.b
if lb is None:
lb = (self.a)
if ub is None:
ub = (self.b)
if conditional:
invfac = self.sf(lb,*args) - self.sf(ub+1,*args)
else:
invfac = 1.0
tot = 0.0
low, upp = self._ppf(0.001, *args), self._ppf(0.999, *args)
low = max(min(-suppnmin, low), lb)
upp = min(max(suppnmin, upp), ub)
supp = np.arange(low, upp+1, self.inc) #check limits
#print 'low, upp', low, upp
tot = np.sum(fun(supp))
diff = 1e100
pos = upp + self.inc
count = 0
#handle cases with infinite support
while (pos <= ub) and (diff > self.moment_tol) and count <= maxcount:
diff = fun(pos)
tot += diff
pos += self.inc
count += 1
if self.a < 0: #handle case when self.a = -inf
diff = 1e100
pos = low - self.inc
while (pos >= lb) and (diff > self.moment_tol) and count <= maxcount:
diff = fun(pos)
tot += diff
pos -= self.inc
count += 1
if count > maxcount:
# replace with proper warning
print 'sum did not converge'
return tot/invfac
distdiscrete = [
['bernoulli',(0.3,)],
['binom', (5, 0.4)],
['boltzmann',(1.4, 19)],
['dlaplace', (0.8,)], #0.5
['geom', (0.5,)],
['hypergeom',(30, 12, 6)],
['hypergeom',(21,3,12)], #numpy.random (3,18,12) numpy ticket:921
['hypergeom',(21,18,11)], #numpy.random (18,3,11) numpy ticket:921
['logser', (0.6,)], # reenabled, numpy ticket:921
['nbinom', (5, 0.5)],
['nbinom', (0.4, 0.4)], #from tickets: 583
['planck', (0.51,)], #4.1
['poisson', (0.6,)],
['randint', (7, 31)],
['zipf', (4,)],
['zipf', (8,)],
['zipf', (3,)]]
distdiscrete_ = [
['randint', (7, 31)]
]
for distname, args in distdiscrete:
distfn = getattr(stats, distname)
print '\n', distname, args
m0 = expect_discrete(distfn, lambda(x): 1, args)
m = expect_discrete(distfn, lambda(x): x, args)
mnc2 = expect_discrete(distfn, lambda(x): x**2, args)
v = mnc2 - m*m
md, vd = distfn.stats(*args)
print m0, 'm0', m0-1
print m, v, 'm, v'
print md[()], vd[()], 'md, vd'
print m - md, v - vd, 'm - md, v - vd'
if (np.abs(m-md) > distfn.moment_tol) or (np.abs(v-vd) > distfn.moment_tol):
print '************ diff too large'
loc = 1
ml = expect_discrete(distfn, lambda(x): x, args, loc=loc)
mnc2l = expect_discrete(distfn, lambda(x): x**2, args, loc=loc)
vl = mnc2l - ml*ml
print ml, vl, 'ml, vl'
if (np.abs(ml-md-loc) > distfn.moment_tol) or (np.abs(vl-vd) > distfn.moment_tol):
print '************ diff too large'
print 'check scale, limits and conditional'
print expect_discrete(stats.randint, lambda(x): x, (0,10)),
print expect_discrete(stats.randint, lambda(x): x-1, (1,11)),
print np.dot(np.arange(10),stats.randint.pmf(np.arange(10),0,10))
print expect_discrete(stats.randint, lambda(x): x, (0,5)),
print expect_discrete(stats.randint, lambda(x): x, (0,10), ub=4,
conditional=True)
print expect_discrete(stats.randint, lambda(x): x, (0,10), ub=4,
conditional=False),
print np.dot(np.arange(5),stats.randint.pmf(np.arange(5),0,10))
print expect_discrete(stats.randint, lambda(x): x, (0,10), lb=2, ub=6,
conditional=True),
print expect_discrete(stats.randint, lambda(x): x, (2,7))
#Note: ub is inclusive (ul<=k<=ub), max in randint is exclusive (min<=k<max)
More information about the SciPy-User
mailing list