[Scipy-svn] r5290 - trunk/scipy/stats
scipy-svn at scipy.org
scipy-svn at scipy.org
Fri Dec 26 19:46:26 EST 2008
Author: josef
Date: 2008-12-26 18:46:24 -0600 (Fri, 26 Dec 2008)
New Revision: 5290
Modified:
trunk/scipy/stats/stats.py
Log:
docstring cleaning and add example to ttest_1samp
Modified: trunk/scipy/stats/stats.py
===================================================================
--- trunk/scipy/stats/stats.py 2008-12-26 22:03:43 UTC (rev 5289)
+++ trunk/scipy/stats/stats.py 2008-12-27 00:46:24 UTC (rev 5290)
@@ -1897,6 +1897,37 @@
t-statistic
prob : float or array
two-tailed p-value
+
+ Examples
+ --------
+
+ >>> from scipy import stats
+ >>> import numpy as np
+
+ >>> #fix seed to get the same result
+ >>> np.random.seed(7654567)
+ >>> rvs = stats.norm.rvs(loc=5,scale=10,size=(50,2))
+
+ Test whether the mean of the random sample is equal to the true mean (5.0),
+ and whether it is equal to a different mean (0.0). We reject the null
+ hypothesis in the second case and don't reject it in the first case.
+
+ >>> stats.ttest_1samp(rvs,5.0)
+ (array([-0.68014479, -0.04323899]), array([ 0.49961383, 0.96568674]))
+ >>> stats.ttest_1samp(rvs,0.0)
+ (array([ 2.77025808, 4.11038784]), array([ 0.00789095, 0.00014999]))
+
+ Examples using the `axis` argument and a non-scalar population mean
+
+ >>> stats.ttest_1samp(rvs,[5.0,0.0])
+ (array([-0.68014479, 4.11038784]), array([ 4.99613833e-01, 1.49986458e-04]))
+ >>> stats.ttest_1samp(rvs.T,[5.0,0.0],axis=1)
+ (array([-0.68014479, 4.11038784]), array([ 4.99613833e-01, 1.49986458e-04]))
+ >>> stats.ttest_1samp(rvs,[[5.0],[0.0]])
+ (array([[-0.68014479, -0.04323899],
+ [ 2.77025808, 4.11038784]]), array([[ 4.99613833e-01, 9.65686743e-01],
+ [ 7.89094663e-03, 1.49986458e-04]]))
+
"""
@@ -1968,17 +1999,19 @@
>>> from scipy import stats
>>> import numpy as np
- #fix seed to get the same result
+ >>> #fix seed to get the same result
>>> np.random.seed(12345678)
- # test with sample with identical means
+ test with sample with identical means
+
>>> rvs1 = stats.norm.rvs(loc=5,scale=10,size=500)
>>> rvs2 = stats.norm.rvs(loc=5,scale=10,size=500)
>>> stats.ttest_ind(rvs1,rvs2)
(0.26833823296239279, 0.78849443369564765)
- # test with sample with different means
+ test with sample with different means
+
>>> rvs3 = stats.norm.rvs(loc=8,scale=10,size=500)
>>> stats.ttest_ind(rvs1,rvs3)
(-5.0434013458585092, 5.4302979468623391e-007)
@@ -2057,7 +2090,7 @@
>>> from scipy import stats
>>> import numpy as np
- #fix random seed to get the same result
+ >>> #fix random seed to get the same result
>>> np.random.seed(12345678)
>>> rvs1 = stats.norm.rvs(loc=5,scale=10,size=500)
>>> rvs2 = stats.norm.rvs(loc=5,scale=10,size=500) + \
@@ -2114,13 +2147,17 @@
----------
rvs : string or array or callable
string: name of a distribution in scipy.stats
+
array: 1-D observations of random variables
+
callable: function to generate random variables,
- requires keyword argument size
+ requires keyword argument `size`
+
cdf : string or callable
string: name of a distribution in scipy.stats
- if rvs is a string then cdf can evaluate to False
- or be the same as rvs
+ if rvs is a string then cdf can evaluate to False
+ or be the same as rvs
+
callable: function to evaluate cdf
args : tuple, sequence
@@ -2131,7 +2168,9 @@
defines the alternative hypothesis (see explanation)
mode : 'approx' (default) or 'asymp'
defines the distribution used for calculating p-value
+
'approx' : use approximation to exact distribution of test statistic
+
'asymp' : use asymptotic distribution of test statistic
@@ -2165,33 +2204,33 @@
>>> kstest(x,'norm')
(0.44435602715924361, 0.038850142705171065)
- # fix random seed to get the same result
+ >>> #fix random seed to get the same result
>>> np.random.seed(987654321)
>>> kstest('norm','',N=100)
(0.058352892479417884, 0.88531190944151261)
is equivalent to this
+
>>> np.random.seed(987654321)
>>> kstest(stats.norm.rvs(size=100),'norm')
(0.058352892479417884, 0.88531190944151261)
- test against one-sided alternative hypothesis
- ---------------------------------------------
+ **test against one-sided alternative hypothesis**
+
>>> np.random.seed(987654321)
- >>> # shift distribution to larger values, so that cdf_dgp(x)< norm.cdf(x)
+ >>> #shift distribution to larger values, so that cdf_dgp(x)< norm.cdf(x)
>>> x = stats.norm.rvs(loc=0.2, size=100)
>>> kstest(x,'norm', alternative = 'less')
(0.12464329735846891, 0.040989164077641749)
- >>> # reject equal distribution against alternative hypothesis: less
+ >>> #reject equal distribution against alternative hypothesis: less
>>> kstest(x,'norm', alternative = 'greater')
(0.0072115233216311081, 0.98531158590396395)
- >>> # don't reject equal distribution against alternative hypothesis: greater
+ >>> #don't reject equal distribution against alternative hypothesis: greater
>>> kstest(x,'norm', mode='asymp')
(0.12464329735846891, 0.08944488871182088)
- testing t distributed random variables against normal distribution
- ------------------------------------------------------------------
+ **testing t distributed random variables against normal distribution**
With 100 degrees of freedom the t distribution looks close to the normal
distribution, and the kstest does not reject the hypothesis that the sample
@@ -2201,7 +2240,6 @@
>>> stats.kstest(stats.t.rvs(100,size=100),'norm')
(0.072018929165471257, 0.67630062862479168)
-
With 3 degrees of freedom the t distribution looks sufficiently different
from the normal distribution, that we can reject the hypothesis that the
sample came from the normal distribution at an alpha=10% level
@@ -2310,14 +2348,15 @@
>>> import numpy as np
>>> from scipy.stats import ks_2samp
- # fix random seed to get the same result
+ >>> #fix random seed to get the same result
>>> np.random.seed(12345678);
>>> n1 = 200 # size of first sample
>>> n2 = 300 # size of second sample
- # different distribution
+ different distribution
we can reject the null hypothesis since the pvalue is below 1%
+
>>> rvs1 = stats.norm.rvs(size=n1,loc=0.,scale=1);
>>> rvs2 = stats.norm.rvs(size=n2,loc=0.5,scale=1.5)
>>> ks_2samp(rvs1,rvs2)
@@ -2326,12 +2365,14 @@
slightly different distribution
we cannot reject the null hypothesis at a 10% or lower alpha since
the pvalue at 0.144 is higher than 10%
+
>>> rvs3 = stats.norm.rvs(size=n2,loc=0.01,scale=1.0)
>>> ks_2samp(rvs1,rvs3)
(0.10333333333333333, 0.14498781825751686)
identical distribution
we cannot reject the null hypothesis since the pvalue is high, 41%
+
>>> rvs4 = stats.norm.rvs(size=n2,loc=0.0,scale=1.0)
>>> ks_2samp(rvs1,rvs4)
(0.07999999999999996, 0.41126949729859719)
More information about the Scipy-svn
mailing list