[Scipy-svn] r6956 - in trunk/scipy/stats: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Sun Nov 28 08:31:44 EST 2010
Author: rgommers
Date: 2010-11-28 07:31:43 -0600 (Sun, 28 Nov 2010)
New Revision: 6956
Modified:
trunk/scipy/stats/mstats_basic.py
trunk/scipy/stats/stats.py
trunk/scipy/stats/tests/test_stats.py
Log:
DEP: remove deprecated functions from stats: std/var/mean/median/cov/corrcoef.
These have been deprecated for over two years. More recent deprecations are
left in for now. Tests that only test numpy functions are removed as well.
Modified: trunk/scipy/stats/mstats_basic.py
===================================================================
--- trunk/scipy/stats/mstats_basic.py 2010-11-28 13:31:22 UTC (rev 6955)
+++ trunk/scipy/stats/mstats_basic.py 2010-11-28 13:31:43 UTC (rev 6956)
@@ -1921,11 +1921,11 @@
def var(a,axis=None):
return ma.asarray(a).var(axis=axis,ddof=1)
-var.__doc__ = stats.var.__doc__
+var.__doc__ = np.var.__doc__
def std(a,axis=None):
return ma.asarray(a).std(axis=axis,ddof=1)
-std.__doc__ = stats.std.__doc__
+std.__doc__ = np.std.__doc__
def stderr(a, axis=0):
a, axis = _chk_asarray(a, axis)
Modified: trunk/scipy/stats/stats.py
===================================================================
--- trunk/scipy/stats/stats.py 2010-11-28 13:31:22 UTC (rev 6955)
+++ trunk/scipy/stats/stats.py 2010-11-28 13:31:43 UTC (rev 6956)
@@ -39,8 +39,6 @@
CENTRAL TENDENCY: gmean (geometric mean)
hmean (harmonic mean)
- mean
- median
medianscore
mode
@@ -72,8 +70,6 @@
samplevar
samplestd
signaltonoise (for arrays only)
- var
- std
stderr
sem
z
@@ -211,16 +207,16 @@
import _support
from _support import _chk_asarray, _chk2_asarray
-__all__ = ['gmean', 'hmean', 'mean', 'cmedian', 'median', 'mode',
+__all__ = ['gmean', 'hmean', 'cmedian', 'mode',
'tmean', 'tvar', 'tmin', 'tmax', 'tstd', 'tsem',
'moment', 'variation', 'skew', 'kurtosis', 'describe',
'skewtest', 'kurtosistest', 'normaltest',
'itemfreq', 'scoreatpercentile', 'percentileofscore',
'histogram', 'histogram2', 'cumfreq', 'relfreq',
'obrientransform', 'samplevar', 'samplestd', 'signaltonoise',
- 'var', 'std', 'stderr', 'sem', 'z', 'zs', 'zmap', 'zscore',
+ 'stderr', 'sem', 'z', 'zs', 'zmap', 'zscore',
'threshold', 'sigmaclip', 'trimboth', 'trim1', 'trim_mean',
- 'cov', 'corrcoef', 'f_oneway', 'pearsonr', 'fisher_exact',
+ 'f_oneway', 'pearsonr', 'fisher_exact',
'spearmanr', 'pointbiserialr', 'kendalltau', 'linregress',
'ttest_1samp', 'ttest_ind', 'ttest_rel',
'kstest', 'chisquare', 'ks_2samp', 'mannwhitneyu',
@@ -535,48 +531,6 @@
raise ValueError("Harmonic mean only defined if all elements greater than zero")
-
-def mean(a, axis=0):
- """
- Returns the arithmetic mean of m along the given dimension.
-
- That is: (x1 + x2 + .. + xn) / n
-
- Parameters
- ----------
- a : array
- axis : int or None
-
- Returns
- -------
- The arithmetic mean computed over a single dimension of the input array or
- all values in the array if axis=None. The return value will have a floating
- point dtype even if the input data are integers.
-
-
- Notes
- -----
- scipy.stats.mean is deprecated; please update your code to use numpy.mean.
-
- Please note that:
- - numpy.mean axis argument defaults to None, not 0
- - numpy.mean has a ddof argument to replace bias in a more general
- manner.
- - scipy.stats.mean(a, bias=True) can be replaced by ::
-
- numpy.mean(x, axis=0, ddof=1)
-
- removed in scipy 0.8.0
-
- """
- raise DeprecationWarning("""\
-scipy.stats.mean is deprecated; please update your code to use numpy.mean.
-Please note that:
- - numpy.mean axis argument defaults to None, not 0
- - numpy.mean has a ddof argument to replace bias in a more general manner.
- scipy.stats.mean(a, bias=True) can be replaced by numpy.mean(x,
-axis=0, ddof=1).""")
-
def cmedian(a, numbins=1000):
# fixme: numpy.median() always seems to be a better choice.
# A better version of this function would take already-histogrammed data
@@ -632,32 +586,7 @@
median = LRL + ((n/2.0-cfbelow)/float(freq))*binsize # MEDIAN
return median
-def median(a, axis=0):
- # fixme: This would be redundant with numpy.median() except that the latter
- # does not deal with arbitrary axes.
- """Returns the median of the passed array along the given axis.
- If there is an even number of entries, the mean of the
- 2 middle values is returned.
-
- Parameters
- ----------
- a : array
- axis=0 : int
-
- Returns
- -------
- The median of each remaining axis, or of all of the values in the array
- if axis is None.
- """
- raise DeprecationWarning("""\
-scipy.stats.median is deprecated; please update your code to use numpy.median.
-Please note that:
- - numpy.median axis argument defaults to None, not 0
- - numpy.median has a ddof argument to replace bias in a more general manner.
- scipy.stats.median(a, bias=True) can be replaced by numpy.median(x,
-axis=0, ddof=1).""")
-
def mode(a, axis=0):
"""
Returns an array of the modal (most common) value in the passed array.
@@ -1741,36 +1670,7 @@
sd = a.std(axis=axis, ddof=ddof)
return np.where(sd == 0, 0, m/sd)
-def var(a, axis=0, bias=False):
- """
-Returns the estimated population variance of the values in the passed
-array (i.e., N-1). Axis can equal None (ravel array first), or an
-integer (the axis over which to operate).
-"""
- raise DeprecationWarning("""\
-scipy.stats.var is deprecated; please update your code to use numpy.var.
-Please note that:
- - numpy.var axis argument defaults to None, not 0
- - numpy.var has a ddof argument to replace bias in a more general manner.
- scipy.stats.var(a, bias=True) can be replaced by numpy.var(x,
- axis=0, ddof=0), scipy.stats.var(a, bias=False) by var(x, axis=0,
- ddof=1).""")
-def std(a, axis=0, bias=False):
- """
-Returns the estimated population standard deviation of the values in
-the passed array (i.e., N-1). Axis can equal None (ravel array
-first), or an integer (the axis over which to operate).
-"""
- raise DeprecationWarning("""\
-scipy.stats.std is deprecated; please update your code to use numpy.std.
-Please note that:
- - numpy.std axis argument defaults to None, not 0
- - numpy.std has a ddof argument to replace bias in a more general manner.
- scipy.stats.std(a, bias=True) can be replaced by numpy.std(x,
- axis=0, ddof=0), scipy.stats.std(a, bias=False) by numpy.std(x, axis=0,
- ddof=1).""")
-
@np.lib.deprecate(message="""
scipy.stats.stderr is deprecated; please update your code to use
scipy.stats.sem.
@@ -2186,81 +2086,6 @@
return np.mean(newa,axis=0)
-
-#####################################
-##### CORRELATION FUNCTIONS ######
-#####################################
-
-# Cov is more flexible than the original
-# covariance and computes an unbiased covariance matrix
-# by default.
-def cov(m, y=None, rowvar=False, bias=False):
- """Estimate the covariance matrix.
-
- If m is a vector, return the variance. For matrices where each row
- is an observation, and each column a variable, return the covariance
- matrix. Note that in this case diag(cov(m)) is a vector of
- variances for each column.
-
- cov(m) is the same as cov(m, m)
-
- Normalization is by (N-1) where N is the number of observations
- (unbiased estimate). If bias is True then normalization is by N.
-
- If rowvar is False, then each row is a variable with
- observations in the columns.
- """
- warnings.warn("""\
-scipy.stats.cov is deprecated; please update your code to use numpy.cov.
-Please note that:
- - numpy.cov rowvar argument defaults to true, not false
- - numpy.cov bias argument defaults to false, not true
-""", DeprecationWarning)
- m = asarray(m)
- if y is None:
- y = m
- else:
- y = asarray(y)
- if rowvar:
- m = np.transpose(m)
- y = np.transpose(y)
- N = m.shape[0]
- if (y.shape[0] != N):
- raise ValueError("x and y must have the same number of observations.")
- m = m - np.mean(m,axis=0)
- y = y - np.mean(y,axis=0)
- if bias:
- fact = N*1.0
- else:
- fact = N-1.0
- val = np.squeeze(np.dot(np.transpose(m),np.conjugate(y))) / fact
- return val
-
-def corrcoef(x, y=None, rowvar=False, bias=True):
- """The correlation coefficients formed from 2-d array x, where the
- rows are the observations, and the columns are variables.
-
- corrcoef(x,y) where x and y are 1d arrays is the same as
- corrcoef(transpose([x,y]))
-
- If rowvar is True, then each row is a variables with
- observations in the columns.
- """
- warnings.warn("""\
-scipy.stats.corrcoef is deprecated; please update your code to use numpy.corrcoef.
-Please note that:
- - numpy.corrcoef rowvar argument defaults to true, not false
- - numpy.corrcoef bias argument defaults to false, not true
-""", DeprecationWarning)
- if y is not None:
- x = np.transpose([x,y])
- y = None
- c = cov(x, y, rowvar=rowvar, bias=bias)
- d = np.diag(c)
- return c/np.sqrt(np.multiply.outer(d,d))
-
-
-
def f_oneway(*args):
"""
Performs a 1-way ANOVA.
Modified: trunk/scipy/stats/tests/test_stats.py
===================================================================
--- trunk/scipy/stats/tests/test_stats.py 2010-11-28 13:31:22 UTC (rev 6955)
+++ trunk/scipy/stats/tests/test_stats.py 2010-11-28 13:31:43 UTC (rev 6956)
@@ -114,14 +114,6 @@
II. C. Basic Statistics
"""
- def test_meanX(self):
- y = np.mean(X)
- assert_almost_equal(y, 5.0)
-
- def test_stdX(self):
- y = np.std(X, ddof=1)
- assert_almost_equal(y, 2.738612788)
-
def test_tmeanX(self):
y = stats.tmean(X, (2, 8), (True, True))
assert_almost_equal(y, 5.0)
@@ -134,14 +126,6 @@
y = stats.tstd(X, (2, 8), (True, True))
assert_almost_equal(y, 2.1602468994692865)
- def test_meanZERO(self):
- y = np.mean(ZERO)
- assert_almost_equal(y, 0.0)
-
- def test_stdZERO(self):
- y = np.std(ZERO, ddof=1)
- assert_almost_equal(y, 0.0)
-
## Really need to write these tests to handle missing values properly
## def test_meanMISS(self):
## y = np.mean(MISS)
@@ -151,47 +135,7 @@
## y = stats.stdev(MISS)
## assert_almost_equal(y, 0.0)
- def test_meanBIG(self):
- y = np.mean(BIG)
- assert_almost_equal(y, 99999995.00)
-
- def test_stdBIG(self):
- y = np.std(BIG, ddof=1)
- assert_almost_equal(y, 2.738612788)
-
- def test_meanLITTLE(self):
- y = np.mean(LITTLE)
- assert_approx_equal(y, 0.999999950)
-
- def test_stdLITTLE(self):
- y = np.std(LITTLE, ddof=1)
- assert_approx_equal(y, 2.738612788e-8)
-
- def test_meanHUGE(self):
- y = np.mean(HUGE)
- assert_approx_equal(y, 5.00000e+12)
-
- def test_stdHUGE(self):
- y = np.std(HUGE, ddof=1)
- assert_approx_equal(y, 2.738612788e12)
-
- def test_meanTINY(self):
- y = np.mean(TINY)
- assert_almost_equal(y, 0.0)
-
- def test_stdTINY(self):
- y = np.std(TINY, ddof=1)
- assert_almost_equal(y, 0.0)
-
- def test_meanROUND(self):
- y = np.mean(ROUND)
- assert_approx_equal(y, 4.500000000)
-
- def test_stdROUND(self):
- y = np.std(ROUND, ddof=1)
- assert_approx_equal(y, 2.738612788)
-
class TestNanFunc(TestCase):
def __init__(self, *args, **kw):
TestCase.__init__(self, *args, **kw)
@@ -924,55 +868,12 @@
assert_array_almost_equal(actual1, desired1, decimal=14)
-class TestMean(TestCase):
- def test_basic(self):
- a = [3,4,5,10,-3,-5,6]
- af = [3.,4,5,10,-3,-5,-6]
- Na = len(a)
- Naf = len(af)
- mn1 = 0.0
- for el in a:
- mn1 += el / float(Na)
- assert_almost_equal(np.mean(a),mn1,11)
- mn2 = 0.0
- for el in af:
- mn2 += el / float(Naf)
- assert_almost_equal(np.mean(af),mn2,11)
-
- def test_2d(self):
- a = [[1.0, 2.0, 3.0],
- [2.0, 4.0, 6.0],
- [8.0, 12.0, 7.0]]
- A = array(a)
- N1, N2 = (3, 3)
- mn1 = zeros(N2, dtype=float)
- for k in range(N1):
- mn1 += A[k,:] / N1
- assert_almost_equal(np.mean(a, axis=0), mn1, decimal=13)
- mn2 = zeros(N1, dtype=float)
- for k in range(N2):
- mn2 += A[:,k]
- mn2 /= N2
- assert_almost_equal(np.mean(a, axis=1), mn2, decimal=13)
-
- def test_ravel(self):
- a = rand(5,3,5)
- A = 0
- for val in ravel(a):
- A += val
- assert_almost_equal(np.mean(a,axis=None),A/(5*3.0*5))
-
class TestPercentile(TestCase):
def setUp(self):
self.a1 = [3,4,5,10,-3,-5,6]
self.a2 = [3,-6,-2,8,7,4,2,1]
self.a3 = [3.,4,5,10,-3,-5,-6,7.0]
- def test_median(self):
- assert_equal(np.median(self.a1), 4)
- assert_equal(np.median(self.a2), 2.5)
- assert_equal(np.median(self.a3), 3.5)
-
def test_percentile(self):
x = arange(8) * 0.5
assert_equal(stats.scoreatpercentile(x, 0), 0.)
@@ -989,24 +890,6 @@
[1,1,1])
-class TestStd(TestCase):
- def test_basic(self):
- a = [3,4,5,10,-3,-5,6]
- b = [3,4,5,10,-3,-5,-6]
- assert_almost_equal(np.std(a, ddof=1),5.2098807225172772,11)
- assert_almost_equal(np.std(b, ddof=1),5.9281411203561225,11)
-
- def test_2d(self):
- a = [[1.0, 2.0, 3.0],
- [2.0, 4.0, 6.0],
- [8.0, 12.0, 7.0]]
- b1 = array((3.7859388972001824, 5.2915026221291814,
- 2.0816659994661335))
- b2 = array((1.0,2.0,2.64575131106))
- assert_array_almost_equal(np.std(a,ddof=1,axis=0),b1,11)
- assert_array_almost_equal(np.std(a,ddof=1,axis=1),b2,11)
-
-
class TestCMedian(TestCase):
def test_basic(self):
data = [1,2,3,1,5,3,6,4,3,2,4,3,5,2.0]
@@ -1014,28 +897,7 @@
assert_almost_equal(stats.cmedian(data,3),3.083333333333333)
assert_almost_equal(stats.cmedian(data),3.0020020020020022)
-class TestMedian(TestCase):
- def test_basic(self):
- data1 = [1,3,5,2,3,1,19,-10,2,4.0]
- data2 = [3,5,1,10,23,-10,3,-2,6,8,15]
- assert_almost_equal(np.median(data1),2.5)
- assert_almost_equal(np.median(data2),5)
- def test_basic2(self):
- a1 = [3,4,5,10,-3,-5,6]
- a2 = [3,-6,-2,8,7,4,2,1]
- a3 = [3.,4,5,10,-3,-5,-6,7.0]
- assert_equal(np.median(a1),4)
- assert_equal(np.median(a2),2.5)
- assert_equal(np.median(a3),3.5)
-
- def test_axis(self):
- """Regression test for #760."""
- a1 = np.array([[3,4,5], [10,-3,-5]])
- assert_equal(np.median(a1), 3.5)
- assert_equal(np.median(a1, axis=0), np.array([6.5, 0.5, 0.]))
- assert_equal(np.median(a1, axis=-1), np.array([4., -3]))
-
class TestMode(TestCase):
def test_basic(self):
data1 = [3,5,1,10,23,3,2,6,8,6,10,6]
@@ -1049,20 +911,7 @@
note that length(testcase) = 4
"""
testcase = [1,2,3,4]
- def test_std(self):
- y = np.std(self.testcase, ddof=1)
- assert_approx_equal(y,1.290994449)
- def test_var(self):
- """
- var(testcase) = 1.666666667 """
- #y = stats.var(self.shoes[0])
- #assert_approx_equal(y,6.009)
- y = np.var(self.testcase)
- assert_approx_equal(y,1.25)
- y = np.var(self.testcase, ddof=1)
- assert_approx_equal(y,1.666666667)
-
def test_samplevar(self):
"""
R does not have 'samplevar' so the following was used
More information about the Scipy-svn mailing list