[Scipy-svn] r6956 - in trunk/scipy/stats: . tests

Sun Nov 28 08:31:44 EST 2010

Author: rgommers
Date: 2010-11-28 07:31:43 -0600 (Sun, 28 Nov 2010)
New Revision: 6956

Modified:
   trunk/scipy/stats/mstats_basic.py
   trunk/scipy/stats/stats.py
   trunk/scipy/stats/tests/test_stats.py
Log:
DEP: remove deprecated functions from stats: std/var/mean/median/cov/corrcoef.

These have been deprecated for over two years. More recent deprecations are
left in for now. Tests that only test numpy functions are removed as well.

Modified: trunk/scipy/stats/mstats_basic.py
===================================================================

--- trunk/scipy/stats/mstats_basic.py	2010-11-28 13:31:22 UTC (rev 6955)
+++ trunk/scipy/stats/mstats_basic.py	2010-11-28 13:31:43 UTC (rev 6956)
@@ -1921,11 +1921,11 @@
 
 def var(a,axis=None):
     return ma.asarray(a).var(axis=axis,ddof=1)
-var.__doc__ = stats.var.__doc__
+var.__doc__ = np.var.__doc__
 
 def std(a,axis=None):
     return ma.asarray(a).std(axis=axis,ddof=1)
-std.__doc__ = stats.std.__doc__
+std.__doc__ = np.std.__doc__
 
 def stderr(a, axis=0):
     a, axis = _chk_asarray(a, axis)

Modified: trunk/scipy/stats/stats.py
===================================================================
--- trunk/scipy/stats/stats.py	2010-11-28 13:31:22 UTC (rev 6955)
+++ trunk/scipy/stats/stats.py	2010-11-28 13:31:43 UTC (rev 6956)
@@ -39,8 +39,6 @@
 
 CENTRAL TENDENCY:  gmean    (geometric mean)
                    hmean    (harmonic mean)
-                   mean
-                   median
                    medianscore
                    mode
 
@@ -72,8 +70,6 @@
               samplevar
               samplestd
               signaltonoise (for arrays only)
-              var
-              std
               stderr
               sem
               z
@@ -211,16 +207,16 @@
 import _support
 from _support import _chk_asarray, _chk2_asarray
 
-__all__ = ['gmean', 'hmean', 'mean', 'cmedian', 'median', 'mode',
+__all__ = ['gmean', 'hmean', 'cmedian', 'mode',
            'tmean', 'tvar', 'tmin', 'tmax', 'tstd', 'tsem',
            'moment', 'variation', 'skew', 'kurtosis', 'describe',
            'skewtest', 'kurtosistest', 'normaltest',
            'itemfreq', 'scoreatpercentile', 'percentileofscore',
            'histogram', 'histogram2', 'cumfreq', 'relfreq',
            'obrientransform', 'samplevar', 'samplestd', 'signaltonoise',
-           'var', 'std', 'stderr', 'sem', 'z', 'zs', 'zmap', 'zscore',
+           'stderr', 'sem', 'z', 'zs', 'zmap', 'zscore',
            'threshold', 'sigmaclip', 'trimboth', 'trim1', 'trim_mean',
-           'cov', 'corrcoef', 'f_oneway', 'pearsonr', 'fisher_exact',
+           'f_oneway', 'pearsonr', 'fisher_exact',
            'spearmanr', 'pointbiserialr', 'kendalltau', 'linregress',
            'ttest_1samp', 'ttest_ind', 'ttest_rel',
            'kstest', 'chisquare', 'ks_2samp', 'mannwhitneyu',
@@ -535,48 +531,6 @@
         raise ValueError("Harmonic mean only defined if all elements greater than zero")
 
 
-
-def mean(a, axis=0):
-    """
-    Returns the arithmetic mean of m along the given dimension.
-
-    That is: (x1 + x2 + .. + xn) / n
-
-    Parameters
-    ----------
-    a : array
-    axis : int or None
-
-    Returns
-    -------
-    The arithmetic mean computed over a single dimension of the input array or
-    all values in the array if axis=None. The return value will have a floating
-    point dtype even if the input data are integers.
-
-
-    Notes
-    -----
-    scipy.stats.mean is deprecated; please update your code to use numpy.mean.
-
-    Please note that:
-        - numpy.mean axis argument defaults to None, not 0
-        - numpy.mean has a ddof argument to replace bias in a more general
-          manner.
-        - scipy.stats.mean(a, bias=True) can be replaced by ::
-
-             numpy.mean(x, axis=0, ddof=1)
-
-    removed in scipy 0.8.0
-
-    """
-    raise DeprecationWarning("""\
-scipy.stats.mean is deprecated; please update your code to use numpy.mean.
-Please note that:
-    - numpy.mean axis argument defaults to None, not 0
-    - numpy.mean has a ddof argument to replace bias in a more general manner.
-      scipy.stats.mean(a, bias=True) can be replaced by numpy.mean(x,
-axis=0, ddof=1).""")
-
 def cmedian(a, numbins=1000):
     # fixme: numpy.median() always seems to be a better choice.
     # A better version of this function would take already-histogrammed data
@@ -632,32 +586,7 @@
     median = LRL + ((n/2.0-cfbelow)/float(freq))*binsize # MEDIAN
     return median
 
-def median(a, axis=0):
-    # fixme: This would be redundant with numpy.median() except that the latter
-    # does not deal with arbitrary axes.
-    """Returns the median of the passed array along the given axis.
 
-    If there is an even number of entries, the mean of the
-    2 middle values is returned.
-
-    Parameters
-    ----------
-    a : array
-    axis=0 : int
-
-    Returns
-    -------
-    The median of each remaining axis, or of all of the values in the array
-    if axis is None.
-    """
-    raise DeprecationWarning("""\
-scipy.stats.median is deprecated; please update your code to use numpy.median.
-Please note that:
-    - numpy.median axis argument defaults to None, not 0
-    - numpy.median has a ddof argument to replace bias in a more general manner.
-      scipy.stats.median(a, bias=True) can be replaced by numpy.median(x,
-axis=0, ddof=1).""")
-
 def mode(a, axis=0):
     """
     Returns an array of the modal (most common) value in the passed array.
@@ -1741,36 +1670,7 @@
     sd = a.std(axis=axis, ddof=ddof)
     return np.where(sd == 0, 0, m/sd)
 
-def var(a, axis=0, bias=False):
-    """
-Returns the estimated population variance of the values in the passed
-array (i.e., N-1).  Axis can equal None (ravel array first), or an
-integer (the axis over which to operate).
-"""
-    raise DeprecationWarning("""\
-scipy.stats.var is deprecated; please update your code to use numpy.var.
-Please note that:
-    - numpy.var axis argument defaults to None, not 0
-    - numpy.var has a ddof argument to replace bias in a more general manner.
-      scipy.stats.var(a, bias=True) can be replaced by numpy.var(x,
-      axis=0, ddof=0), scipy.stats.var(a, bias=False) by var(x, axis=0,
-      ddof=1).""")
 
-def std(a, axis=0, bias=False):
-    """
-Returns the estimated population standard deviation of the values in
-the passed array (i.e., N-1).  Axis can equal None (ravel array
-first), or an integer (the axis over which to operate).
-"""
-    raise DeprecationWarning("""\
-scipy.stats.std is deprecated; please update your code to use numpy.std.
-Please note that:
-    - numpy.std axis argument defaults to None, not 0
-    - numpy.std has a ddof argument to replace bias in a more general manner.
-      scipy.stats.std(a, bias=True) can be replaced by numpy.std(x,
-      axis=0, ddof=0), scipy.stats.std(a, bias=False) by numpy.std(x, axis=0,
-      ddof=1).""")
-
 @np.lib.deprecate(message="""
 scipy.stats.stderr is deprecated; please update your code to use
 scipy.stats.sem.
@@ -2186,81 +2086,6 @@
     return np.mean(newa,axis=0)
 
 
-
-#####################################
-#####  CORRELATION FUNCTIONS  ######
-#####################################
-
-#  Cov is more flexible than the original
-#    covariance and computes an unbiased covariance matrix
-#    by default.
-def cov(m, y=None, rowvar=False, bias=False):
-    """Estimate the covariance matrix.
-
-    If m is a vector, return the variance.  For matrices where each row
-    is an observation, and each column a variable, return the covariance
-    matrix.  Note that in this case diag(cov(m)) is a vector of
-    variances for each column.
-
-    cov(m) is the same as cov(m, m)
-
-    Normalization is by (N-1) where N is the number of observations
-    (unbiased estimate).  If bias is True then normalization is by N.
-
-    If rowvar is False, then each row is a variable with
-    observations in the columns.
-    """
-    warnings.warn("""\
-scipy.stats.cov is deprecated; please update your code to use numpy.cov.
-Please note that:
-    - numpy.cov rowvar argument defaults to true, not false
-    - numpy.cov bias argument defaults to false, not true
-""", DeprecationWarning)
-    m = asarray(m)
-    if y is None:
-        y = m
-    else:
-        y = asarray(y)
-    if rowvar:
-        m = np.transpose(m)
-        y = np.transpose(y)
-    N = m.shape[0]
-    if (y.shape[0] != N):
-        raise ValueError("x and y must have the same number of observations.")
-    m = m - np.mean(m,axis=0)
-    y = y - np.mean(y,axis=0)
-    if bias:
-        fact = N*1.0
-    else:
-        fact = N-1.0
-    val = np.squeeze(np.dot(np.transpose(m),np.conjugate(y))) / fact
-    return val
-
-def corrcoef(x, y=None, rowvar=False, bias=True):
-    """The correlation coefficients formed from 2-d array x, where the
-    rows are the observations, and the columns are variables.
-
-    corrcoef(x,y) where x and y are 1d arrays is the same as
-    corrcoef(transpose([x,y]))
-
-    If rowvar is True, then each row is a variables with
-    observations in the columns.
-    """
-    warnings.warn("""\
-scipy.stats.corrcoef is deprecated; please update your code to use numpy.corrcoef.
-Please note that:
-    - numpy.corrcoef rowvar argument defaults to true, not false
-    - numpy.corrcoef bias argument defaults to false, not true
-""", DeprecationWarning)
-    if y is not None:
-        x = np.transpose([x,y])
-        y = None
-    c = cov(x, y, rowvar=rowvar, bias=bias)
-    d = np.diag(c)
-    return c/np.sqrt(np.multiply.outer(d,d))
-
-
-
 def f_oneway(*args):
     """
     Performs a 1-way ANOVA.

Modified: trunk/scipy/stats/tests/test_stats.py
===================================================================
--- trunk/scipy/stats/tests/test_stats.py	2010-11-28 13:31:22 UTC (rev 6955)
+++ trunk/scipy/stats/tests/test_stats.py	2010-11-28 13:31:43 UTC (rev 6956)
@@ -114,14 +114,6 @@
         II. C. Basic Statistics
     """
 
-    def test_meanX(self):
-        y = np.mean(X)
-        assert_almost_equal(y, 5.0)
-
-    def test_stdX(self):
-        y = np.std(X, ddof=1)
-        assert_almost_equal(y, 2.738612788)
-
     def test_tmeanX(self):
         y = stats.tmean(X, (2, 8), (True, True))
         assert_almost_equal(y, 5.0)
@@ -134,14 +126,6 @@
         y = stats.tstd(X, (2, 8), (True, True))
         assert_almost_equal(y, 2.1602468994692865)
 
-    def test_meanZERO(self):
-        y = np.mean(ZERO)
-        assert_almost_equal(y, 0.0)
-
-    def test_stdZERO(self):
-        y = np.std(ZERO, ddof=1)
-        assert_almost_equal(y, 0.0)
-
 ##    Really need to write these tests to handle missing values properly
 ##    def test_meanMISS(self):
 ##        y = np.mean(MISS)
@@ -151,47 +135,7 @@
 ##        y = stats.stdev(MISS)
 ##        assert_almost_equal(y, 0.0)
 
-    def test_meanBIG(self):
-        y = np.mean(BIG)
 
-        assert_almost_equal(y, 99999995.00)
-
-    def test_stdBIG(self):
-        y = np.std(BIG, ddof=1)
-        assert_almost_equal(y, 2.738612788)
-
-    def test_meanLITTLE(self):
-        y = np.mean(LITTLE)
-        assert_approx_equal(y, 0.999999950)
-
-    def test_stdLITTLE(self):
-        y = np.std(LITTLE, ddof=1)
-        assert_approx_equal(y, 2.738612788e-8)
-
-    def test_meanHUGE(self):
-        y = np.mean(HUGE)
-        assert_approx_equal(y, 5.00000e+12)
-
-    def test_stdHUGE(self):
-        y = np.std(HUGE, ddof=1)
-        assert_approx_equal(y, 2.738612788e12)
-
-    def test_meanTINY(self):
-        y = np.mean(TINY)
-        assert_almost_equal(y, 0.0)
-
-    def test_stdTINY(self):
-        y = np.std(TINY, ddof=1)
-        assert_almost_equal(y, 0.0)
-
-    def test_meanROUND(self):
-        y = np.mean(ROUND)
-        assert_approx_equal(y, 4.500000000)
-
-    def test_stdROUND(self):
-        y = np.std(ROUND, ddof=1)
-        assert_approx_equal(y, 2.738612788)
-
 class TestNanFunc(TestCase):
     def __init__(self, *args, **kw):
         TestCase.__init__(self, *args, **kw)
@@ -924,55 +868,12 @@
         assert_array_almost_equal(actual1, desired1, decimal=14)
 
 
-class TestMean(TestCase):
-    def test_basic(self):
-        a = [3,4,5,10,-3,-5,6]
-        af = [3.,4,5,10,-3,-5,-6]
-        Na = len(a)
-        Naf = len(af)
-        mn1 = 0.0
-        for el in a:
-            mn1 += el / float(Na)
-        assert_almost_equal(np.mean(a),mn1,11)
-        mn2 = 0.0
-        for el in af:
-            mn2 += el / float(Naf)
-        assert_almost_equal(np.mean(af),mn2,11)
-
-    def test_2d(self):
-        a = [[1.0, 2.0, 3.0],
-             [2.0, 4.0, 6.0],
-             [8.0, 12.0, 7.0]]
-        A = array(a)
-        N1, N2 = (3, 3)
-        mn1 = zeros(N2, dtype=float)
-        for k in range(N1):
-            mn1 += A[k,:] / N1
-        assert_almost_equal(np.mean(a, axis=0), mn1, decimal=13)
-        mn2 = zeros(N1, dtype=float)
-        for k in range(N2):
-            mn2 += A[:,k]
-        mn2 /= N2
-        assert_almost_equal(np.mean(a, axis=1), mn2, decimal=13)
-
-    def test_ravel(self):
-        a = rand(5,3,5)
-        A = 0
-        for val in ravel(a):
-            A += val
-        assert_almost_equal(np.mean(a,axis=None),A/(5*3.0*5))
-
 class TestPercentile(TestCase):
     def setUp(self):
         self.a1 = [3,4,5,10,-3,-5,6]
         self.a2 = [3,-6,-2,8,7,4,2,1]
         self.a3 = [3.,4,5,10,-3,-5,-6,7.0]
 
-    def test_median(self):
-        assert_equal(np.median(self.a1), 4)
-        assert_equal(np.median(self.a2), 2.5)
-        assert_equal(np.median(self.a3), 3.5)
-
     def test_percentile(self):
         x = arange(8) * 0.5
         assert_equal(stats.scoreatpercentile(x, 0), 0.)
@@ -989,24 +890,6 @@
                            [1,1,1])
 
 
-class TestStd(TestCase):
-    def test_basic(self):
-        a = [3,4,5,10,-3,-5,6]
-        b = [3,4,5,10,-3,-5,-6]
-        assert_almost_equal(np.std(a, ddof=1),5.2098807225172772,11)
-        assert_almost_equal(np.std(b, ddof=1),5.9281411203561225,11)
-
-    def test_2d(self):
-        a = [[1.0, 2.0, 3.0],
-             [2.0, 4.0, 6.0],
-             [8.0, 12.0, 7.0]]
-        b1 = array((3.7859388972001824, 5.2915026221291814,
-                    2.0816659994661335))
-        b2 = array((1.0,2.0,2.64575131106))
-        assert_array_almost_equal(np.std(a,ddof=1,axis=0),b1,11)
-        assert_array_almost_equal(np.std(a,ddof=1,axis=1),b2,11)
-
-
 class TestCMedian(TestCase):
     def test_basic(self):
         data = [1,2,3,1,5,3,6,4,3,2,4,3,5,2.0]
@@ -1014,28 +897,7 @@
         assert_almost_equal(stats.cmedian(data,3),3.083333333333333)
         assert_almost_equal(stats.cmedian(data),3.0020020020020022)
 
-class TestMedian(TestCase):
-    def test_basic(self):
-        data1 = [1,3,5,2,3,1,19,-10,2,4.0]
-        data2 = [3,5,1,10,23,-10,3,-2,6,8,15]
-        assert_almost_equal(np.median(data1),2.5)
-        assert_almost_equal(np.median(data2),5)
 
-    def test_basic2(self):
-        a1 = [3,4,5,10,-3,-5,6]
-        a2 = [3,-6,-2,8,7,4,2,1]
-        a3 = [3.,4,5,10,-3,-5,-6,7.0]
-        assert_equal(np.median(a1),4)
-        assert_equal(np.median(a2),2.5)
-        assert_equal(np.median(a3),3.5)
-
-    def test_axis(self):
-        """Regression test for #760."""
-        a1 = np.array([[3,4,5], [10,-3,-5]])
-        assert_equal(np.median(a1), 3.5)
-        assert_equal(np.median(a1, axis=0), np.array([6.5, 0.5, 0.]))
-        assert_equal(np.median(a1, axis=-1), np.array([4., -3]))
-
 class TestMode(TestCase):
     def test_basic(self):
         data1 = [3,5,1,10,23,3,2,6,8,6,10,6]
@@ -1049,20 +911,7 @@
          note that length(testcase) = 4
     """
     testcase = [1,2,3,4]
-    def test_std(self):
-        y = np.std(self.testcase, ddof=1)
-        assert_approx_equal(y,1.290994449)
 
-    def test_var(self):
-        """
-        var(testcase) = 1.666666667 """
-        #y = stats.var(self.shoes[0])
-        #assert_approx_equal(y,6.009)
-        y = np.var(self.testcase)
-        assert_approx_equal(y,1.25)
-        y = np.var(self.testcase, ddof=1)
-        assert_approx_equal(y,1.666666667)
-
     def test_samplevar(self):
         """
         R does not have 'samplevar' so the following was used