[Scipy-svn] r5160 - in trunk/scipy/stats: . tests

scipy-svn at scipy.org scipy-svn at scipy.org
Sat Nov 22 00:26:54 EST 2008


Author: josef
Date: 2008-11-21 23:26:45 -0600 (Fri, 21 Nov 2008)
New Revision: 5160

Added:
   trunk/scipy/stats/mstats_basic.py
   trunk/scipy/stats/mstats_extras.py
   trunk/scipy/stats/tests/test_mstats_basic.py
   trunk/scipy/stats/tests/test_mstats_extras.py
Removed:
   trunk/scipy/stats/mmorestats.py
   trunk/scipy/stats/mstats.py
   trunk/scipy/stats/tests/test_mmorestats.py
   trunk/scipy/stats/tests/test_mstats.py
Log:
rename masked array stats files

Deleted: trunk/scipy/stats/mmorestats.py
===================================================================
--- trunk/scipy/stats/mmorestats.py	2008-11-21 21:09:29 UTC (rev 5159)
+++ trunk/scipy/stats/mmorestats.py	2008-11-22 05:26:45 UTC (rev 5160)
@@ -1,385 +0,0 @@
-"""
-Additional statistics functions, with support for masked arrays (MA).
-
-:author: Pierre GF Gerard-Marchant
-:contact: pierregm_at_uga_edu
-:date: $Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $
-:version: $Id: morestats.py 3473 2007-10-29 15:18:13Z jarrod.millman $
-"""
-__author__ = "Pierre GF Gerard-Marchant"
-__docformat__ = "restructuredtext en"
-
-
-__all__ = ['compare_medians_ms',
-           'hdquantiles', 'hdmedian', 'hdquantiles_sd',
-           'idealfourths',
-           'median_cihs','mjci','mquantiles_cimj',
-           'rsh',
-           'trimmed_mean_ci',]
-
-import numpy as np
-from numpy import float_, int_, ndarray
-
-import numpy.ma as ma
-from numpy.ma import MaskedArray
-
-import scipy.stats.mstats as mstats
-
-from scipy.stats.distributions import norm, beta, t, binom
-
-
-#####--------------------------------------------------------------------------
-#---- --- Quantiles ---
-#####--------------------------------------------------------------------------
-def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
-    """Computes quantile estimates with the Harrell-Davis method, where the estimates
-are calculated as a weighted linear combination of order statistics.
-
-Parameters
-----------
-    data: ndarray
-        Data array.
-    prob: sequence
-        Sequence of quantiles to compute.
-    axis : int
-        Axis along which to compute the quantiles. If None, use a flattened array.
-    var : boolean
-        Whether to return the variance of the estimate.
-
-Returns
--------
-    A (p,) array of quantiles (if ``var`` is False), or a (2,p) array of quantiles
-    and variances (if ``var`` is True), where ``p`` is the number of quantiles.
-
-Notes
------
-    The function is restricted to 2D arrays.
-
-    """
-    def _hd_1D(data,prob,var):
-        "Computes the HD quantiles for a 1D array. Returns nan for invalid data."
-        xsorted = np.squeeze(np.sort(data.compressed().view(ndarray)))
-        # Don't use length here, in case we have a numpy scalar
-        n = xsorted.size
-        #.........
-        hd = np.empty((2,len(prob)), float_)
-        if n < 2:
-            hd.flat = np.nan
-            if var:
-                return hd
-            return hd[0]
-        #.........
-        v = np.arange(n+1) / float(n)
-        betacdf = beta.cdf
-        for (i,p) in enumerate(prob):
-            _w = betacdf(v, (n+1)*p, (n+1)*(1-p))
-            w = _w[1:] - _w[:-1]
-            hd_mean = np.dot(w, xsorted)
-            hd[0,i] = hd_mean
-            #
-            hd[1,i] = np.dot(w, (xsorted-hd_mean)**2)
-            #
-        hd[0, prob == 0] = xsorted[0]
-        hd[0, prob == 1] = xsorted[-1]
-        if var:
-            hd[1, prob == 0] = hd[1, prob == 1] = np.nan
-            return hd
-        return hd[0]
-    # Initialization & checks ---------
-    data = ma.array(data, copy=False, dtype=float_)
-    p = np.array(prob, copy=False, ndmin=1)
-    # Computes quantiles along axis (or globally)
-    if (axis is None) or (data.ndim == 1):
-        result = _hd_1D(data, p, var)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        result = ma.apply_along_axis(_hd_1D, axis, data, p, var)
-    #
-    return ma.fix_invalid(result, copy=False)
-
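Illustration (not part of the deleted file; sample data invented): the Harrell-Davis weights used in _hd_1D are increments of a Beta((n+1)p, (n+1)(1-p)) CDF, so a single quantile can be sketched as:

    import numpy as np
    from scipy.stats.distributions import beta

    x = np.sort([2., 4., 7., 8., 9., 11., 12., 13., 15., 22.])
    n, p = x.size, 0.5
    v = np.arange(n + 1) / float(n)
    cdf = beta.cdf(v, (n + 1) * p, (n + 1) * (1 - p))
    w = cdf[1:] - cdf[:-1]      # weights over the order statistics, sum to 1
    hd = np.dot(w, x)           # Harrell-Davis estimate of the p-th quantile
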
-#..............................................................................
-def hdmedian(data, axis=-1, var=False):
-    """Returns the Harrell-Davis estimate of the median along the given axis.
-
-Parameters
-----------
-    data: ndarray
-        Data array.
-    axis : int
-        Axis along which to compute the quantiles. If None, use a flattened array.
-    var : boolean
-        Whether to return the variance of the estimate.
-
-    """
-    result = hdquantiles(data,[0.5], axis=axis, var=var)
-    return result.squeeze()
-
-
-#..............................................................................
-def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
-    """Computes the standard error of the Harrell-Davis quantile estimates by jackknife.
-
-
-Parameters
-----------
-    data: ndarray
-        Data array.
-    prob: sequence
-        Sequence of quantiles to compute.
-    axis : int
-        Axis along which to compute the quantiles. If None, use a flattened array.
-
-Notes
------
-    The function is restricted to 2D arrays.
-
-    """
-    def _hdsd_1D(data,prob):
-        "Computes the std error for 1D arrays."
-        xsorted = np.sort(data.compressed())
-        n = len(xsorted)
-        #.........
-        hdsd = np.empty(len(prob), float_)
-        if n < 2:
-            hdsd.flat = np.nan
-            return hdsd
-        #.........
-        vv = np.arange(n) / float(n-1)
-        betacdf = beta.cdf
-        #
-        for (i,p) in enumerate(prob):
-            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
-            w = _w[1:] - _w[:-1]
-            mx_ = np.fromiter([np.dot(w,xsorted[np.r_[range(0,k),
-                                                      range(k+1,n)].astype(int_)])
-                                  for k in range(n)], dtype=float_)
-            mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1)
-            hdsd[i] = float(n-1) * np.sqrt(np.diag(mx_var).diagonal() / float(n))
-        return hdsd
-    # Initialization & checks ---------
-    data = ma.array(data, copy=False, dtype=float_)
-    p = np.array(prob, copy=False, ndmin=1)
-    # Computes quantiles along axis (or globally)
-    if (axis is None):
-        result = _hdsd_1D(data, p)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)
-    #
-    return ma.fix_invalid(result, copy=False).ravel()
-
-
-#####--------------------------------------------------------------------------
-#---- --- Confidence intervals ---
-#####--------------------------------------------------------------------------
-
-def trimmed_mean_ci(data, limits=(0.2,0.2), inclusive=(True,True),
-                    alpha=0.05, axis=None):
-    """Returns the selected confidence interval of the trimmed mean along the
-given axis.
-
-Parameters
-----------
-    data : sequence
-        Input data. The data is transformed to a masked array
-    limits : tuple of float
-        Proportion of the data to cut from each side of the data, as floats
-        between 0. and 1. As a result, (limits[0]+limits[1])*n values are
-        actually trimmed.
-    alpha : float
-        Confidence level of the intervals.
-    inclusive : tuple of boolean
-        Tuple indicating whether the number of data being masked on each side
-        should be truncated (True) or rounded (False).
-    axis : int
-        Axis along which to cut. If None, uses a flattened version of the input.
-
-    """
-    data = ma.array(data, copy=False)
-    trimmed = mstats.trimr(data, limits=limits, inclusive=inclusive, axis=axis)
-    tmean = trimmed.mean(axis)
-    tstde = mstats.trimmed_stde(data,limits=limits,inclusive=inclusive,axis=axis)
-    df = trimmed.count(axis) - 1
-    tppf = t.ppf(1-alpha/2.,df)
-    return np.array((tmean - tppf*tstde, tmean+tppf*tstde))
-
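Illustration (hypothetical data; assumes trimmed_mean_ci is importable, e.g. from the renamed mstats_extras module): a 95% confidence interval for the 20% trimmed mean of a masked sample:

    import numpy as np
    import numpy.ma as ma

    data = ma.masked_invalid([3.1, 2.7, np.nan, 4.2, 3.8,
                              2.9, 5.0, 3.3, 4.1, 3.6])
    lo, hi = trimmed_mean_ci(data, limits=(0.2, 0.2), alpha=0.05)
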
-#..............................................................................
-def mjci(data, prob=[0.25,0.5,0.75], axis=None):
-    """Returns the Maritz-Jarrett estimators of the standard error of selected
-experimental quantiles of the data.
-
-Parameters
------------
-    data: ndarray
-        Data array.
-    prob: sequence
-        Sequence of quantiles to compute.
-    axis : int
-        Axis along which to compute the quantiles. If None, use a flattened array.
-
-    """
-    def _mjci_1D(data, p):
-        data = np.sort(data.compressed())
-        n = data.size
-        prob = (np.array(p) * n + 0.5).astype(int_)
-        betacdf = beta.cdf
-        #
-        mj = np.empty(len(prob), float_)
-        x = np.arange(1,n+1, dtype=float_) / n
-        y = x - 1./n
-        for (i,m) in enumerate(prob):
-            (m1,m2) = (m-1, n-m)
-            W = betacdf(x,m1,m2) - betacdf(y,m1,m2)
-            C1 = np.dot(W,data)
-            C2 = np.dot(W,data**2)
-            mj[i] = np.sqrt(C2 - C1**2)
-        return mj
-    #
-    data = ma.array(data, copy=False)
-    assert data.ndim <= 2, "Array should be 2D at most !"
-    p = np.array(prob, copy=False, ndmin=1)
-    # Computes quantiles along axis (or globally)
-    if (axis is None):
-        return _mjci_1D(data, p)
-    else:
-        return ma.apply_along_axis(_mjci_1D, axis, data, p)
-
-#..............................................................................
-def mquantiles_cimj(data, prob=[0.25,0.50,0.75], alpha=0.05, axis=None):
-    """Computes the alpha confidence interval for the selected quantiles of the
-data, with Maritz-Jarrett estimators.
-
-Parameters
-----------
-    data: ndarray
-        Data array.
-    prob: sequence
-        Sequence of quantiles to compute.
-    alpha : float
-        Confidence level of the intervals.
-    axis : integer
-        Axis along which to compute the quantiles. If None, use a flattened array.
-    """
-    alpha = min(alpha, 1-alpha)
-    z = norm.ppf(1-alpha/2.)
-    xq = mstats.mquantiles(data, prob, alphap=0, betap=0, axis=axis)
-    smj = mjci(data, prob, axis=axis)
-    return (xq - z * smj, xq + z * smj)
-
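Illustration (hypothetical data; assumes mjci as defined above is in scope): the interval is just a normal approximation centered on the sample quantiles, with the Maritz-Jarrett standard errors setting the half-width:

    import numpy.ma as ma
    from scipy.stats.distributions import norm
    import scipy.stats.mstats as mstats

    data = ma.array([4., 1., 3., 7., 2., 6., 5., 9., 8., 10.])
    xq = mstats.mquantiles(data, [0.5], alphap=0, betap=0)
    half = norm.ppf(1 - 0.05 / 2.) * mjci(data, [0.5])
    ci_lo, ci_hi = xq - half, xq + half
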
-
-#.............................................................................
-def median_cihs(data, alpha=0.05, axis=None):
-    """Computes the alpha-level confidence interval for the median of the data,
-following the Hettmansperger-Sheather method.
-
-Parameters
-----------
-    data : sequence
-        Input data. Masked values are discarded. The input should be 1D only, or
-        axis should be set to None.
-    alpha : float
-        Confidence level of the intervals.
-    axis : integer
-        Axis along which to compute the quantiles. If None, use a flattened array.
-    """
-    def _cihs_1D(data, alpha):
-        data = np.sort(data.compressed())
-        n = len(data)
-        alpha = min(alpha, 1-alpha)
-        k = int(binom._ppf(alpha/2., n, 0.5))
-        gk = binom.cdf(n-k,n,0.5) - binom.cdf(k-1,n,0.5)
-        if gk < 1-alpha:
-            k -= 1
-            gk = binom.cdf(n-k,n,0.5) - binom.cdf(k-1,n,0.5)
-        gkk = binom.cdf(n-k-1,n,0.5) - binom.cdf(k,n,0.5)
-        I = (gk - 1 + alpha)/(gk - gkk)
-        lambd = (n-k) * I / float(k + (n-2*k)*I)
-        lims = (lambd*data[k] + (1-lambd)*data[k-1],
-                lambd*data[n-k-1] + (1-lambd)*data[n-k])
-        return lims
-    data = ma.array(data, copy=False)
-    # Computes quantiles along axis (or globally)
-    if (axis is None):
-        result = _cihs_1D(data.compressed(), alpha)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        result = ma.apply_along_axis(_cihs_1D, axis, data, alpha)
-    #
-    return result
-
-#..............................................................................
-def compare_medians_ms(group_1, group_2, axis=None):
-    """Compares the medians from two independent groups along the given axis.
-
-The comparison is performed using the McKean-Schrader estimate of the standard
-error of the medians.
-
-Parameters
-----------
-    group_1 : {sequence}
-        First dataset.
-    group_2 : {sequence}
-        Second dataset.
-    axis : {integer}
-        Axis along which the medians are estimated. If None, the arrays are flattened.
-
-Returns
--------
-    A (p,) array of comparison values.
-
-    """
-    (med_1, med_2) = (ma.median(group_1,axis=axis), ma.median(group_2,axis=axis))
-    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
-                      mstats.stde_median(group_2, axis=axis))
-    W = np.abs(med_1 - med_2) / ma.sqrt(std_1**2 + std_2**2)
-    return 1 - norm.cdf(W)
-
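Illustration (hypothetical data; -999. marks missing values):

    import numpy.ma as ma

    g1 = ma.masked_values([19., 22., 24., 27., 21., 28., 30., 23.], -999.)
    g2 = ma.masked_values([18., 17., 21., 20., 16., -999., 19., 15.], -999.)
    p = compare_medians_ms(g1, g2)   # small p suggests the medians differ
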
-
-def idealfourths(data, axis=None):
-    """Returns an estimate of the lower and upper quartiles of the data along
-    the given axis, as computed with the ideal fourths.
-    """
-    def _idf(data):
-        x = data.compressed()
-        n = len(x)
-        if n < 3:
-            return [np.nan,np.nan]
-        (j,h) = divmod(n/4. + 5/12.,1)
-        j = int(j)
-        qlo = (1-h)*x[j-1] + h*x[j]
-        k = n - j
-        qup = (1-h)*x[k] + h*x[k-1]
-        return [qlo, qup]
-    data = ma.sort(data, axis=axis).view(MaskedArray)
-    if (axis is None):
-        return _idf(data)
-    else:
-        return ma.apply_along_axis(_idf, axis, data)
-
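Illustration (standalone sketch of the ideal-fourths rule in _idf, with the index cast to int for clarity; data invented):

    import numpy as np

    x = np.sort(np.arange(1., 11.))      # ten clean data points
    n = len(x)
    j, h = divmod(n / 4. + 5. / 12., 1)  # integer part j, fraction h
    j = int(j)
    qlo = (1 - h) * x[j - 1] + h * x[j]  # lower ideal fourth
    k = n - j
    qup = (1 - h) * x[k] + h * x[k - 1]  # upper ideal fourth
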
-
-def rsh(data, points=None):
-    """Evaluates Rosenblatt's shifted histogram estimators for each point
-on the dataset 'data'.
-
-Parameters
-    data : sequence
-        Input data. Masked values are ignored.
-    points : sequence
-        Sequence of points where to evaluate Rosenblatt shifted histogram.
-        If None, use the data.
-    """
-    data = ma.array(data, copy=False)
-    if points is None:
-        points = data
-    else:
-        points = np.array(points, copy=False, ndmin=1)
-    if data.ndim != 1:
-        raise AttributeError("The input array should be 1D only !")
-    n = data.count()
-    r = idealfourths(data, axis=None)
-    h = 1.2 * (r[-1]-r[0]) / n**(1./5)
-    nhi = (data[:,None] <= points[None,:] + h).sum(0)
-    nlo = (data[:,None] < points[None,:] - h).sum(0)
-    return (nhi-nlo) / (2.*n*h)
-
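Illustration (hypothetical usage): the estimator can be evaluated on a regular grid rather than at the data points themselves:

    import numpy as np
    import numpy.ma as ma

    data = ma.array(np.random.normal(size=100))
    grid = np.linspace(-3., 3., 61)
    density = rsh(data, points=grid)   # shifted-histogram density on the grid
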
-
-###############################################################################

Deleted: trunk/scipy/stats/mstats.py
===================================================================
--- trunk/scipy/stats/mstats.py	2008-11-21 21:09:29 UTC (rev 5159)
+++ trunk/scipy/stats/mstats.py	2008-11-22 05:26:45 UTC (rev 5160)
@@ -1,1914 +0,0 @@
-"""
-An extension of scipy.stats.stats to support masked arrays
-
-:author: Pierre GF Gerard-Marchant
-:contact: pierregm_at_uga_edu
-"""
-#TODO : f_value_wilks_lambda looks botched... what are dfnum & dfden for ?
-#TODO : ttest_rel looks botched:  what are x1,x2,v1,v2 for ?
-#TODO : reimplement ksonesamp
-
-__author__ = "Pierre GF Gerard-Marchant"
-__docformat__ = "restructuredtext en"
-
-__all__ = ['argstoarray',
-           'betai',
-           'chisquare','count_tied_groups',
-           'describe',
-           'f_oneway','f_value_wilks_lambda','find_repeats','friedmanchisquare',
-           'gmean',
-           'hmean',
-           'kendalltau','kendalltau_seasonal','kruskal','kruskalwallis',
-           'ks_twosamp','ks_2samp','kurtosis','kurtosistest',
-           'linregress',
-           'mannwhitneyu', 'meppf','mode','moment','mquantiles','msign',
-           'normaltest',
-           'obrientransform',
-           'pearsonr','plotting_positions','pointbiserialr',
-           'rankdata',
-           'samplestd','samplevar','scoreatpercentile','sem','std',
-           'sen_seasonal_slopes','signaltonoise','skew','skewtest','spearmanr',
-           'stderr',
-           'theilslopes','threshold','tmax','tmean','tmin','trim','trimboth',
-           'trimtail','trima','trimr','trimmed_mean','trimmed_std',
-           'trimmed_stde','trimmed_var','tsem','ttest_1samp','ttest_onesamp',
-           'ttest_ind','ttest_rel','tvar',
-           'var','variation',
-           'winsorize',
-           'z','zmap','zs'
-           ]
-
-import numpy as np
-from numpy import ndarray
-import numpy.ma as ma
-from numpy.ma import MaskedArray, masked, nomask
-
-import itertools
-import warnings
-
-
-import scipy.stats as stats
-import scipy.special as special
-import scipy.misc as misc
-import scipy.stats.futil as futil
-
-
-genmissingvaldoc = """
-Notes
------
-    Missing values are considered pair-wise: if a value is missing in x,
-    the corresponding value in y is masked.
-"""
-#------------------------------------------------------------------------------
-def _chk_asarray(a, axis):
-    if axis is None:
-        a = ma.ravel(a)
-        outaxis = 0
-    else:
-        a = ma.asanyarray(a)
-        outaxis = axis
-    return a, outaxis
-
-def _chk2_asarray(a, b, axis):
-    if axis is None:
-        a = ma.ravel(a)
-        b = ma.ravel(b)
-        outaxis = 0
-    else:
-        a = ma.asanyarray(a)
-        b = ma.asanyarray(b)
-        outaxis = axis
-    return a, b, outaxis
-
-def _chk_size(a,b):
-    a = ma.asanyarray(a)
-    b = ma.asanyarray(b)
-    (na, nb) = (a.size, b.size)
-    if na != nb:
-        raise ValueError("The size of the input array should match!"\
-                         " (%s <> %s)" % (na,nb))
-    return (a,b,na)
-
-def argstoarray(*args):
-    """Constructs a 2D array from a sequence of sequences. Sequences are filled
-    with missing values to match the length of the longest sequence.
-
-    Returns
-    -------
-        output : MaskedArray
-            a (mxn) masked array, where m is the number of arguments and n the
-            length of the longest argument.
-    """
-    if len(args) == 1 and not isinstance(args[0], ndarray):
-        output = ma.asarray(args[0])
-        assert output.ndim == 2, "The input should be 2D!"
-    else:
-        n = len(args)
-        m = max([len(k) for k in args])
-        output = ma.array(np.empty((n,m), dtype=float), mask=True)
-        for (k,v) in enumerate(args):
-            output[k,:len(v)] = v
-    output[np.logical_not(np.isfinite(output._data))] = masked
-    return output
-
-
-
-#####--------------------------------------------------------------------------
-#---- --- Ranking ---
-#####--------------------------------------------------------------------------
-
-def find_repeats(arr):
-    """Find repeats in arr and return a tuple (repeats, repeat_count).
-    Masked values are discarded.
-
-Parameters
-----------
-    arr : sequence
-        Input array. The array is flattened if it is not 1D.
-
-Returns
--------
-    repeats : ndarray
-        Array of repeated values.
-    counts : ndarray
-        Array of counts.
-
-    """
-    marr = ma.compressed(arr)
-    if not marr.size:
-        return (np.array(0), np.array(0))
-    (v1, v2, n) = futil.dfreps(ma.array(ma.compressed(arr), copy=True))
-    return (v1[:n], v2[:n])
-
-
-def count_tied_groups(x, use_missing=False):
-    """Counts the number of tied values in x, and returns a dictionary
-    {number of tied values in a group : number of such groups}.
-
-Parameters
-----------
-    x : sequence
-        Sequence of data on which to counts the ties
-    use_missing : boolean
-        Whether to consider missing values as tied.
-
-Example
--------
-    >>> z = [0, 0, 0, 2, 2, 2, 3, 3, 4, 5, 6]
-    >>> count_tied_groups(z)
-    {2: 1, 3: 2}
-    >>> # The ties were 0 (3x), 2 (3x) and 3 (2x)
-    >>> z = ma.array([0, 0, 1, 2, 2, 2, 3, 3, 4, 5, 6])
-    >>> count_tied_groups(z)
-    {2: 2, 3: 1}
-    >>> # The ties were 0 (2x), 2 (3x) and 3 (2x)
-    >>> z[[1,-1]] = masked
-    >>> count_tied_groups(z, use_missing=True)
-    {2: 2, 3: 1}
-    >>> # The ties were 2 (3x), 3 (2x) and masked (2x)
-    """
-    nmasked = ma.getmask(x).sum()
-    # We need the copy as find_repeats will overwrite the initial data
-    data = ma.compressed(x).copy()
-    (ties, counts) = find_repeats(data)
-    nties = {}
-    if len(ties):
-        nties = dict(zip(np.unique(counts), itertools.repeat(1)))
-        nties.update(dict(zip(*find_repeats(counts))))
-    if nmasked and use_missing:
-        try:
-            nties[nmasked] += 1
-        except KeyError:
-            nties[nmasked] = 1
-    return nties
-
-
-def rankdata(data, axis=None, use_missing=False):
-    """Returns the rank (also known as order statistics) of each data point
-    along the given axis.
-
-    If some values are tied, their rank is averaged.
-    If some values are masked, their rank is set to 0 if use_missing is False,
-    or set to the average rank of the unmasked values if use_missing is True.
-
-    Parameters
-    ----------
-        data : sequence
-            Input data. The data is transformed to a masked array
-        axis : {None,int} optional
-            Axis along which to perform the ranking.
-            If None, the array is first flattened. An exception is raised if
-            the axis is specified for arrays with a dimension larger than 2
-        use_missing : {boolean} optional
-            Whether the masked values have a rank of 0 (False) or equal to the
-            average rank of the unmasked values (True).
-    """
-    #
-    def _rank1d(data, use_missing=False):
-        n = data.count()
-        rk = np.empty(data.size, dtype=float)
-        idx = data.argsort()
-        rk[idx[:n]] = np.arange(1,n+1)
-        #
-        if use_missing:
-            rk[idx[n:]] = (n+1)/2.
-        else:
-            rk[idx[n:]] = 0
-        #
-        repeats = find_repeats(data.copy())
-        for r in repeats[0]:
-            condition = (data==r).filled(False)
-            rk[condition] = rk[condition].mean()
-        return rk
-    #
-    data = ma.array(data, copy=False)
-    if axis is None:
-        if data.ndim > 1:
-            return _rank1d(data.ravel(), use_missing).reshape(data.shape)
-        else:
-            return _rank1d(data, use_missing)
-    else:
-        return ma.apply_along_axis(_rank1d,axis,data,use_missing).view(ndarray)
-
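Illustration (invented values) of the tie and missing-value conventions described above:

    import numpy.ma as ma

    x = ma.array([10, 20, 20, 30, 40], mask=[0, 0, 0, 0, 1])
    rankdata(x)                    # [1., 2.5, 2.5, 4., 0.]: the tied 20s share
                                   # rank 2.5, the masked entry gets rank 0
    rankdata(x, use_missing=True)  # the masked entry gets (n+1)/2 = 2.5 instead
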
-
-#####--------------------------------------------------------------------------
-#---- --- Central tendency ---
-#####--------------------------------------------------------------------------
-
-def gmean(a, axis=0):
-    a, axis = _chk_asarray(a, axis)
-    log_a = ma.log(a)
-    return ma.exp(log_a.mean(axis=axis))
-gmean.__doc__ = stats.gmean.__doc__
-
-
-def hmean(a, axis=0):
-    a, axis = _chk_asarray(a, axis)
-    if isinstance(a, MaskedArray):
-        size = a.count(axis)
-    else:
-        size = a.shape[axis]
-    return size / (1.0/a).sum(axis)
-hmean.__doc__ = stats.hmean.__doc__
-
-
-def mode(a, axis=0):
-    def _mode1D(a):
-        (rep,cnt) = find_repeats(a)
-        if not cnt.ndim:
-            return (0, 0)
-        elif cnt.size:
-            return (rep[cnt.argmax()], cnt.max())
-        return (a[0], 1)
-    #
-    if axis is None:
-        output = _mode1D(ma.ravel(a))
-        output = (ma.array(output[0]), ma.array(output[1]))
-    else:
-        output = ma.apply_along_axis(_mode1D, axis, a)
-        newshape = list(a.shape)
-        newshape[axis] = 1
-        slices = [slice(None)] * output.ndim
-        slices[axis] = 0
-        modes = output[tuple(slices)].reshape(newshape)
-        slices[axis] = 1
-        counts = output[tuple(slices)].reshape(newshape)
-        output = (modes, counts)
-    return output
-mode.__doc__ = stats.mode.__doc__
-
-
-#####--------------------------------------------------------------------------
-#---- --- Probabilities ---
-#####--------------------------------------------------------------------------
-
-def betai(a, b, x):
-    x = np.asanyarray(x)
-    x = ma.where(x < 1.0, x, 1.0)  # if x > 1 then return 1.0
-    return special.betainc(a, b, x)
-betai.__doc__ = stats.betai.__doc__
-
-
-#####--------------------------------------------------------------------------
-#---- --- Correlation ---
-#####--------------------------------------------------------------------------
-
-def msign(x):
-    """Returns the sign of x, or 0 if x is masked."""
-    return ma.filled(np.sign(x), 0)
-
-cov = ma.cov
-
-corrcoef = ma.corrcoef
-
-
-def pearsonr(x,y):
-    """Calculates a Pearson correlation coefficient and the p-value for testing
-    non-correlation.
-
-    The Pearson correlation coefficient measures the linear relationship
-    between two datasets. Strictly speaking, Pearson's correlation requires
-    that each dataset be normally distributed. Like other correlation
-    coefficients, this one varies between -1 and +1 with 0 implying no
-    correlation. Correlations of -1 or +1 imply an exact linear
-    relationship. Positive correlations imply that as x increases, so does
-    y. Negative correlations imply that as x increases, y decreases.
-
-    The p-value roughly indicates the probability of an uncorrelated system
-    producing datasets that have a Pearson correlation at least as extreme
-    as the one computed from these datasets. The p-values are not entirely
-    reliable but are probably reasonable for datasets larger than 500 or so.
-
-    Parameters
-    ----------
-    x : 1D array
-    y : 1D array the same length as x
-
-    Returns
-    -------
-    (Pearson's correlation coefficient,
-     2-tailed p-value)
-
-    References
-    ----------
-    http://www.statsoft.com/textbook/glosp.html#Pearson%20Correlation
-    """
-    (x, y, n) = _chk_size(x, y)
-    (x, y) = (x.ravel(), y.ravel())
-    # Get the common mask and the total nb of unmasked elements
-    m = ma.mask_or(ma.getmask(x), ma.getmask(y))
-    n -= m.sum()
-    df = n-2
-    if df < 0:
-        return (masked, masked)
-    #
-    (mx, my) = (x.mean(), y.mean())
-    (xm, ym) = (x-mx, y-my)
-    #
-    r_num = n*(ma.add.reduce(xm*ym))
-    r_den = n*ma.sqrt(ma.dot(xm,xm)*ma.dot(ym,ym))
-    r = (r_num / r_den)
-    # Presumably, if r > 1, then it is only some small artifact of floating
-    # point arithmetic.
-    r = min(r, 1.0)
-    r = max(r, -1.0)
-    #
-    t = ma.sqrt(df/((1.0-r)*(1.0+r))) * r
-    if t is masked:
-        prob = 0.
-    else:
-        prob = betai(0.5*df,0.5,df/(df+t*t))
-    return (r,prob)
-
-
-def spearmanr(x, y, use_ties=True):
-    """Calculates a Spearman rank-order correlation coefficient and the p-value
-    to test for non-correlation.
-
-    The Spearman correlation is a nonparametric measure of the linear
-    relationship between two datasets. Unlike the Pearson correlation, the
-    Spearman correlation does not assume that both datasets are normally
-    distributed. Like other correlation coefficients, this one varies
-    between -1 and +1 with 0 implying no correlation. Correlations of -1 or
-    +1 imply an exact linear relationship. Positive correlations imply that
-    as x increases, so does y. Negative correlations imply that as x
-    increases, y decreases.
-
-    Missing values are discarded pair-wise: if a value is missing in x, the
-    corresponding value in y is masked.
-
-    The p-value roughly indicates the probability of an uncorrelated system
-    producing datasets that have a Spearman correlation at least as extreme
-    as the one computed from these datasets. The p-values are not entirely
-    reliable but are probably reasonable for datasets larger than 500 or so.
-
-Parameters
-----------
-    x : 1D array
-    y : 1D array the same length as x
-        The lengths of both arrays must be > 2.
-    use_ties : {True, False} optional
-        Whether the correction for ties should be computed.
-
-Returns
--------
-    (Spearman correlation coefficient,
-     2-tailed p-value)
-
-    References
-    ----------
-    [CRCProbStat2000] section 14.7
-    """
-    (x, y, n) = _chk_size(x, y)
-    (x, y) = (x.ravel(), y.ravel())
-    #
-    m = ma.mask_or(ma.getmask(x), ma.getmask(y))
-    n -= m.sum()
-    if m is not nomask:
-        x = ma.array(x, mask=m, copy=True)
-        y = ma.array(y, mask=m, copy=True)
-    df = n-2
-    if df < 0:
-        raise ValueError("The input must have at least 3 entries!")
-    # Gets the ranks and rank differences
-    rankx = rankdata(x)
-    ranky = rankdata(y)
-    dsq = np.add.reduce((rankx-ranky)**2)
-    # Tie correction
-    if use_ties:
-        xties = count_tied_groups(x)
-        yties = count_tied_groups(y)
-        corr_x = np.sum(v*k*(k**2-1) for (k,v) in xties.iteritems())/12.
-        corr_y = np.sum(v*k*(k**2-1) for (k,v) in yties.iteritems())/12.
-    else:
-        corr_x = corr_y = 0
-    denom = n*(n**2 - 1)/6.
-    if corr_x != 0 or corr_y != 0:
-        rho = denom - dsq - corr_x - corr_y
-        rho /= ma.sqrt((denom-2*corr_x)*(denom-2*corr_y))
-    else:
-        rho = 1. - dsq/denom
-    #
-    t = ma.sqrt(ma.divide(df,(rho+1.0)*(1.0-rho))) * rho
-    if t is masked:
-        prob = 0.
-    else:
-        prob = betai(0.5*df,0.5,df/(df+t*t))
-    return rho, prob
-
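Illustration (invented data) of the pair-wise missing-value handling: masking x[2] drops the third (x, y) pair from the computation:

    import numpy.ma as ma

    x = ma.array([1., 2., 3., 4., 5., 6.], mask=[0, 0, 1, 0, 0, 0])
    y = ma.array([2., 1., 4., 3., 6., 5.])
    rho, p = spearmanr(x, y)   # computed on the 5 complete pairs
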
-
-def kendalltau(x, y, use_ties=True, use_missing=False):
-    """Computes Kendall's rank correlation tau on two variables *x* and *y*.
-
-    Parameters
-    ----------
-    xdata: sequence
-        First data list (for example, time).
-    ydata: sequence
-        Second data list.
-    use_ties: {True, False} optional
-        Whether ties correction should be performed.
-    use_missing: {False, True} optional
-        Whether missing data should be allocated a rank of 0 (False) or the
-        average rank (True)
-
-    Returns
-    -------
-        tau : float
-            Kendall tau
-        prob : float
-            Approximate 2-side p-value.
-    """
-    (x, y, n) = _chk_size(x, y)
-    (x, y) = (x.flatten(), y.flatten())
-    m = ma.mask_or(ma.getmask(x), ma.getmask(y))
-    if m is not nomask:
-        x = ma.array(x, mask=m, copy=True)
-        y = ma.array(y, mask=m, copy=True)
-        n -= m.sum()
-    #
-    if n < 2:
-        return (np.nan, np.nan)
-    #
-    rx = ma.masked_equal(rankdata(x, use_missing=use_missing),0)
-    ry = ma.masked_equal(rankdata(y, use_missing=use_missing),0)
-    idx = rx.argsort()
-    (rx, ry) = (rx[idx], ry[idx])
-    C = np.sum((((ry[i+1:]>ry[i])*(rx[i+1:]>rx[i])).filled(0).sum()
-                for i in range(len(ry)-1)))
-    D = np.sum((((ry[i+1:]<ry[i])*(rx[i+1:]>rx[i])).filled(0).sum()
-                for i in range(len(ry)-1)))
-    if use_ties:
-        xties = count_tied_groups(x)
-        yties = count_tied_groups(y)
-        corr_x = np.sum(v*k*(k-1) for (k,v) in xties.iteritems())
-        corr_y = np.sum(v*k*(k-1) for (k,v) in yties.iteritems())
-        denom = ma.sqrt((n*(n-1)-corr_x) * (n*(n-1)-corr_y)) / 2.
-    else:
-        denom = n*(n-1)/2.
-    tau = (C-D) / denom
-    #
-    var_s = n*(n-1)*(2*n+5)
-    if use_ties:
-        var_s -= np.sum(v*k*(k-1)*(2*k+5) for (k,v) in xties.iteritems())
-        var_s -= np.sum(v*k*(k-1)*(2*k+5) for (k,v) in yties.iteritems())
-        v1 = np.sum(v*k*(k-1) for (k,v) in xties.iteritems()) * \
-             np.sum(v*k*(k-1) for (k,v) in yties.iteritems())
-        v1 /= 2.*n*(n-1)
-        if n > 2:
-            v2 = np.sum(v*k*(k-1)*(k-2) for (k,v) in xties.iteritems()) * \
-                 np.sum(v*k*(k-1)*(k-2) for (k,v) in yties.iteritems())
-            v2 /= 9.*n*(n-1)*(n-2)
-        else:
-            v2 = 0
-    else:
-        v1 = v2 = 0
-    var_s /= 18.
-    var_s += (v1 + v2)
-    z = (C-D)/np.sqrt(var_s)
-    prob = special.erfc(abs(z)/np.sqrt(2))
-    return (tau,prob)
-
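Illustration (tie-free sketch, invented data): with x pre-sorted, tau reduces to counting concordant and discordant pairs, exactly as C and D are accumulated above:

    import numpy as np

    x = np.array([1., 2., 3., 4., 5.])
    y = np.array([2., 1., 4., 3., 5.])
    C = sum(((y[i+1:] > y[i]) & (x[i+1:] > x[i])).sum()
            for i in range(len(y) - 1))           # concordant pairs
    D = sum(((y[i+1:] < y[i]) & (x[i+1:] > x[i])).sum()
            for i in range(len(y) - 1))           # discordant pairs
    tau = (C - D) / (len(x) * (len(x) - 1) / 2.)  # 0.6 for these data
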
-
-def kendalltau_seasonal(x):
-    """Computes a multivariate extension Kendall's rank correlation tau, designed
-    for seasonal data.
-
-Parameters
-----------
-    x: 2D array
-        Array of seasonal data, with seasons in columns.
-    """
-    x = ma.array(x, subok=True, copy=False, ndmin=2)
-    (n,m) = x.shape
-    n_p = x.count(0)
-    #
-    S_szn = np.sum(msign(x[i:]-x[i]).sum(0) for i in range(n))
-    S_tot = S_szn.sum()
-    #
-    n_tot = x.count()
-    ties = count_tied_groups(x.compressed())
-    corr_ties =  np.sum(v*k*(k-1) for (k,v) in ties.iteritems())
-    denom_tot = ma.sqrt(n_tot*(n_tot-1)*(n_tot*(n_tot-1)-corr_ties))/2.
-    #
-    R = rankdata(x, axis=0, use_missing=True)
-    K = ma.empty((m,m), dtype=int)
-    covmat = ma.empty((m,m), dtype=float)
-#    cov_jj = ma.empty(m, dtype=float)
-    denom_szn = ma.empty(m, dtype=float)
-    for j in range(m):
-        ties_j = count_tied_groups(x[:,j].compressed())
-        corr_j = np.sum(v*k*(k-1) for (k,v) in ties_j.iteritems())
-        cmb = n_p[j]*(n_p[j]-1)
-        for k in range(j,m,1):
-            K[j,k] = np.sum(msign((x[i:,j]-x[i,j])*(x[i:,k]-x[i,k])).sum()
-                               for i in range(n))
-            covmat[j,k] = (K[j,k] +4*(R[:,j]*R[:,k]).sum() - \
-                           n*(n_p[j]+1)*(n_p[k]+1))/3.
-            K[k,j] = K[j,k]
-            covmat[k,j] = covmat[j,k]
-#        cov_jj[j] = (nn_p*(2*n_p[j]+5))
-#        cov_jj[j] -= np.sum(v*k*(k-1)*(2*k+5) for (k,v) in ties_j.iteritems())
-#        cov_jj[j] /= 18.
-        denom_szn[j] = ma.sqrt(cmb*(cmb-corr_j)) / 2.
-    var_szn = covmat.diagonal()
-    #
-    z_szn = msign(S_szn) * (abs(S_szn)-1) / ma.sqrt(var_szn)
-    z_tot_ind = msign(S_tot) * (abs(S_tot)-1) / ma.sqrt(var_szn.sum())
-    z_tot_dep = msign(S_tot) * (abs(S_tot)-1) / ma.sqrt(covmat.sum())
-    #
-    prob_szn = special.erfc(abs(z_szn)/np.sqrt(2))
-    prob_tot_ind = special.erfc(abs(z_tot_ind)/np.sqrt(2))
-    prob_tot_dep = special.erfc(abs(z_tot_dep)/np.sqrt(2))
-    #
-    chi2_tot = (z_szn*z_szn).sum()
-    chi2_trd = m * z_szn.mean()**2
-    output = {'seasonal tau': S_szn/denom_szn,
-              'global tau': S_tot/denom_tot,
-              'global tau (alt)': S_tot/denom_szn.sum(),
-              'seasonal p-value': prob_szn,
-              'global p-value (indep)': prob_tot_ind,
-              'global p-value (dep)': prob_tot_dep,
-              'chi2 total': chi2_tot,
-              'chi2 trend': chi2_trd,
-              }
-    return output
-
-
-def pointbiserialr(x, y):
-    x = ma.fix_invalid(x, copy=True).astype(bool)
-    y = ma.fix_invalid(y, copy=True).astype(float)
-    # Get rid of the missing data ..........
-    m = ma.mask_or(ma.getmask(x), ma.getmask(y))
-    if m is not nomask:
-        unmask = np.logical_not(m)
-        x = x[unmask]
-        y = y[unmask]
-    #
-    n = len(x)
-    # phat is the fraction of x values that are True
-    phat = x.sum() / float(n)
-    y0 = y[~x]  # y-values where x is False
-    y1 = y[x]  # y-values where x is True
-    y0m = y0.mean()
-    y1m = y1.mean()
-    #
-    rpb = (y1m - y0m)*np.sqrt(phat * (1-phat)) / y.std()
-    #
-    df = n-2
-    t = rpb*ma.sqrt(df/(1.0-rpb**2))
-    prob = betai(0.5*df, 0.5, df/(df+t*t))
-    return rpb, prob
-
-if stats.pointbiserialr.__doc__:
-    pointbiserialr.__doc__ = stats.pointbiserialr.__doc__ + genmissingvaldoc
-
-
-def linregress(*args):
-    if len(args) == 1:  # a single (2,N) or (N,2) array was passed
-        args = ma.array(args[0], copy=True)
-        if len(args) == 2:
-            x = args[0]
-            y = args[1]
-        else:
-            x = args[:,0]
-            y = args[:,1]
-    else:
-        x = ma.array(args[0]).flatten()
-        y = ma.array(args[1]).flatten()
-    m = ma.mask_or(ma.getmask(x), ma.getmask(y))
-    if m is not nomask:
-        x = ma.array(x,mask=m)
-        y = ma.array(y,mask=m)
-    n = len(x)
-    (xmean, ymean) = (x.mean(), y.mean())
-    (xm, ym) = (x-xmean, y-ymean)
-    (Sxx, Syy) = (ma.add.reduce(xm*xm), ma.add.reduce(ym*ym))
-    Sxy = ma.add.reduce(xm*ym)
-    r_den = ma.sqrt(Sxx*Syy)
-    if r_den == 0.0:
-        r = 0.0
-    else:
-        r = Sxy / r_den
-        if (r > 1.0):
-            r = 1.0 # from numerical error
-    #z = 0.5*log((1.0+r+TINY)/(1.0-r+TINY))
-    df = n-2
-    t = r * ma.sqrt(df/(1.0-r*r))
-    prob = betai(0.5*df,0.5,df/(df+t*t))
-    slope = Sxy / Sxx
-    intercept = ymean - slope*xmean
-    sterrest = ma.sqrt(1.-r*r) * y.std()
-    return slope, intercept, r, prob, sterrest, Syy/Sxx
-
-if stats.linregress.__doc__:
-    linregress.__doc__ = stats.linregress.__doc__ + genmissingvaldoc
-
-
-def theilslopes(y, x=None, alpha=0.05):
-    """Computes the Theil slope over the dataset (x,y), as the median of all slopes
-    between paired values.
-
-    Parameters
-    ----------
-        y : sequence
-            Dependent variable.
-        x : {None, sequence} optional
-            Independent variable. If None, use arange(len(y)) instead.
-        alpha : float
-            Confidence degree.
-
-    Returns
-    -------
-        medslope : float
-            Theil slope
-        medintercept : float
-            Intercept of the Theil line, as median(y)-medslope*median(x)
-        lo_slope : float
-            Lower bound of the confidence interval on medslope
-        up_slope : float
-            Upper bound of the confidence interval on medslope
-
-    """
-    y = ma.asarray(y).flatten()
-    n = len(y)
-    if x is None:
-        x = ma.arange(len(y), dtype=float)
-    else:
-        x = ma.asarray(x).flatten()
-        if len(x) != n:
-            raise ValueError, "Incompatible lengths ! (%s<>%s)" % (n,len(x))
-    m = ma.mask_or(ma.getmask(x), ma.getmask(y))
-    y._mask = x._mask = m
-    ny = y.count()
-    #
-    slopes = ma.hstack([(y[i+1:]-y[i])/(x[i+1:]-x[i]) for i in range(n-1)])
-    slopes.sort()
-    medslope = ma.median(slopes)
-    medinter = ma.median(y) - medslope*ma.median(x)
-    #
-    if alpha > 0.5:
-        alpha = 1.-alpha
-    z = stats.distributions.norm.ppf(alpha/2.)
-    #
-    (xties, yties) = (count_tied_groups(x), count_tied_groups(y))
-    nt = ny*(ny-1)/2.
-    sigsq = (ny*(ny-1)*(2*ny+5)/18.)
-    sigsq -= np.sum(v*k*(k-1)*(2*k+5) for (k,v) in xties.iteritems())
-    sigsq -= np.sum(v*k*(k-1)*(2*k+5) for (k,v) in yties.iteritems())
-    sigma = np.sqrt(sigsq)
-
-    Ru = min(np.round((nt - z*sigma)/2. + 1), len(slopes)-1)
-    Rl = max(np.round((nt + z*sigma)/2.), 0)
-    delta = slopes[[Rl,Ru]]
-    return medslope, medinter, delta[0], delta[1]
-
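Illustration (plain-numpy sketch, ignoring masks; data invented) of the definition used above, the median over all pairwise slopes:

    import numpy as np
    from itertools import combinations

    y = np.array([1.1, 2.0, 2.9, 4.2, 5.1])
    x = np.arange(len(y), dtype=float)
    slopes = [(y[j] - y[i]) / (x[j] - x[i])
              for i, j in combinations(range(len(y)), 2)]
    medslope = np.median(slopes)
    medinter = np.median(y) - medslope * np.median(x)
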
-
-def sen_seasonal_slopes(x):
-    x = ma.array(x, subok=True, copy=False, ndmin=2)
-    (n,_) = x.shape
-    # Get list of slopes per season
-    szn_slopes = ma.vstack([(x[i+1:]-x[i])/np.arange(1,n-i)[:,None]
-                            for i in range(n)])
-    szn_medslopes = ma.median(szn_slopes, axis=0)
-    medslope = ma.median(szn_slopes, axis=None)
-    return szn_medslopes, medslope
-
-
-#####--------------------------------------------------------------------------
-#---- --- Inferential statistics ---
-#####--------------------------------------------------------------------------
-
-def ttest_onesamp(a, popmean):
-    a = ma.asarray(a)
-    x = a.mean(axis=None)
-    v = a.var(axis=None,ddof=1)
-    n = a.count(axis=None)
-    df = n-1
-    svar = ((n-1)*v) / float(df)
-    t = (x-popmean)/ma.sqrt(svar*(1.0/n))
-    prob = betai(0.5*df,0.5,df/(df+t*t))
-    return t,prob
-ttest_onesamp.__doc__ = stats.ttest_1samp.__doc__
-ttest_1samp = ttest_onesamp
-
-
-def ttest_ind(a, b, axis=0):
-    a, b, axis = _chk2_asarray(a, b, axis)
-    (x1, x2) = (a.mean(axis), b.mean(axis))
-    (v1, v2) = (a.var(axis=axis, ddof=1), b.var(axis=axis, ddof=1))
-    (n1, n2) = (a.count(axis), b.count(axis))
-    df = n1+n2-2
-    svar = ((n1-1)*v1+(n2-1)*v2) / float(df)
-    t = (x1-x2)/ma.sqrt(svar*(1.0/n1 + 1.0/n2))
-    t = ma.filled(t, 1)           # replace NaN t-values with 1.0
-    probs = betai(0.5*df,0.5,float(df)/(df+t*t)).reshape(t.shape)
-    return t, probs.squeeze()
-ttest_ind.__doc__ = stats.ttest_ind.__doc__
-
-
-def ttest_rel(a,b,axis=None):
-    a, b, axis = _chk2_asarray(a, b, axis)
-    if len(a)!=len(b):
-        raise ValueError, 'unequal length arrays'
-    (x1, x2) = (a.mean(axis), b.mean(axis))
-    (v1, v2) = (a.var(axis=axis, ddof=1), b.var(axis=axis, ddof=1))
-    n = a.count(axis)
-    df = (n-1.0)
-    d = (a-b).astype('d')
-    denom = ma.sqrt((n*ma.add.reduce(d*d,axis) - ma.add.reduce(d,axis)**2) /df)
-    #zerodivproblem = denom == 0
-    t = ma.add.reduce(d, axis) / denom
-    t = ma.filled(t, 1)
-    probs = betai(0.5*df,0.5,df/(df+t*t)).reshape(t.shape).squeeze()
-    return t, probs
-ttest_rel.__doc__ = stats.ttest_rel.__doc__
-
-
-def chisquare(f_obs, f_exp=None):
-    f_obs = ma.asarray(f_obs)
-    if f_exp is None:
-        f_exp = ma.array([f_obs.mean(axis=0)] * len(f_obs))
-    f_exp = f_exp.astype(float)
-    chisq = ma.add.reduce((f_obs-f_exp)**2 / f_exp)
-    return chisq, stats.chisqprob(chisq, f_obs.count(0)-1)
-chisquare.__doc__ = stats.chisquare.__doc__
-
-
-def mannwhitneyu(x,y, use_continuity=True):
-    """Computes the Mann-Whitney on samples x and y.
-    Missing values in x and/or y are discarded.
-
-    Parameters
-    ----------
-        x : sequence
-        y : sequence
-        use_continuity : {True, False} optional
-            Whether a continuity correction (1/2.) should be taken into account.
-
-    Returns
-    -------
-        u : float
-            The Mann-Whitney statistic
-        prob : float
-            Approximate p-value assuming a normal distribution.
-
-    """
-    x = ma.asarray(x).compressed().view(ndarray)
-    y = ma.asarray(y).compressed().view(ndarray)
-    ranks = rankdata(np.concatenate([x,y]))
-    (nx, ny) = (len(x), len(y))
-    nt = nx + ny
-    U = ranks[:nx].sum() - nx*(nx+1)/2.
-    U = max(U, nx*ny - U)
-    u = nx*ny - U
-    #
-    mu = (nx*ny)/2.
-    sigsq = (nt**3 - nt)/12.
-    ties = count_tied_groups(ranks)
-    sigsq -= np.sum(v*(k**3-k) for (k,v) in ties.iteritems())/12.
-    sigsq *= nx*ny/float(nt*(nt-1))
-    #
-    if use_continuity:
-        z = (U - 1/2. - mu) / ma.sqrt(sigsq)
-    else:
-        z = (U - mu) / ma.sqrt(sigsq)
-    prob = special.erfc(abs(z)/np.sqrt(2))
-    return (u, prob)
-
-
-def kruskalwallis(*args):
-    output = argstoarray(*args)
-    ranks = ma.masked_equal(rankdata(output, use_missing=False), 0)
-    sumrk = ranks.sum(-1)
-    ngrp = ranks.count(-1)
-    ntot = ranks.count()
-#    ssbg = (sumrk**2/ranks.count(-1)).sum() - ranks.sum()**2/ntotal
-#    H = ssbg / (ntotal*(ntotal+1)/12.)
-    H = 12./(ntot*(ntot+1)) * (sumrk**2/ngrp).sum() - 3*(ntot+1)
-    # Tie correction
-    ties = count_tied_groups(ranks)
-    T = 1. - np.sum(v*(k**3-k) for (k,v) in ties.iteritems())/float(ntot**3-ntot)
-    if T == 0:
-        raise ValueError, 'All numbers are identical in kruskal'
-    H /= T
-    #
-    df = len(output) - 1
-    prob = stats.chisqprob(H,df)
-    return (H, prob)
-kruskal = kruskalwallis
-kruskalwallis.__doc__ = stats.kruskal.__doc__
-
-
-_kolmog2 = special.kolmogorov
-def _kolmog1(x,n):
-    if x <= 0:
-        return 0
-    if x >= 1:
-        return 1
-    j = np.arange(np.floor(n*(1-x))+1)
-    return 1 - x * np.sum(np.exp(np.log(misc.comb(n,j))
-                                       + (n-j) * np.log(1-x-j/float(n))
-                                       + (j-1) * np.log(x+j/float(n))))
-
-
-def ks_twosamp(data1, data2, alternative="two_sided"):
-    """Computes the Kolmogorov-Smirnov test on two samples.
-    Missing values are discarded.
-
-    Parameters
-    ----------
-        data1 : sequence
-            First data set
-        data2 : sequence
-            Second data set
-        alternative : {'two_sided', 'less', 'greater'} optional
-            Indicates the alternative hypothesis.
-
-    Returns
-    -------
-        d : float
-            Value of the Kolmogorov Smirnov test
-        p : float
-            Corresponding p-value.
-
-    """
-    (data1, data2) = (ma.asarray(data1), ma.asarray(data2))
-    (n1, n2) = (data1.count(), data2.count())
-    n = (n1*n2/float(n1+n2))
-    mix = ma.concatenate((data1.compressed(), data2.compressed()))
-    mixsort = mix.argsort(kind='mergesort')
-    csum = np.where(mixsort<n1, 1./n1, -1./n2).cumsum()
-    # Check for ties
-    if len(np.unique(mix)) < (n1+n2):
-        csum = csum[np.r_[np.diff(mix[mixsort]).nonzero()[0],-1]]
-    #
-    alternative = str(alternative).lower()[0]
-    if alternative == 't':
-        d = ma.abs(csum).max()
-        prob = _kolmog2(np.sqrt(n)*d)
-    elif alternative == 'l':
-        d = -csum.min()
-        prob = np.exp(-2*n*d**2)
-    elif alternative == 'g':
-        d = csum.max()
-        prob = np.exp(-2*n*d**2)
-    else:
-        raise ValueError("Invalid value for the alternative hypothesis: "\
-                         "should be in 'two_sided', 'less' or 'greater'")
-    return (d, prob)
-ks_2samp = ks_twosamp
-
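Illustration (tie-free sketch, invented data): the cumulative sum above encodes both empirical CDFs at once, stepping up by 1/n1 for points from the first sample and down by 1/n2 for the second:

    import numpy as np

    d1 = np.array([1.2, 1.9, 2.4, 3.3, 4.0])
    d2 = np.array([1.5, 2.8, 3.1, 4.4, 5.0])
    mix = np.concatenate([d1, d2])
    steps = np.where(mix.argsort(kind='mergesort') < len(d1),
                     1. / len(d1), -1. / len(d2)).cumsum()
    D = np.abs(steps).max()   # largest gap between the two empirical CDFs
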
-
-def ks_twosamp_old(data1, data2):
-    """ Computes the Kolmogorov-Smirnov statistic on 2 samples.
-    Returns: KS D-value, p-value
-    #"""
-    (data1, data2) = [ma.asarray(d).compressed() for d in (data1,data2)]
-    return stats.ks_2samp(data1,data2)
-
-
-#####--------------------------------------------------------------------------
-#---- --- Trimming ---
-#####--------------------------------------------------------------------------
-
-def threshold(a, threshmin=None, threshmax=None, newval=0):
-    """Clip array to a given value.
-
-Similar to numpy.clip(), except that values less than threshmin or
-greater than threshmax are replaced by newval, instead of by
-threshmin and threshmax respectively.
-
-Parameters
-----------
-    a : ndarray
-        Input data
-    threshmin : {None, float} optional
-        Lower threshold. If None, set to the minimum value.
-    threshmax : {None, float} optional
-        Upper threshold. If None, set to the maximum value.
-    newval : {0, float} optional
-        Value outside the thresholds.
-
-Returns
--------
-    a, with values less (greater) than threshmin (threshmax) replaced with newval.
-
-"""
-    a = ma.array(a, copy=True)
-    mask = np.zeros(a.shape, dtype=bool)
-    if threshmin is not None:
-        mask |= (a < threshmin).filled(False)
-    if threshmax is not None:
-        mask |= (a > threshmax).filled(False)
-    a[mask] = newval
-    return a
-
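Illustration (hypothetical values) of the difference from numpy.clip:

    import numpy.ma as ma

    a = ma.array([1, 3, 5, 7, 9, 11])
    threshold(a, threshmin=2, threshmax=8, newval=0)
    # -> [0, 3, 5, 7, 0, 0]: out-of-range values replaced, not clipped
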
-
-def trima(a, limits=None, inclusive=(True,True)):
-    """Trims an array by masking the data outside some given limits.
-    Returns a masked version of the input array.
-
-    Parameters
-    ----------
-    a : sequence
-        Input array.
-    limits : {None, tuple} optional
-        Tuple of (lower limit, upper limit) in absolute values.
-        Values of the input array lower (greater) than the lower (upper) limit
-        will be masked. A limit of None indicates an open interval.
-    inclusive : {(True,True) tuple} optional
-        Tuple of (lower flag, upper flag), indicating whether values exactly
-        equal to the lower (upper) limit are allowed.
-
-    """
-    a = ma.asarray(a)
-    a.unshare_mask()
-    if limits is None:
-        return a
-    (lower_lim, upper_lim) = limits
-    (lower_in, upper_in) = inclusive
-    condition = False
-    if lower_lim is not None:
-        if lower_in:
-            condition |= (a < lower_lim)
-        else:
-            condition |= (a <= lower_lim)
-    if upper_lim is not None:
-        if upper_in:
-            condition |= (a > upper_lim)
-        else:
-            condition |= (a >= upper_lim)
-    a[condition.filled(True)] = masked
-    return a
-
-
-def trimr(a, limits=None, inclusive=(True, True), axis=None):
-    """Trims an array by masking some proportion of the data on each end.
-    Returns a masked version of the input array.
-
-    Parameters
-    ----------
-    a : sequence
-        Input array.
-    limits : {None, tuple} optional
-        Tuple of the percentages to cut on each side of the array, with respect
-        to the number of unmasked data, as floats between 0. and 1.
-        With n the number of unmasked data before trimming, the (n*limits[0])th
-        smallest data and the (n*limits[1])th largest data are masked, and the
-        total number of unmasked data after trimming is n*(1.-sum(limits)).
-        The value of one limit can be set to None to indicate an open interval.
-    inclusive : {(True,True) tuple} optional
-        Tuple of flags indicating whether the number of data being masked on the
-        left (right) end should be truncated (True) or rounded (False) to integers.
-    axis : {None,int} optional
-        Axis along which to trim. If None, the whole array is trimmed, but its
-        shape is maintained.
-
-    """
-    def _trimr1D(a, low_limit, up_limit, low_inclusive, up_inclusive):
-        n = a.count()
-        idx = a.argsort()
-        if low_limit:
-            if low_inclusive:
-                lowidx = int(low_limit*n)
-            else:
-                lowidx = np.round(low_limit*n)
-            a[idx[:lowidx]] = masked
-        if up_limit is not None:
-            if up_inclusive:
-                upidx = n - int(n*up_limit)
-            else:
-                upidx = n- np.round(n*up_limit)
-            a[idx[upidx:]] = masked
-        return a
-    #
-    a = ma.asarray(a)
-    a.unshare_mask()
-    if limits is None:
-        return a
-    # Check the limits
-    (lolim, uplim) = limits
-    errmsg = "The proportion to cut from the %s should be between 0. and 1."
-    if lolim is not None:
-        if lolim > 1. or lolim < 0:
-            raise ValueError(errmsg % 'beginning' + "(got %s)" % lolim)
-    if uplim is not None:
-        if uplim > 1. or uplim < 0:
-            raise ValueError(errmsg % 'end' + "(got %s)" % uplim)
-    #
-    (loinc, upinc) = inclusive
-    #
-    if axis is None:
-        shp = a.shape
-        return _trimr1D(a.ravel(),lolim,uplim,loinc,upinc).reshape(shp)
-    else:
-        return ma.apply_along_axis(_trimr1D, axis, a, lolim,uplim,loinc,upinc)
-
-trimdoc = """
-    Parameters
-    ----------
-    a : sequence
-        Input array
-    limits : {None, tuple} optional
-        If relative == False, tuple (lower limit, upper limit) in absolute values.
-        Values of the input array lower (greater) than the lower (upper) limit are
-        masked.
-        If relative == True, tuple (lower percentage, upper percentage) to cut
-        on each side of the  array, with respect to the number of unmasked data.
-        With n the number of unmasked data before trimming, the (n*limits[0])th
-        smallest data and the (n*limits[1])th largest data are masked, and the
-        total number of unmasked data after trimming is n*(1.-sum(limits)).
-        In each case, the value of one limit can be set to None to indicate an
-        open interval.
-        If limits is None, no trimming is performed
-    inclusive : {(True, True) tuple} optional
-        If relative==False, tuple indicating whether values exactly equal to the
-        absolute limits are allowed.
-        If relative==True, tuple indicating whether the number of data being masked
-        on each side should be truncated (True) or rounded (False).
-    relative : {False, True} optional
-        Whether to consider the limits as absolute values (False) or proportions
-        to cut (True).
-    axis : {None, integer}, optional
-        Axis along which to trim.
-"""
-
-
-def trim(a, limits=None, inclusive=(True,True), relative=False, axis=None):
-    """Trims an array by masking the data outside some given limits.
-    Returns a masked version of the input array.
-   %s
-
-    Examples
-    --------
-        >>> z = [ 1, 2, 3, 4, 5, 6, 7, 8, 9,10]
-        >>> trim(z,(3,8))
-        [--,--, 3, 4, 5, 6, 7, 8,--,--]
-        >>> trim(z,(0.1,0.2),relative=True)
-        [--, 2, 3, 4, 5, 6, 7, 8,--,--]
-
-
-    """
-    if relative:
-        return trimr(a, limits=limits, inclusive=inclusive, axis=axis)
-    else:
-        return trima(a, limits=limits, inclusive=inclusive)
-
-if trim.__doc__:
-    trim.__doc__ = trim.__doc__ % trimdoc
-
-
-def trimboth(data, proportiontocut=0.2, inclusive=(True,True), axis=None):
-    """Trims the data by masking the int(proportiontocut*n) smallest and
-    int(proportiontocut*n) largest values of data along the given axis, where n
-    is the number of unmasked values before trimming.
-
-Parameters
-----------
-    data : ndarray
-        Data to trim.
-    proportiontocut : {0.2, float} optional
-        Percentage of trimming (as a float between 0 and 1).
-        If n is the number of unmasked values before trimming, the number of
-        values after trimming is:
-            (1-2*proportiontocut)*n.
-    inclusive : {(True, True) tuple} optional
-        Tuple indicating whether the number of data being masked on each side
-        should be truncated (True) or rounded (False).
-    axis : {None, integer}, optional
-        Axis along which to perform the trimming.
-        If None, the input array is first flattened.
-
-    """
-    return trimr(data, limits=(proportiontocut,proportiontocut),
-                 inclusive=inclusive, axis=axis)
-
-#..............................................................................
-def trimtail(data, proportiontocut=0.2, tail='left', inclusive=(True,True),
-             axis=None):
-    """Trims the data by masking int(trim*n) values from ONE tail of the
-    data along the given axis, where n is the number of unmasked values.
-
-Parameters
-----------
-    data : {ndarray}
-        Data to trim.
-    proportiontocut : {0.2, float} optional
-        Percentage of trimming. If n is the number of unmasked values
-        before trimming, the number of values after trimming is
-        (1-proportiontocut)*n.
-    tail : {'left','right'} optional
-        If left (right), the ``proportiontocut`` lowest (greatest) values will
-        be masked.
-    inclusive : {(True, True) tuple} optional
-        Tuple indicating whether the number of data being masked on each side
-        should be truncated (True) or rounded (False).
-    axis : {None, integer}, optional
-        Axis along which to perform the trimming.
-        If None, the input array is first flattened.
-
-    """
-    tail = str(tail).lower()[0]
-    if tail == 'l':
-        limits = (proportiontocut,None)
-    elif tail == 'r':
-        limits = (None, proportiontocut)
-    else:
-        raise TypeError("The tail argument should be in ('left','right')")
-    return trimr(data, limits=limits, axis=axis, inclusive=inclusive)
-
-trim1 = trimtail
-
-def trimmed_mean(a, limits=(0.1,0.1), inclusive=(1,1), relative=True,
-                 axis=None):
-    """Returns the trimmed mean of the data along the given axis.
-
-    %s
-
-    """ % trimdoc
-    if (not isinstance(limits,tuple)) and isinstance(limits,float):
-        limits = (limits, limits)
-    if relative:
-        return trimr(a,limits=limits,inclusive=inclusive,axis=axis).mean(axis=axis)
-    else:
-        return trima(a,limits=limits,inclusive=inclusive).mean(axis=axis)
-
-
-def trimmed_var(a, limits=(0.1,0.1), inclusive=(1,1), relative=True,
-                axis=None, ddof=0):
-    """Returns the trimmed variance of the data along the given axis.
-
-    %s
-    ddof : {0,integer}, optional
-        Means Delta Degrees of Freedom. The denominator used during computations
-        is (n-ddof). ddof=0 corresponds to a biased estimate, ddof=1 to an
-        unbiased estimate of the variance.
-
-    """ % trimdoc
-    if (not isinstance(limits,tuple)) and isinstance(limits,float):
-        limits = (limits, limits)
-    if relative:
-        out = trimr(a,limits=limits, inclusive=inclusive,axis=axis)
-    else:
-        out = trima(a,limits=limits,inclusive=inclusive)
-    return out.var(axis=axis, ddof=ddof)
-
-
-def trimmed_std(a, limits=(0.1,0.1), inclusive=(1,1), relative=True,
-                axis=None, ddof=0):
-    """Returns the trimmed standard deviation of the data along the given axis.
-
-    %s
-    ddof : {0,integer}, optional
-        Means Delta Degrees of Freedom. The denominator used during computations
-        is (n-ddof). ddof=0 corresponds to a biased estimate, ddof=1 to an
-        unbiased estimate of the variance.
-
-    """ % trimdoc
-    if (not isinstance(limits,tuple)) and isinstance(limits,float):
-        limits = (limits, limits)
-    if relative:
-        out = trimr(a,limits=limits,inclusive=inclusive,axis=axis)
-    else:
-        out = trima(a,limits=limits,inclusive=inclusive)
-    return out.std(axis=axis,ddof=ddof)
-
-
-def trimmed_stde(a, limits=(0.1,0.1), inclusive=(1,1), axis=None):
-    """Returns the standard error of the trimmed mean of the data along the given
-    axis.
-    Parameters
-    ----------
-    a : sequence
-        Input array
-    limits : {(0.1,0.1), tuple of float} optional
-        tuple (lower percentage, upper percentage) to cut  on each side of the
-        array, with respect to the number of unmasked data.
-        With n the number of unmasked data before trimming, the (n*limits[0])th
-        smallest data and the (n*limits[1])th largest data are masked, and the
-        total number of unmasked data after trimming is n*(1.-sum(limits)).
-        In each case, the value of one limit can be set to None to indicate an
-        open interval.
-        If limits is None, no trimming is performed
-    inclusive : {(True, True) tuple} optional
-        Tuple indicating whether the number of data being masked on each side
-        should be rounded (True) or truncated (False).
-    axis : {None, integer}, optional
-        Axis along which to trim.
-    """
-    #........................
-    def _trimmed_stde_1D(a, low_limit, up_limit, low_inclusive, up_inclusive):
-        "Returns the standard error of the trimmed mean for a 1D input data."
-        n = a.count()
-        idx = a.argsort()
-        # Default to no trimming on either side, so that the indices are
-        # always defined even when a limit is None or 0.
-        lowidx = 0
-        upidx = n
-        if low_limit:
-            if low_inclusive:
-                lowidx = int(low_limit*n)
-            else:
-                lowidx = np.round(low_limit*n)
-            a[idx[:lowidx]] = masked
-        if up_limit is not None:
-            if up_inclusive:
-                upidx = n - int(n*up_limit)
-            else:
-                upidx = n - np.round(n*up_limit)
-            a[idx[upidx:]] = masked
-        # Winsorize the trimmed extremes before taking the standard deviation.
-        a[idx[:lowidx]] = a[idx[lowidx]]
-        a[idx[upidx:]] = a[idx[upidx-1]]
-        winstd = a.std(ddof=1)
-        return winstd / ((1-(low_limit or 0.)-(up_limit or 0.))*np.sqrt(len(a)))
-    #........................
-    a = ma.array(a, copy=True, subok=True)
-    a.unshare_mask()
-    if limits is None:
-        return a.std(axis=axis,ddof=1)/ma.sqrt(a.count(axis))
-    if (not isinstance(limits,tuple)) and isinstance(limits,float):
-        limits = (limits, limits)
-    # Check the limits
-    (lolim, uplim) = limits
-    errmsg = "The proportion to cut from the %s should be between 0. and 1."
-    if lolim is not None:
-        if lolim > 1. or lolim < 0:
-            raise ValueError(errmsg % 'beginning' + "(got %s)" % lolim)
-    if uplim is not None:
-        if uplim > 1. or uplim < 0:
-            raise ValueError(errmsg % 'end' + "(got %s)" % uplim)
-    #
-    (loinc, upinc) = inclusive
-    if (axis is None):
-        shp = a.shape
-        return _trimmed_stde_1D(a.ravel(),lolim,uplim,loinc,upinc)
-    else:
-        assert a.ndim <= 2, "Array should be 2D at most !"
-        return ma.apply_along_axis(_trimmed_stde_1D, axis, a,
-                                   lolim,uplim,loinc,upinc)
-
-
-def tmean(a, limits=None, inclusive=(True,True)):
-    return trima(a, limits=limits, inclusive=inclusive).mean()
-tmean.__doc__ = stats.tmean.__doc__
-
-def tvar(a, limits=None, inclusive=(True,True)):
-    return trima(a, limits=limits, inclusive=inclusive).var()
-tvar.__doc__ = stats.tvar.__doc__
-
-def tmin(a, lowerlimit=None, axis=0, inclusive=True):
-    a, axis = _chk_asarray(a, axis)
-    am = trima(a, (lowerlimit, None), (inclusive, False))
-    return ma.minimum.reduce(am, axis)
-tmin.__doc__  = stats.tmin.__doc__
-
-def tmax(a, upperlimit, axis=0, inclusive=True):
-    a, axis = _chk_asarray(a, axis)
-    am = trima(a, (None, upperlimit), (False, inclusive))
-    return ma.maximum.reduce(am, axis)
-tmax.__doc__  = stats.tmax.__doc__
-
-def tsem(a, limits=None, inclusive=(True,True)):
-    a = ma.asarray(a).ravel()
-    if limits is None:
-        n = float(a.count())
-        return a.std()/ma.sqrt(n)
-    am = trima(a.ravel(), limits, inclusive)
-    sd = np.sqrt(am.var())
-    # Standard error of the mean: std / sqrt(n), not std / n.
-    return sd / np.sqrt(am.count())
-tsem.__doc__ = stats.tsem.__doc__
-
-
-def winsorize(a, limits=None, inclusive=(True,True), inplace=False, axis=None):
-    """Returns a Winsorized version of the input array.
-
-    The lowest ``limits[0]`` fraction of the values is set to the value at
-    the (limits[0])th percentile, and the highest ``limits[1]`` fraction is
-    set to the value at the (1-limits[1])th percentile.
-    Masked values are skipped.
-
-    Parameters
-    ----------
-    a : sequence
-        Input array.
-    limits : {None, tuple of float}, optional
-        Tuple of the percentages to cut on each side of the array, with respect
-        to the number of unmasked data, as floats between 0. and 1.
-        If n is the number of unmasked data before winsorizing, the
-        (n*limits[0]) smallest values are replaced by the smallest remaining
-        value and the (n*limits[1]) largest values by the largest remaining
-        value; no data are masked.
-        The value of one limit can be set to None to indicate an open interval.
-    inclusive : {(True, True) tuple}, optional
-        Tuple indicating whether the number of data being replaced on each side
-        should be rounded (True) or truncated (False).
-    inplace : {False, True}, optional
-        Whether to winsorize in place (True) or to use a copy (False).
-    axis : {None, int}, optional
-        Axis along which to winsorize. If None, the whole array is winsorized,
-        but its shape is maintained.
-
-    """
-    def _winsorize1D(a, low_limit, up_limit, low_include, up_include):
-        n = a.count()
-        idx = a.argsort()
-        if low_limit:
-            if low_include:
-                lowidx = int(low_limit*n)
-            else:
-                lowidx = np.round(low_limit*n)
-            a[idx[:lowidx]] = a[idx[lowidx]]
-        if up_limit is not None:
-            if up_include:
-                upidx = n - int(n*up_limit)
-            else:
-                upidx = n- np.round(n*up_limit)
-            a[idx[upidx:]] = a[idx[upidx-1]]
-        return a
-    # We are going to modify a: better make a copy.
-    a = ma.array(a, copy=np.logical_not(inplace))
-    #
-    if limits is None:
-        return a
-    if (not isinstance(limits,tuple)) and isinstance(limits,float):
-        limits = (limits, limits)
-    # Check the limits
-    (lolim, uplim) = limits
-    errmsg = "The proportion to cut from the %s should be between 0. and 1."
-    if lolim is not None:
-        if lolim > 1. or lolim < 0:
-            raise ValueError(errmsg % 'beginning' + "(got %s)" % lolim)
-    if uplim is not None:
-        if uplim > 1. or uplim < 0:
-            raise ValueError(errmsg % 'end' + "(got %s)" % uplim)
-    #
-    (loinc, upinc) = inclusive
-    #
-    if axis is None:
-        shp = a.shape
-        return _winsorize1D(a.ravel(),lolim,uplim,loinc,upinc).reshape(shp)
-    else:
-        return ma.apply_along_axis(_winsorize1D, axis,a,lolim,uplim,loinc,upinc)
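
A sketch of the effect on the same sample as the trimming example (figures
from test_winsorization below, same import assumption as above): winsorizing
replaces the extremes instead of masking them, so the count is unchanged
while the variance drops:

    import numpy.ma as ma
    import scipy.stats.mstats as mstats

    data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                     296, 299, 306, 376, 428, 515, 666, 1310, 2611])
    w = mstats.winsorize(data, (0.2, 0.2))
    print(w.count())                 # still 19: values are replaced, not masked
    print(round(w.var(ddof=1), 1))   # 21551.4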
-
-
-#####--------------------------------------------------------------------------
-#---- --- Moments ---
-#####--------------------------------------------------------------------------
-
-def moment(a, moment=1, axis=0):
-    a, axis = _chk_asarray(a, axis)
-    if moment == 1:
-        # By definition the first moment about the mean is 0.
-        shape = list(a.shape)
-        del shape[axis]
-        if shape:
-            # return an actual array of the appropriate shape
-            return np.zeros(shape, dtype=float)
-        else:
-            # the input was 1D, so return a scalar instead of a rank-0 array
-            return np.float64(0.0)
-    else:
-        mn = ma.expand_dims(a.mean(axis=axis), axis)
-        s = ma.power((a-mn), moment)
-        return s.mean(axis=axis)
-moment.__doc__ = stats.moment.__doc__
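
For concreteness (the numbers match test_moment in the test suite below,
same import assumption as above): on [1, 2, 3, 4], whose mean is 2.5, the
biased central moments are

    import scipy.stats.mstats as mstats

    x = [1, 2, 3, 4]
    print(mstats.moment(x, 1))   # 0.0: the first central moment is 0 by definition
    print(mstats.moment(x, 2))   # 1.25, the biased variance
    print(mstats.moment(x, 4))   # 2.5625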
-
-
-def variation(a, axis=0):
-    a, axis = _chk_asarray(a, axis)
-    return a.std(axis)/a.mean(axis)
-variation.__doc__ = stats.variation.__doc__
-
-
-def skew(a, axis=0, bias=True):
-    a, axis = _chk_asarray(a,axis)
-    n = a.count(axis)
-    m2 = moment(a, 2, axis)
-    m3 = moment(a, 3, axis)
-    vals = ma.where(m2 == 0, 0, m3 / m2**1.5)
-    if not bias:
-        can_correct = (n > 2) & (m2 > 0)
-        if can_correct.any():
-            m2 = np.extract(can_correct, m2)
-            m3 = np.extract(can_correct, m3)
-            nval = ma.sqrt((n-1.0)*n)/(n-2.0)*m3/m2**1.5
-            np.place(vals, can_correct, nval)
-    return vals
-skew.__doc__ = stats.skew.__doc__
-
-
-def kurtosis(a, axis=0, fisher=True, bias=True):
-    a, axis = _chk_asarray(a, axis)
-    n = a.count(axis)
-    m2 = moment(a,2,axis)
-    m4 = moment(a,4,axis)
-    vals = ma.where(m2 == 0, 0, m4/ m2**2.0)
-    if not bias:
-        can_correct = (n > 3) & (m2 > 0)
-        if can_correct.any():
-            m2 = np.extract(can_correct, m2)
-            m4 = np.extract(can_correct, m4)
-            nval = 1.0/(n-2)/(n-3)*((n*n-1.0)*m4/m2**2.0-3*(n-1)**2.0)
-            np.place(vals, can_correct, nval+3.0)
-    if fisher:
-        return vals - 3
-    else:
-        return vals
-kurtosis.__doc__ = stats.kurtosis.__doc__
-
-def describe(a, axis=0):
-    """Computes several descriptive statistics of the passed array.
-
-    Parameters
-    ----------
-    a : array
-        Input data.
-    axis : int or None
-        Axis along which the statistics are computed.
-
-    Returns
-    -------
-    (size of the data (discarding missing values),
-     (min, max),
-     arithmetic mean,
-     biased variance,
-     biased skewness,
-     biased kurtosis)
-    """
-    a, axis = _chk_asarray(a, axis)
-    n = a.count(axis)
-    mm = (ma.minimum.reduce(a, axis), ma.maximum.reduce(a, axis))
-    m = a.mean(axis)
-    v = a.var(axis)
-    sk = skew(a, axis)
-    kurt = kurtosis(a, axis)
-    return n, mm, m, v, sk, kurt
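
A short sketch of the return value on a hypothetical masked 1D sample (the
numbers follow directly from the definitions above; same import assumption
as before):

    import numpy.ma as ma
    import scipy.stats.mstats as mstats

    x = ma.array([1., 2., 3., 4., 20.], mask=[0, 0, 0, 0, 1])
    n, minmax, mean, var, sk, kurt = mstats.describe(x)
    print(n)        # 4 -- the masked value is discarded
    print(minmax)   # (1.0, 4.0)
    print(mean)     # 2.5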
-
-#.............................................................................
-def stde_median(data, axis=None):
-    """Returns the McKean-Schrader estimate of the standard error of the sample
-median along the given axis. masked values are discarded.
-
-    Parameters
-    ----------
-        data : ndarray
-            Data to trim.
-        axis : {None,int} optional
-            Axis along which to perform the trimming.
-            If None, the input array is first flattened.
-
-    """
-    def _stdemed_1D(data):
-        data = np.sort(data.compressed())
-        n = len(sorted)
-        z = 2.5758293035489004
-        k = int(np.round((n+1)/2. - z * np.sqrt(n/4.),0))
-        return ((data[n-k] - data[k-1])/(2.*z))
-    #
-    data = ma.array(data, copy=False, subok=True)
-    if (axis is None):
-        return _stdemed_1D(data)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        return ma.apply_along_axis(_stdemed_1D, axis, data)
-
-#####--------------------------------------------------------------------------
-#---- --- Normality Tests ---
-#####--------------------------------------------------------------------------
-
-def skewtest(a, axis=0):
-    a, axis = _chk_asarray(a, axis)
-    if axis is None:
-        a = a.ravel()
-        axis = 0
-    b2 = skew(a,axis)
-    n = a.count(axis)
-    if np.min(n) < 8:
-        warnings.warn(
-            "skewtest only valid for n>=8 ... continuing anyway, n=%i" %
-            np.min(n))
-    y = b2 * ma.sqrt(((n+1)*(n+3)) / (6.0*(n-2)) )
-    beta2 = ( 3.0*(n*n+27*n-70)*(n+1)*(n+3) ) / ( (n-2.0)*(n+5)*(n+7)*(n+9) )
-    W2 = -1 + ma.sqrt(2*(beta2-1))
-    delta = 1/ma.sqrt(0.5*ma.log(W2))
-    alpha = ma.sqrt(2.0/(W2-1))
-    y = ma.where(y==0, 1, y)
-    Z = delta*ma.log(y/alpha + ma.sqrt((y/alpha)**2+1))
-    return Z, (1.0 - stats.zprob(Z))*2
-skewtest.__doc__ = stats.skewtest.__doc__
-
-def kurtosistest(a, axis=0):
-    a, axis = _chk_asarray(a, axis)
-    n = a.count(axis=axis).astype(float)
-    if np.min(n) < 20:
-        warnings.warn(
-            "kurtosistest only valid for n>=20 ... continuing anyway, n=%i" %
-            np.min(n))
-    b2 = kurtosis(a, axis, fisher=False)
-    E = 3.0*(n-1) /(n+1)
-    varb2 = 24.0*n*(n-2)*(n-3) / ((n+1)*(n+1)*(n+3)*(n+5))
-    x = (b2-E)/ma.sqrt(varb2)
-    sqrtbeta1 = 6.0*(n*n-5*n+2)/((n+7)*(n+9)) * np.sqrt((6.0*(n+3)*(n+5))/
-                                                        (n*(n-2)*(n-3)))
-    A = 6.0 + 8.0/sqrtbeta1 *(2.0/sqrtbeta1 + np.sqrt(1+4.0/(sqrtbeta1**2)))
-    term1 = 1 - 2./(9.0*A)
-    denom = 1 + x*ma.sqrt(2/(A-4.0))
-    denom[denom < 0] = masked
-    term2 = ma.power((1-2.0/A)/denom,1/3.0)
-    Z = ( term1 - term2 ) / np.sqrt(2/(9.0*A))
-    return Z, (1.0-stats.zprob(Z))*2
-kurtosistest.__doc__ = stats.kurtosistest.__doc__
-
-
-def normaltest(a, axis=0):
-    a, axis = _chk_asarray(a, axis)
-    s,_ = skewtest(a,axis)
-    k,_ = kurtosistest(a,axis)
-    k2 = s*s + k*k
-    return k2, stats.chisqprob(k2,2)
-normaltest.__doc__ = stats.normaltest.__doc__
-
-# Martinez-Iglewicz test
-# K-S test
-
-
-#####--------------------------------------------------------------------------
-#---- --- Percentiles ---
-#####--------------------------------------------------------------------------
-
-
-def mquantiles(data, prob=list([.25,.5,.75]), alphap=.4, betap=.4, axis=None,
-               limit=()):
-    """Computes empirical quantiles for a *1xN* data array.
-Samples quantile are defined by:
-*Q(p) = (1-g).x[i] +g.x[i+1]*
-where *x[j]* is the jth order statistic,
-with *i = (floor(n*p+m))*, *m=alpha+p*(1-alpha-beta)* and *g = n*p + m - i)*.
-
-Typical values of (alpha,beta) are:
-
-    - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
-    - (.5,.5)  : *p(k) = (k-1/2.)/n* : piecewise linear function (R, type 5)
-    - (0,0)    : *p(k) = k/(n+1)* : (R type 6)
-    - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
-      That's R default (R type 7)
-    - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
-      The resulting quantile estimates are approximately median-unbiased
-      regardless of the distribution of x. (R type 8)
-    - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*. Blom.
-      The resulting quantile estimates are approximately unbiased
-      if x is normally distributed (R type 9)
-    - (.4,.4)  : approximately quantile unbiased (Cunnane)
-    - (.35,.35): APL, used with PWM
-
-Parameters
-----------
-    data : sequence
-        Input data, as a sequence or array of dimension at most 2.
-    prob : sequence
-        List of quantiles to compute.
-    alphap : {0.4, float} optional
-        Plotting positions parameter.
-    betap : {0.4, float} optional
-        Plotting positions parameter.
-    axis : {None, int} optional
-        Axis along which to compute the quantiles.
-        If None, the input array is first flattened.
-    limit : tuple
-        Tuple of (lower, upper) values. Values of ``data`` outside this open
-        interval are ignored.
-    """
-    def _quantiles1D(data,m,p):
-        x = np.sort(data.compressed())
-        n = len(x)
-        if n == 0:
-            return ma.array(np.empty(len(p), dtype=float), mask=True)
-        elif n == 1:
-            return ma.array(np.resize(x, p.shape), mask=nomask)
-        aleph = (n*p + m)
-        k = np.floor(aleph.clip(1, n-1)).astype(int)
-        gamma = (aleph-k).clip(0,1)
-        return (1.-gamma)*x[(k-1).tolist()] + gamma*x[k.tolist()]
-
-    # Initialization & checks ---------
-    data = ma.array(data, copy=False)
-    if limit:
-        # Mask the values falling outside the open interval (limit[0], limit[1]).
-        condition = (limit[0] < data) & (data < limit[1])
-        data[~condition.filled(True)] = masked
-    p = np.array(prob, copy=False, ndmin=1)
-    m = alphap + p*(1.-alphap-betap)
-    # Computes quantiles along axis (or globally)
-    if (axis is None):
-        return _quantiles1D(data, m, p)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        return ma.apply_along_axis(_quantiles1D, axis, data, m, p)
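
A usage sketch with the default Cunnane plotting positions
(alphap = betap = .4), same import assumption as above: for p = .25 on the
eleven values below, m = .45, i = floor(11*.25 + .45) = 3 and g = .2, so
Q(.25) = .8*x[3] + .2*x[4] = .8*15 + .2*36 = 19.2:

    import numpy as np
    import scipy.stats.mstats as mstats

    data = np.array([6., 47., 49., 15., 42., 41., 7., 39., 43., 40., 36.])
    print(mstats.mquantiles(data))   # ~[19.2, 40.0, 42.8] for p = (.25, .5, .75)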
-
-
-def scoreatpercentile(data, per, limit=(), alphap=.4, betap=.4):
-    """Calculate the score at the given 'per' percentile of the
-    sequence a.  For example, the score at per=50 is the median.
-
-    This function is a shortcut to mquantile
-
-    """
-    if (per < 0) or (per > 100.):
-        raise ValueError("The percentile should be between 0. and 100. !"\
-                         " (got %s)" % per)
-    return mquantiles(data, prob=[per/100.], alphap=alphap, betap=betap,
-                      limit=limit, axis=0).squeeze()
-
-
-def plotting_positions(data, alpha=0.4, beta=0.4):
-    """Returns the plotting positions (or empirical percentile points) for the
-    data.
-    Plotting positions are defined as (i-alpha)/(n-alpha-beta), where:
-        - i is the rank order statistics
-        - n is the number of unmasked values along the given axis
-        - alpha and beta are two parameters.
-
-    Typical values for alpha and beta are:
-        - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
-        - (.5,.5)  : *p(k) = (k-1/2.)/n* : piecewise linear function (R, type 5)
-        - (0,0)    : *p(k) = k/(n+1)* : Weibull (R type 6)
-        - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
-          That's R default (R type 7)
-        - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
-          The resulting quantile estimates are approximately median-unbiased
-          regardless of the distribution of x. (R type 8)
-        - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*. Blom.
-          The resulting quantile estimates are approximately unbiased
-          if x is normally distributed (R type 9)
-        - (.4,.4)  : approximately quantile unbiased (Cunnane)
-        - (.35,.35): APL, used with PWM
-
-Parameters
-----------
-    data : sequence
-        Input data, as a sequence or array of dimension at most 2.
-    alpha : {0.4, float} optional
-        Plotting positions parameter.
-    beta : {0.4, float} optional
-        Plotting positions parameter.
-
-    """
-    data = ma.array(data, copy=False).reshape(1,-1)
-    n = data.count()
-    plpos = np.empty(data.size, dtype=float)
-    plpos[n:] = 0
-    plpos[data.argsort()[:n]] = (np.arange(1,n+1) - alpha)/(n+1-alpha-beta)
-    return ma.array(plpos, mask=data._mask)
-
-meppf = plotting_positions
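
A minimal sketch for five already-sorted observations with the default
alpha = beta = 0.4 (same import assumption as above), where the positions
are (i - 0.4)/(5 + 1 - 0.8) for i = 1..5:

    import numpy as np
    import scipy.stats.mstats as mstats

    pp = mstats.plotting_positions(np.arange(1., 6.))
    print(np.round(np.asarray(pp), 4))   # [0.1154 0.3077 0.5 0.6923 0.8846]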
-
-#####--------------------------------------------------------------------------
-#---- --- Variability ---
-#####--------------------------------------------------------------------------
-
-def obrientransform(*args):
-    """
-Computes a transform on input data (any number of columns).  Used to
-test for homogeneity of variance prior to running one-way stats.  Each
-array in *args is one level of a factor.  If an F_oneway() run on the
-transformed data is found significant, the variances are unequal.  From
-Maxwell and Delaney, p.112.
-
-Returns: transformed data for use in an ANOVA
-    """
-    data = argstoarray(*args).T
-    v = data.var(axis=0,ddof=1)
-    m = data.mean(0)
-    n = data.count(0).astype(float)
-    # result = ((n-1.5)*n*(data-m)**2 - 0.5*v*(n-1)) / ((n-1.)*(n-2.))
-    data -= m
-    data **= 2
-    data *= (n-1.5)*n
-    data -= 0.5*v*(n-1)
-    data /= (n-1.)*(n-2.)
-    if not ma.allclose(v,data.mean(0)):
-        raise ValueError("Lack of convergence in obrientransform.")
-    return data
-
-
-def signaltonoise(data, axis=0):
-    """Calculates the signal-to-noise ratio, as the ratio of the mean over
-    standard deviation along the given axis.
-
-    Parameters
-    ----------
-        data : sequence
-            Input data
-        axis : {0, int} optional
-            Axis along which to compute. If None, the computation is performed
-            on a flat version of the array.
-"""
-    data = ma.array(data, copy=False)
-    m = data.mean(axis)
-    sd = data.std(axis, ddof=0)
-    return m/sd
-
-
-def samplevar(data, axis=0):
-    """Returns a biased estimate of the variance of the data, as the average
-    of the squared deviations from the mean.
-
-    Parameters
-    ----------
-        data : sequence
-            Input data
-        axis : {0, int} optional
-            Axis along which to compute. If None, the computation is performed
-            on a flat version of the array.
-    """
-    return ma.asarray(data).var(axis=axis,ddof=0)
-
-
-def samplestd(data, axis=0):
-    """Returns a biased estimate of the standard deviation of the data, as the
-    square root of the average squared deviations from the mean.
-
-    Parameters
-    ----------
-        data : sequence
-            Input data
-        axis : {0,int} optional
-            Axis along which to compute. If None, the computation is performed
-            on a flat version of the array.
-
-    Notes
-    -----
-        samplestd(a) is equivalent to a.std(ddof=0)
-
-    """
-    return ma.asarray(data).std(axis=axis,ddof=0)
-
-
-def var(a,axis=None):
-    return ma.asarray(a).var(axis=axis,ddof=1)
-var.__doc__ = stats.var.__doc__
-
-def std(a,axis=None):
-    return ma.asarray(a).std(axis=axis,ddof=1)
-std.__doc__ = stats.std.__doc__
-
-def stderr(a, axis=0):
-    a, axis = _chk_asarray(a, axis)
-    return a.std(axis=axis, ddof=1) / ma.sqrt(a.count(axis=axis))
-stderr.__doc__ = stats.stderr.__doc__
-
-def sem(a, axis=0):
-    a, axis = _chk_asarray(a, axis)
-    n = a.count(axis=axis)
-    s = a.std(axis=axis,ddof=0) / ma.sqrt(n-1)
-    return s
-sem.__doc__ = stats.sem.__doc__
-
-def z(a, score):
-    a = ma.asarray(a)
-    z = (score-a.mean(None)) / a.std(axis=None, ddof=1)
-    return z
-z.__doc__ = stats.z.__doc__
-
-def zs(a):
-    a = ma.asarray(a)
-    mu = a.mean(axis=0)
-    sigma = a.std(axis=0,ddof=0)
-    return (a-mu)/sigma
-zs.__doc__ = stats.zs.__doc__
-
-def zmap(scores, compare, axis=0):
-    (scores, compare) = (ma.asarray(scores), ma.asarray(compare))
-    mns = compare.mean(axis=axis)
-    sstd = compare.std(axis=0, ddof=0)
-    return (scores - mns) / sstd
-zmap.__doc__ = stats.zmap.__doc__
-
-
-#####--------------------------------------------------------------------------
-#---- --- ANOVA ---
-#####--------------------------------------------------------------------------
-
-
-def f_oneway(*args):
-    """
-Performs a 1-way ANOVA, returning an F-value and probability given
-any number of groups.  From Heiman, pp.394-7.
-
-Usage:   f_oneway (*args)    where *args is 2 or more arrays, one per
-                                  treatment group
-Returns: f-value, probability
-"""
-    # Construct a single array of arguments: each row is a group
-    data = argstoarray(*args)
-    ngroups = len(data)
-    ntot = data.count()
-    sstot = (data**2).sum() - (data.sum())**2/float(ntot)
-    ssbg = (data.count(-1) * (data.mean(-1)-data.mean())**2).sum()
-    sswg = sstot-ssbg
-    dfbg = ngroups-1
-    dfwg = ntot - ngroups
-    msb = ssbg/float(dfbg)
-    msw = sswg/float(dfwg)
-    f = msb/msw
-    prob = stats.fprob(dfbg,dfwg,f)
-    return f, prob
-
-
-def f_value_wilks_lambda(ER, EF, dfnum, dfden, a, b):
-    """Calculation of Wilks lambda F-statistic for multivarite data, per
-    Maxwell & Delaney p.657.
-    """
-    ER = ma.array(ER, copy=False, ndmin=2)
-    EF = ma.array(EF, copy=False, ndmin=2)
-    if ma.getmask(ER).any() or ma.getmask(EF).any():
-        raise NotImplementedError("Not implemented when the inputs "\
-                                  "have missing data")
-    lmbda = np.linalg.det(EF) / np.linalg.det(ER)
-    q = ma.sqrt( ((a-1)**2*(b-1)**2 - 2) / ((a-1)**2 + (b-1)**2 -5) )
-    q = ma.filled(q, 1)
-    n_um = (1 - lmbda**(1.0/q))*(a-1)*(b-1)
-    d_en = lmbda**(1.0/q) / (n_um*q - 0.5*(a-1)*(b-1) + 1)
-    return n_um / d_en
-
-
-
-def friedmanchisquare(*args):
-    """Friedman Chi-Square is a non-parametric, one-way within-subjects ANOVA.
-    This function calculates the Friedman Chi-square test for repeated measures
-    and returns the result, along with the associated probability value.
-
-    Each input is considered a given group. Ideally, the number of treatments
-    among each group should be equal. If this is not the case, only the first
-    n treatments are taken into account, where n is the number of treatments
-    of the smallest group.
-    If a group has some missing values, the corresponding treatments are masked
-    in the other groups as well (masked values propagate across groups).
-    The test statistic is corrected for ties.
-
-    Returns: chi-square statistic, associated p-value
-    """
-    data = argstoarray(*args).astype(float)
-    k = len(data)
-    if k < 3:
-        raise ValueError("Less than 3 groups (%i): " % k +\
-                         "the Friedman test is NOT appropriate.")
-    ranked = ma.masked_values(rankdata(data, axis=0), 0)
-    if ranked._mask is not nomask:
-        ranked = ma.mask_cols(ranked)
-        ranked = ranked.compressed().reshape(k,-1).view(ndarray)
-    else:
-        ranked = ranked._data
-    (k,n) = ranked.shape
-    # Ties correction
-    repeats = np.array([find_repeats(_) for _ in ranked.T], dtype=object)
-    ties = repeats[repeats.nonzero()].reshape(-1,2)[:,-1].astype(int)
-    tie_correction = 1 - (ties**3-ties).sum()/float(n*(k**3-k))
-    #
-    ssbg = np.sum((ranked.sum(-1) - n*(k+1)/2.)**2)
-    chisq = ssbg * 12./(n*k*(k+1)) * 1./tie_correction
-    return chisq, stats.chisqprob(chisq,k-1)
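
A usage sketch on three complete groups (the figures match
test_friedmanchisq in the test suite below; same import assumption as above):

    import scipy.stats.mstats as mstats

    g1 = [9.0, 9.5, 5.0, 7.5, 9.5, 7.5, 8.0, 7.0, 8.5, 6.0]
    g2 = [7.0, 6.5, 7.0, 7.5, 5.0, 8.0, 6.0, 6.5, 7.0, 7.0]
    g3 = [6.0, 8.0, 4.0, 6.0, 7.0, 6.5, 6.0, 4.0, 6.5, 3.0]
    chisq, p = mstats.friedmanchisquare(g1, g2, g3)
    print(round(chisq, 4))   # 10.4737
    print(round(p, 6))       # 0.005317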
-
-#-############################################################################-#

Copied: trunk/scipy/stats/mstats_basic.py (from rev 5134, trunk/scipy/stats/mstats.py)

Copied: trunk/scipy/stats/mstats_extras.py (from rev 5134, trunk/scipy/stats/mmorestats.py)

Deleted: trunk/scipy/stats/tests/test_mmorestats.py
===================================================================
--- trunk/scipy/stats/tests/test_mmorestats.py	2008-11-21 21:09:29 UTC (rev 5159)
+++ trunk/scipy/stats/tests/test_mmorestats.py	2008-11-22 05:26:45 UTC (rev 5160)
@@ -1,102 +0,0 @@
-# pylint: disable-msg=W0611, W0612, W0511,R0201
-"""Tests suite for maskedArray statistics.
-
-:author: Pierre Gerard-Marchant
-:contact: pierregm_at_uga_dot_edu
-"""
-__author__ = "Pierre GF Gerard-Marchant ($Author: backtopop $)"
-
-import numpy as np
-
-import numpy.ma as ma
-
-import scipy.stats.mstats as ms
-import scipy.stats.mmorestats as mms
-
-from numpy.testing import *
-
-
-class TestMisc(TestCase):
-    #
-    def __init__(self, *args, **kwargs):
-        TestCase.__init__(self, *args, **kwargs)
-    #
-    def test_mjci(self):
-        "Tests the Marits-Jarrett estimator"
-        data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
-                          296,299,306,376,428,515,666,1310,2611])
-        assert_almost_equal(mms.mjci(data),[55.76819,45.84028,198.87875],5)
-    #
-    def test_trimmedmeanci(self):
-        "Tests the confidence intervals of the trimmed mean."
-        data = ma.array([545,555,558,572,575,576,578,580,
-                         594,605,635,651,653,661,666])
-        assert_almost_equal(ms.trimmed_mean(data,0.2), 596.2, 1)
-        assert_equal(np.round(mms.trimmed_mean_ci(data,(0.2,0.2)),1),
-                     [561.8, 630.6])
-    #
-    def test_idealfourths(self):
-        "Tests ideal-fourths"
-        test = np.arange(100)
-        assert_almost_equal(np.asarray(mms.idealfourths(test)),
-                            [24.416667,74.583333],6)
-        test_2D = test.repeat(3).reshape(-1,3)
-        assert_almost_equal(mms.idealfourths(test_2D, axis=0),
-                            [[24.416667,24.416667,24.416667],
-                             [74.583333,74.583333,74.583333]],6)
-        assert_almost_equal(mms.idealfourths(test_2D, axis=1),
-                            test.repeat(2).reshape(-1,2))
-        test = [0,0]
-        _result = mms.idealfourths(test)
-        assert(np.isnan(_result).all())
-
-#..............................................................................
-class TestQuantiles(TestCase):
-    #
-    def __init__(self, *args, **kwargs):
-        TestCase.__init__(self, *args, **kwargs)
-    #
-    def test_hdquantiles(self):
-        data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
-            0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
-            0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
-            0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
-            0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
-            0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
-            0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
-            0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
-            0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
-            0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
-            0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
-            0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
-            0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
-            0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
-            0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
-            0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
-            0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
-            0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
-            0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
-            0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]
-        #
-        assert_almost_equal(mms.hdquantiles(data,[0., 1.]),
-                            [0.006514031, 0.995309248])
-        hdq = mms.hdquantiles(data,[0.25, 0.5, 0.75])
-        assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442,])
-        hdq = mms.hdquantiles_sd(data,[0.25, 0.5, 0.75])
-        assert_almost_equal(hdq, [0.03786954, 0.03805389, 0.03800152,], 4)
-        #
-        data = np.array(data).reshape(10,10)
-        hdq = mms.hdquantiles(data,[0.25,0.5,0.75],axis=0)
-        assert_almost_equal(hdq[:,0], mms.hdquantiles(data[:,0],[0.25,0.5,0.75]))
-        assert_almost_equal(hdq[:,-1], mms.hdquantiles(data[:,-1],[0.25,0.5,0.75]))
-        hdq = mms.hdquantiles(data,[0.25,0.5,0.75],axis=0,var=True)
-        assert_almost_equal(hdq[...,0],
-                            mms.hdquantiles(data[:,0],[0.25,0.5,0.75],var=True))
-        assert_almost_equal(hdq[...,-1],
-                            mms.hdquantiles(data[:,-1],[0.25,0.5,0.75], var=True))
-
-
-###############################################################################
-
-if __name__ == "__main__":
-    run_module_suite()

Deleted: trunk/scipy/stats/tests/test_mstats.py
===================================================================
--- trunk/scipy/stats/tests/test_mstats.py	2008-11-21 21:09:29 UTC (rev 5159)
+++ trunk/scipy/stats/tests/test_mstats.py	2008-11-22 05:26:45 UTC (rev 5160)
@@ -1,509 +0,0 @@
-"""
-Tests for the stats.mstats module (support for masked arrays)
-"""
-
-
-import numpy as np
-from numpy import nan
-import numpy.ma as ma
-from numpy.ma import masked, nomask
-
-import scipy.stats.mstats as mstats
-from numpy.testing import *
-from numpy.ma.testutils import assert_equal, assert_almost_equal, \
-    assert_array_almost_equal
-
-
-class TestGMean(TestCase):
-    def test_1D(self):
-        a = (1,2,3,4)
-        actual= mstats.gmean(a)
-        desired = np.power(1*2*3*4,1./4.)
-        assert_almost_equal(actual, desired,decimal=14)
-
-        desired1 = mstats.gmean(a,axis=-1)
-        assert_almost_equal(actual, desired1, decimal=14)
-        assert not isinstance(desired1, ma.MaskedArray)
-        #
-        a = ma.array((1,2,3,4),mask=(0,0,0,1))
-        actual= mstats.gmean(a)
-        desired = np.power(1*2*3,1./3.)
-        assert_almost_equal(actual, desired,decimal=14)
-
-        desired1 = mstats.gmean(a,axis=-1)
-        assert_almost_equal(actual, desired1, decimal=14)
-    #
-    def test_2D(self):
-        a = ma.array(((1,2,3,4),(1,2,3,4),(1,2,3,4)),
-                     mask=((0,0,0,0),(1,0,0,1),(0,1,1,0)))
-        actual= mstats.gmean(a)
-        desired = np.array((1,2,3,4))
-        assert_array_almost_equal(actual, desired, decimal=14)
-        #
-        desired1 = mstats.gmean(a,axis=0)
-        assert_array_almost_equal(actual, desired1, decimal=14)
-        #
-        actual= mstats.gmean(a, -1)
-        desired = ma.array((np.power(1*2*3*4,1./4.),
-                            np.power(2*3,1./2.),
-                            np.power(1*4,1./2.)))
-        assert_array_almost_equal(actual, desired, decimal=14)
-
-class TestHMean(TestCase):
-    def test_1D(self):
-        a = (1,2,3,4)
-        actual= mstats.hmean(a)
-        desired =  4. / (1./1 + 1./2 + 1./3 + 1./4)
-        assert_almost_equal(actual, desired, decimal=14)
-        desired1 = mstats.hmean(ma.array(a),axis=-1)
-        assert_almost_equal(actual, desired1, decimal=14)
-        #
-        a = ma.array((1,2,3,4),mask=(0,0,0,1))
-        actual= mstats.hmean(a)
-        desired = 3. / (1./1 + 1./2 + 1./3)
-        assert_almost_equal(actual, desired,decimal=14)
-        desired1 = mstats.hmean(a,axis=-1)
-        assert_almost_equal(actual, desired1, decimal=14)
-
-    def test_2D(self):
-        a = ma.array(((1,2,3,4),(1,2,3,4),(1,2,3,4)),
-                     mask=((0,0,0,0),(1,0,0,1),(0,1,1,0)))
-        actual= mstats.hmean(a)
-        desired = ma.array((1,2,3,4))
-        assert_array_almost_equal(actual, desired, decimal=14)
-        #
-        actual1 = mstats.hmean(a,axis=-1)
-        desired = (4./(1/1.+1/2.+1/3.+1/4.),
-                   2./(1/2.+1/3.),
-                   2./(1/1.+1/4.)
-                   )
-        assert_array_almost_equal(actual1, desired, decimal=14)
-
-
-class TestRanking(TestCase):
-    #
-    def __init__(self, *args, **kwargs):
-        TestCase.__init__(self, *args, **kwargs)
-    #
-    def test_ranking(self):
-        x = ma.array([0,1,1,1,2,3,4,5,5,6,])
-        assert_almost_equal(mstats.rankdata(x),[1,3,3,3,5,6,7,8.5,8.5,10])
-        x[[3,4]] = masked
-        assert_almost_equal(mstats.rankdata(x),[1,2.5,2.5,0,0,4,5,6.5,6.5,8])
-        assert_almost_equal(mstats.rankdata(x,use_missing=True),
-                            [1,2.5,2.5,4.5,4.5,4,5,6.5,6.5,8])
-        x = ma.array([0,1,5,1,2,4,3,5,1,6,])
-        assert_almost_equal(mstats.rankdata(x),[1,3,8.5,3,5,7,6,8.5,3,10])
-        x = ma.array([[0,1,1,1,2], [3,4,5,5,6,]])
-        assert_almost_equal(mstats.rankdata(x),[[1,3,3,3,5],[6,7,8.5,8.5,10]])
-        assert_almost_equal(mstats.rankdata(x,axis=1),[[1,3,3,3,5],[1,2,3.5,3.5,5]])
-        assert_almost_equal(mstats.rankdata(x,axis=0),[[1,1,1,1,1],[2,2,2,2,2,]])
-
-
-class TestCorr(TestCase):
-    #
-    def test_pearsonr(self):
-        "Tests some computations of Pearson's r"
-        x = ma.arange(10)
-        assert_almost_equal(mstats.pearsonr(x,x)[0], 1.0)
-        assert_almost_equal(mstats.pearsonr(x,x[::-1])[0], -1.0)
-        #
-        x = ma.array(x, mask=True)
-        pr = mstats.pearsonr(x,x)
-        assert(pr[0] is masked)
-        assert(pr[1] is masked)
-    #
-    def test_spearmanr(self):
-        "Tests some computations of Spearman's rho"
-        (x, y) = ([5.05,6.75,3.21,2.66],[1.65,2.64,2.64,6.95])
-        assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
-        (x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan])
-        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
-        assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
-        #
-        x = [ 2.0, 47.4, 42.0, 10.8, 60.1,  1.7, 64.0, 63.1,
-              1.0,  1.4,  7.9,  0.3,  3.9,  0.3,  6.7]
-        y = [22.6, 08.3, 44.4, 11.9, 24.6,  0.6,  5.7, 41.6,
-              0.0,  0.6,  6.7,  3.8,  1.0,  1.2,  1.4]
-        assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
-        x = [ 2.0, 47.4, 42.0, 10.8, 60.1,  1.7, 64.0, 63.1,
-              1.0,  1.4,  7.9,  0.3,  3.9,  0.3,  6.7, np.nan]
-        y = [22.6, 08.3, 44.4, 11.9, 24.6,  0.6,  5.7, 41.6,
-              0.0,  0.6,  6.7,  3.8,  1.0,  1.2,  1.4, np.nan]
-        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
-        assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
-    #
-    def test_kendalltau(self):
-        "Tests some computations of Kendall's tau"
-        x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66,np.nan])
-        y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
-        z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
-        assert_almost_equal(np.asarray(mstats.kendalltau(x,y)),
-                            [+0.3333333,0.4969059])
-        assert_almost_equal(np.asarray(mstats.kendalltau(x,z)),
-                            [-0.5477226,0.2785987])
-        #
-        x = ma.fix_invalid([ 0, 0, 0, 0,20,20, 0,60, 0,20,
-                            10,10, 0,40, 0,20, 0, 0, 0, 0, 0, np.nan])
-        y = ma.fix_invalid([ 0,80,80,80,10,33,60, 0,67,27,
-                            25,80,80,80,80,80,80, 0,10,45, np.nan, 0])
-        result = mstats.kendalltau(x,y)
-        assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
-    #
-    def test_kendalltau_seasonal(self):
-        "Tests the seasonal Kendall tau."
-        x = [[nan,nan,  4,  2, 16, 26,  5,  1,  5,  1,  2,  3,  1],
-             [  4,  3,  5,  3,  2,  7,  3,  1,  1,  2,  3,  5,  3],
-             [  3,  2,  5,  6, 18,  4,  9,  1,  1,nan,  1,  1,nan],
-             [nan,  6, 11,  4, 17,nan,  6,  1,  1,  2,  5,  1,  1]]
-        x = ma.fix_invalid(x).T
-        output = mstats.kendalltau_seasonal(x)
-        assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
-        assert_almost_equal(output['seasonal p-value'].round(2),
-                            [0.18,0.53,0.20,0.04])
-    #
-    def test_pointbiserial(self):
-        "Tests point biserial"
-        x = [1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,
-             0,0,0,0,1,-1]
-        y = [14.8,13.8,12.4,10.1,7.1,6.1,5.8,4.6,4.3,3.5,3.3,3.2,3.0,
-             2.8,2.8,2.5,2.4,2.3,2.1,1.7,1.7,1.5,1.3,1.3,1.2,1.2,1.1,
-             0.8,0.7,0.6,0.5,0.2,0.2,0.1,np.nan]
-        assert_almost_equal(mstats.pointbiserialr(x, y)[0], 0.36149, 5)
-    #
-    def test_cov(self):
-        "Tests the cov function."
-        x = ma.array([[1,2,3],[4,5,6]], mask=[[1,0,0],[0,0,0]])
-        c = mstats.cov(x[0])
-        assert_equal(c, x[0].var(ddof=1))
-        c = mstats.cov(x[1])
-        assert_equal(c, x[1].var(ddof=1))
-        c = mstats.cov(x)
-        assert_equal(c[1,0], (x[0].anom()*x[1].anom()).sum())
-        #
-        x = [[nan,nan,  4,  2, 16, 26,  5,  1,  5,  1,  2,  3,  1],
-             [  4,  3,  5,  3,  2,  7,  3,  1,  1,  2,  3,  5,  3],
-             [  3,  2,  5,  6, 18,  4,  9,  1,  1,nan,  1,  1,nan],
-             [nan,  6, 11,  4, 17,nan,  6,  1,  1,  2,  5,  1,  1]]
-        x = ma.fix_invalid(x).T
-        (winter,spring,summer,fall) = x.T
-        #
-        assert_almost_equal(mstats.cov(winter,winter,bias=True),
-                            winter.var(ddof=0))
-        assert_almost_equal(mstats.cov(winter,winter,bias=False),
-                            winter.var(ddof=1))
-        assert_almost_equal(mstats.cov(winter,spring)[0,1], 7.7)
-        assert_almost_equal(mstats.cov(winter,spring)[1,0], 7.7)
-        assert_almost_equal(mstats.cov(winter,summer)[0,1], 19.1111111, 7)
-        assert_almost_equal(mstats.cov(winter,summer)[1,0], 19.1111111, 7)
-        assert_almost_equal(mstats.cov(winter,fall)[0,1], 20)
-        assert_almost_equal(mstats.cov(winter,fall)[1,0], 20)
-
-
-class TestTrimming(TestCase):
-    #
-    def test_trim(self):
-        "Tests trimming"
-        a = ma.arange(10)
-        assert_equal(mstats.trim(a), [0,1,2,3,4,5,6,7,8,9])
-        a = ma.arange(10)
-        assert_equal(mstats.trim(a,(2,8)), [None,None,2,3,4,5,6,7,8,None])
-        a = ma.arange(10)
-        assert_equal(mstats.trim(a,limits=(2,8),inclusive=(False,False)),
-                     [None,None,None,3,4,5,6,7,None,None])
-        a = ma.arange(10)
-        assert_equal(mstats.trim(a,limits=(0.1,0.2),relative=True),
-                     [None,1,2,3,4,5,6,7,None,None])
-        #
-        a = ma.arange(12)
-        a[[0,-1]] = a[5] = masked
-        assert_equal(mstats.trim(a,(2,8)),
-                     [None,None,2,3,4,None,6,7,8,None,None,None])
-        #
-        x = ma.arange(100).reshape(10,10)
-        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=None)
-        assert_equal(trimx._mask.ravel(),[1]*10+[0]*70+[1]*20)
-        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=0)
-        assert_equal(trimx._mask.ravel(),[1]*10+[0]*70+[1]*20)
-        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=-1)
-        assert_equal(trimx._mask.T.ravel(),[1]*10+[0]*70+[1]*20)
-        #
-        x = ma.arange(110).reshape(11,10)
-        x[1] = masked
-        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=None)
-        assert_equal(trimx._mask.ravel(),[1]*20+[0]*70+[1]*20)
-        trimx = mstats.trim(x,(0.1,0.2),relative=True,axis=0)
-        assert_equal(trimx._mask.ravel(),[1]*20+[0]*70+[1]*20)
-        trimx = mstats.trim(x.T,(0.1,0.2),relative=True,axis=-1)
-        assert_equal(trimx.T._mask.ravel(),[1]*20+[0]*70+[1]*20)
-    #
-    def test_trim_old(self):
-        "Tests trimming."
-        x = ma.arange(100)
-        assert_equal(mstats.trimboth(x).count(), 60)
-        assert_equal(mstats.trimtail(x,tail='r').count(), 80)
-        x[50:70] = masked
-        trimx = mstats.trimboth(x)
-        assert_equal(trimx.count(), 48)
-        assert_equal(trimx._mask, [1]*16 + [0]*34 + [1]*20 + [0]*14 + [1]*16)
-        x._mask = nomask
-        x.shape = (10,10)
-        assert_equal(mstats.trimboth(x).count(), 60)
-        assert_equal(mstats.trimtail(x).count(), 80)
-    #
-    def test_trimmedmean(self):
-        "Tests the trimmed mean."
-        data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
-                         296,299,306,376,428,515,666,1310,2611])
-        assert_almost_equal(mstats.trimmed_mean(data,0.1), 343, 0)
-        assert_almost_equal(mstats.trimmed_mean(data,(0.1,0.1)), 343, 0)
-        assert_almost_equal(mstats.trimmed_mean(data,(0.2,0.2)), 283, 0)
-    #
-    def test_trimmed_stde(self):
-        "Tests the trimmed mean standard error."
-        data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
-                         296,299,306,376,428,515,666,1310,2611])
-        assert_almost_equal(mstats.trimmed_stde(data,(0.2,0.2)), 56.13193, 5)
-        assert_almost_equal(mstats.trimmed_stde(data,0.2), 56.13193, 5)
-    #
-    def test_winsorization(self):
-        "Tests the Winsorization of the data."
-        data = ma.array([ 77, 87, 88,114,151,210,219,246,253,262,
-                         296,299,306,376,428,515,666,1310,2611])
-        assert_almost_equal(mstats.winsorize(data,(0.2,0.2)).var(ddof=1),
-                            21551.4, 1)
-        data[5] = masked
-        winsorized = mstats.winsorize(data)
-        assert_equal(winsorized.mask, data.mask)
-
-
-class TestMoments(TestCase):
-    """
-        Comparison numbers are found using R v.1.5.1.
-        Note that length(testcase) = 4.
-        testmathworks comes from the documentation for the MATLAB
-        Statistics Toolbox and can be found at both
-        http://www.mathworks.com/access/helpdesk/help/toolbox/stats/kurtosis.shtml
-        http://www.mathworks.com/access/helpdesk/help/toolbox/stats/skewness.shtml
-        Note that both test cases came from there.
-    """
-    testcase = [1,2,3,4]
-    testmathworks = ma.fix_invalid([1.165 , 0.6268, 0.0751, 0.3516, -0.6965,
-                                    np.nan])
-    def test_moment(self):
-        """
-        mean((testcase-mean(testcase))**power,axis=0),axis=0))**power))"""
-        y = mstats.moment(self.testcase,1)
-        assert_almost_equal(y,0.0,10)
-        y = mstats.moment(self.testcase,2)
-        assert_almost_equal(y,1.25)
-        y = mstats.moment(self.testcase,3)
-        assert_almost_equal(y,0.0)
-        y = mstats.moment(self.testcase,4)
-        assert_almost_equal(y,2.5625)
-    def test_variation(self):
-        """variation = samplestd/mean """
-##        y = stats.variation(self.shoes[0])
-##        assert_almost_equal(y,21.8770668)
-        y = mstats.variation(self.testcase)
-        assert_almost_equal(y,0.44721359549996, 10)
-
-    def test_skewness(self):
-        """
-            sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0)/((sqrt(var(testmathworks)*4/5))**3)/5
-        """
-        y = mstats.skew(self.testmathworks)
-        assert_almost_equal(y,-0.29322304336607,10)
-        y = mstats.skew(self.testmathworks,bias=0)
-        assert_almost_equal(y,-0.437111105023940,10)
-        y = mstats.skew(self.testcase)
-        assert_almost_equal(y,0.0,10)
-
-    def test_kurtosis(self):
-        """
-            sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4
-            sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5
-            Set flags for axis = 0 and
-            fisher=0 (Pearson's definition of kurtosis for compatibility with Matlab)
-        """
-        y = mstats.kurtosis(self.testmathworks,0,fisher=0,bias=1)
-        assert_almost_equal(y, 2.1658856802973,10)
-        # Note that MATLAB has confusing docs for the following case
-        #  kurtosis(x,0) gives an unbiased estimate of Pearson's kurtosis
-        #  kurtosis(x)  gives a biased estimate of Fisher's kurtosis (Pearson-3)
-        #  The MATLAB docs imply that both should give Fisher's
-        y = mstats.kurtosis(self.testmathworks,fisher=0,bias=0)
-        assert_almost_equal(y, 3.663542721189047,10)
-        y = mstats.kurtosis(self.testcase,0,0)
-        assert_almost_equal(y,1.64)
-    #
-    def test_mode(self):
-        "Tests the mode"
-        #
-        a1 = [0,0,0,1,1,1,2,3,3,3,3,4,5,6,7]
-        a2 = np.reshape(a1, (3,5))
-        ma1 = ma.masked_where(ma.array(a1)>2,a1)
-        ma2 = ma.masked_where(a2>2, a2)
-        assert_equal(mstats.mode(a1, axis=None), (3,4))
-        assert_equal(mstats.mode(ma1, axis=None), (0,3))
-        assert_equal(mstats.mode(a2, axis=None), (3,4))
-        assert_equal(mstats.mode(ma2, axis=None), (0,3))
-        assert_equal(mstats.mode(a2, axis=0), ([[0,0,0,1,1]],[[1,1,1,1,1]]))
-        assert_equal(mstats.mode(ma2, axis=0), ([[0,0,0,1,1]],[[1,1,1,1,1]]))
-        assert_equal(mstats.mode(a2, axis=-1), ([[0],[3],[3]], [[3],[3],[1]]))
-        assert_equal(mstats.mode(ma2, axis=-1), ([[0],[1],[0]], [[3],[1],[0]]))
-
-
-class TestPercentile(TestCase):
-    def setUp(self):
-        self.a1 = [3,4,5,10,-3,-5,6]
-        self.a2 = [3,-6,-2,8,7,4,2,1]
-        self.a3 = [3.,4,5,10,-3,-5,-6,7.0]
-
-    def test_percentile(self):
-        x = np.arange(8) * 0.5
-        assert_equal(mstats.scoreatpercentile(x, 0), 0.)
-        assert_equal(mstats.scoreatpercentile(x, 100), 3.5)
-        assert_equal(mstats.scoreatpercentile(x, 50), 1.75)
-
-    def test_2D(self):
-        x = ma.array([[1, 1, 1],
-                      [1, 1, 1],
-                      [4, 4, 3],
-                      [1, 1, 1],
-                      [1, 1, 1]])
-        assert_equal(mstats.scoreatpercentile(x,50), [1,1,1])
-
-
-class TestVariability(TestCase):
-    """  Comparison numbers are found using R v.1.5.1
-         note that length(testcase) = 4
-    """
-    testcase = ma.fix_invalid([1,2,3,4,np.nan])
-    #
-    def test_std(self):
-        y = mstats.std(self.testcase)
-        assert_almost_equal(y,1.290994449)
-
-    def test_var(self):
-        """
-        var(testcase) = 1.666666667 """
-        #y = stats.var(self.shoes[0])
-        #assert_approx_equal(y,6.009)
-        y = mstats.var(self.testcase)
-        assert_almost_equal(y,1.666666667)
-
-    def test_samplevar(self):
-        """
-        R does not have 'samplevar' so the following was used
-        var(testcase)*(4-1)/4  where 4 = length(testcase)
-        """
-        #y = stats.samplevar(self.shoes[0])
-        #assert_approx_equal(y,5.4081)
-        y = mstats.samplevar(self.testcase)
-        assert_almost_equal(y,1.25)
-
-    def test_samplestd(self):
-        #y = stats.samplestd(self.shoes[0])
-        #assert_approx_equal(y,2.325532197)
-        y = mstats.samplestd(self.testcase)
-        assert_almost_equal(y,1.118033989)
-
-    def test_signaltonoise(self):
-        """
-        this is not in R, so used
-        mean(testcase,axis=0)/(sqrt(var(testcase)*3/4)) """
-        #y = stats.signaltonoise(self.shoes[0])
-        #assert_approx_equal(y,4.5709967)
-        y = mstats.signaltonoise(self.testcase)
-        assert_almost_equal(y,2.236067977)
-
-    def test_stderr(self):
-        """
-        this is not in R, so used
-        sqrt(var(testcase))/sqrt(4)
-        """
-##        y = stats.stderr(self.shoes[0])
-##        assert_approx_equal(y,0.775177399)
-        y = mstats.stderr(self.testcase)
-        assert_almost_equal(y,0.6454972244)
-
-    def test_sem(self):
-        """
-        this is not in R, so used
-        sqrt(var(testcase)*3/4)/sqrt(3)
-        """
-        #y = stats.sem(self.shoes[0])
-        #assert_approx_equal(y,0.775177399)
-        y = mstats.sem(self.testcase)
-        assert_almost_equal(y,0.6454972244)
-
-    def test_z(self):
-        """
-        not in R, so used
-        (10-mean(testcase,axis=0))/sqrt(var(testcase)*3/4)
-        """
-        y = mstats.z(self.testcase, ma.array(self.testcase).mean())
-        assert_almost_equal(y,0.0)
-
-    def test_zs(self):
-        """
-        not in R, so tested by using
-        (testcase[i]-mean(testcase,axis=0))/sqrt(var(testcase)*3/4)
-        """
-        y = mstats.zs(self.testcase)
-        desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996 ,
-                                  0.44721359549996 , 1.3416407864999, np.nan])
-        assert_almost_equal(desired,y,decimal=12)
-
-
-
-class TestMisc(TestCase):
-    #
-    def test_obrientransform(self):
-        "Tests Obrien transform"
-        args = [[5]*5+[6]*11+[7]*9+[8]*3+[9]*2+[10]*2,
-                [6]+[7]*2+[8]*4+[9]*9+[10]*16]
-        result = [5*[3.1828]+11*[0.5591]+9*[0.0344]+3*[1.6086]+2*[5.2817]+2*[11.0538],
-                  [10.4352]+2*[4.8599]+4*[1.3836]+9*[0.0061]+16*[0.7277]]
-        assert_almost_equal(np.round(mstats.obrientransform(*args).T,4),
-                            result,4)
-    #
-    def test_kstwosamp(self):
-        "Tests the Kolmogorov-Smirnov 2 samples test"
-        x = [[nan,nan,  4,  2, 16, 26,  5,  1,  5,  1,  2,  3,  1],
-             [  4,  3,  5,  3,  2,  7,  3,  1,  1,  2,  3,  5,  3],
-             [  3,  2,  5,  6, 18,  4,  9,  1,  1,nan,  1,  1,nan],
-             [nan,  6, 11,  4, 17,nan,  6,  1,  1,  2,  5,  1,  1]]
-        x = ma.fix_invalid(x).T
-        (winter,spring,summer,fall) = x.T
-        #
-        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4),
-                            (0.1818,0.9892))
-        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4),
-                            (0.1469,0.7734))
-        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4),
-                            (0.1818,0.6744))
-    #
-    def test_friedmanchisq(self):
-        "Tests the Friedman Chi-square test"
-        # No missing values
-        args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0],
-                [7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0],
-                [6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0])
-        result = mstats.friedmanchisquare(*args)
-        assert_almost_equal(result[0], 10.4737, 4)
-        assert_almost_equal(result[1], 0.005317, 6)
-        # Missing values
-        x = [[nan,nan,  4,  2, 16, 26,  5,  1,  5,  1,  2,  3,  1],
-             [  4,  3,  5,  3,  2,  7,  3,  1,  1,  2,  3,  5,  3],
-             [  3,  2,  5,  6, 18,  4,  9,  1,  1,nan,  1,  1,nan],
-             [nan,  6, 11,  4, 17,nan,  6,  1,  1,  2,  5,  1,  1]]
-        x = ma.fix_invalid(x)
-        result = mstats.friedmanchisquare(*x)
-        assert_almost_equal(result[0], 2.0156, 4)
-        assert_almost_equal(result[1], 0.5692, 4)
-
-
-if __name__ == "__main__":
-    run_module_suite()

Copied: trunk/scipy/stats/tests/test_mstats_basic.py (from rev 5134, trunk/scipy/stats/tests/test_mstats.py)

Copied: trunk/scipy/stats/tests/test_mstats_extras.py (from rev 5134, trunk/scipy/stats/tests/test_mmorestats.py)



