[Scipy-svn] r2877 - in trunk/Lib/sandbox/maskedarray: . tests

scipy-svn at scipy.org scipy-svn at scipy.org
Tue Mar 27 15:12:33 EDT 2007


Author: pierregm
Date: 2007-03-27 14:12:29 -0500 (Tue, 27 Mar 2007)
New Revision: 2877

Added:
   trunk/Lib/sandbox/maskedarray/mstats.py
   trunk/Lib/sandbox/maskedarray/tests/test_mstats.py
Modified:
   trunk/Lib/sandbox/maskedarray/core.py
   trunk/Lib/sandbox/maskedarray/tests/test_core.py
Log:
core   : fixed .sort for ndarrays
mstats : misc. statistics supporting masked arrays (quantiles +  medians)

Modified: trunk/Lib/sandbox/maskedarray/core.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/core.py	2007-03-27 18:10:50 UTC (rev 2876)
+++ trunk/Lib/sandbox/maskedarray/core.py	2007-03-27 19:12:29 UTC (rev 2877)
@@ -1775,20 +1775,24 @@
     |'heapsort' |   3   | O(n*log(n)) |     0      |   no  |
     |------------------------------------------------------|
 
-    All the sort algorithms make temporary copies of the data when the sort is
-    not along the last axis. Consequently, sorts along the last axis are faster
-    and use less space than sorts along other axis.
-
     """
-        if fill_value is None:
-            if endwith:
-                filler = minimum_fill_value(self)
+        if self._mask is nomask:
+            ndarray.sort(self,axis=axis, kind=kind, order=order)
+        else:
+            if fill_value is None:
+                if endwith:
+                    filler = minimum_fill_value(self)
+                else:
+                    filler = maximum_fill_value(self)
             else:
-                filler = maximum_fill_value(self)
-        else:
-            filler = fill_value
-        indx = self.filled(filler).argsort(axis=axis,kind=kind,order=order)
-        self[:] = self[indx]
+                filler = fill_value
+            idx = numpy.indices(self.shape)
+            idx[axis] = self.filled(filler).argsort(axis=axis,kind=kind,order=order)
+            idx_l = idx.tolist()
+            tmp_mask = self._mask[idx_l].flat
+            tmp_data = self._data[idx_l].flat
+            self.flat = tmp_data
+            self._mask.flat = tmp_mask
         return
     #............................................
     def min(self, axis=None, fill_value=None):
@@ -2620,18 +2624,35 @@
 if __name__ == '__main__':
     import numpy as N
     from maskedarray.testutils import assert_equal, assert_array_equal
+    marray = masked_array
     #
-    a = arange(10)
-    a[::3] = masked
-    a.fill_value = 999
-    a_pickled = cPickle.loads(a.dumps())
-    assert_equal(a_pickled._mask, a._mask)
-    assert_equal(a_pickled._data, a._data)
-    assert_equal(a_pickled.fill_value, 999)
+    if 0:
+        a = arange(10)
+        a[::3] = masked
+        a.fill_value = 999
+        a_pickled = cPickle.loads(a.dumps())
+        assert_equal(a_pickled._mask, a._mask)
+        assert_equal(a_pickled._data, a._data)
+        assert_equal(a_pickled.fill_value, 999)
+        #
+        a = array(numpy.matrix(range(10)), mask=[1,0,1,0,0]*2)
+        a_pickled = cPickle.loads(a.dumps())
+        assert_equal(a_pickled._mask, a._mask)
+        assert_equal(a_pickled, a)
+        assert(isinstance(a_pickled._data,numpy.matrix))
     #
-    a = array(numpy.matrix(range(10)), mask=[1,0,1,0,0]*2)
-    a_pickled = cPickle.loads(a.dumps())
-    assert_equal(a_pickled._mask, a._mask)
-    assert_equal(a_pickled, a)
-    assert(isinstance(a_pickled._data,numpy.matrix))
     
+    #
+    if 1:
+        x = marray(numpy.linspace(-1.,1.,31),)
+        x[:10] = x[-10:] = masked
+        z = marray(numpy.empty((len(x),3), dtype=numpy.float_))
+        z[:,0] = x[:]
+        for i in range(1,3):
+            idx = numpy.arange(len(x))
+            numpy.random.shuffle(idx)
+            z[:,i] = x[idx]
+        #
+        z.sort(0)
+        
+    

Added: trunk/Lib/sandbox/maskedarray/mstats.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/mstats.py	2007-03-27 18:10:50 UTC (rev 2876)
+++ trunk/Lib/sandbox/maskedarray/mstats.py	2007-03-27 19:12:29 UTC (rev 2877)
@@ -0,0 +1,209 @@
+"""
+Generic statistics functions, with support to MA.
+
+:author: Pierre GF Gerard-Marchant
+:contact: pierregm_at_uga_edu
+:date: $Date$
+:version: $Id$
+"""
+__author__ = "Pierre GF Gerard-Marchant ($Author$)"
+__version__ = '1.0'
+__revision__ = "$Revision$"
+__date__     = '$Date$'
+
+
+import numpy
+from numpy import bool_, float_, int_
+from numpy import array as narray
+from numpy.core import numeric as numeric
+
+import maskedarray as MA
+from maskedarray.core import masked, nomask, MaskedArray
+from maskedarray.core import masked_array as marray
+from maskedarray.extras import apply_along_axis
+
+
+def _quantiles_1D(data,m,p):
+    """Returns quantiles for 1D compressed data. 
+    Used internally by `mquantiles`.
+    
+:Parameters:
+    data : ndarray
+        Array to quantize
+    m : Sequence
+    p : float ndarray
+        Quantiles to compute
+    """
+    n = data.count()
+    if n == 0:
+        return MA.resize(masked, len(p))
+    elif n == 1:
+        return MA.resize(data,len(p))
+    data = data.compressed()
+    aleph = (n*p + m)
+    k = numpy.floor(aleph.clip(1, n-1)).astype(int_)
+    gamma = (aleph-k).clip(0,1)
+    y = MA.sort(data)
+    return (1.-gamma)*y[(k-1).tolist()] + gamma*y[k.tolist()]
+
+def mquantiles(data, prob=list([.25,.5,.75]), alphap=.4, betap=.4, axis=None):
+    """Computes empirical quantiles for a *1xN* data array.
+Samples quantile are defined by:
+*Q(p) = (1-g).x[i] +g.x[i+1]*
+where *x[j]* is the jth order statistic, 
+with *i = (floor(n*p+m))*, *m=alpha+p*(1-alpha-beta)* and *g = n*p + m - i)*.
+
+Typical values of (alpha,beta) are:
+    
+    - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
+    - (.5,.5)  : *p(k) = (k+1/2.)/n* : piecewise linear function (R, type 5)
+    - (0,0)    : *p(k) = k/(n+1)* : (R type 6)
+    - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
+      That's R default (R type 7)
+    - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
+      The resulting quantile estimates are approximately median-unbiased
+      regardless of the distribution of x. (R type 8)
+    - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*. Blom.
+      The resulting quantile estimates are approximately unbiased
+      if x is normally distributed (R type 9)
+    - (.4,.4)  : approximately quantile unbiased (Cunnane)
+    - (.35,.35): APL, used with PWM
+
+:Parameters:
+    x : Sequence
+        Input data, as a sequence or array of dimension at most 2.
+    prob : Sequence *[(0.25, 0.5, 0.75)]*
+        List of quantiles to compute.
+    alpha : Float (*[0.4]*)
+        Plotting positions parameter.
+    beta : Float (*[0.4]*)
+        Plotting positions parameter.
+    axis : Integer *[None]*
+        Axis along which to compute quantiles. If *None*, uses the whole 
+        (flattened/compressed) dataset.
+    """
+
+    # Initialization & checks ---------
+    data = marray(data, copy=False)
+    assert data.ndim <= 2, "Array should be 2D at most !"
+    p = narray(prob, copy=False, ndmin=1)
+    m = alphap + p*(1.-alphap-betap)
+    # Computes quantiles along axis (or globally)
+    if (axis is None): 
+        return _quantiles_1D(data, m, p)
+    else:
+        return apply_along_axis(_quantiles_1D, axis, data, m, p)
+    
+
+def _median1d(data):
+    """Returns the median of a 1D masked array. Used internally by mmedian."""
+    datac = data.compressed()
+    if datac.size > 0:
+        return numpy.median(data.compressed())
+    return masked
+
+def _median2d_1(data):
+    data = marray(data, subok=True, copy=True)
+    if data._mask is nomask:
+        return numpy.median(data)
+    if data.ndim != 2 :
+        raise ValueError("Input array should be 2D!")
+    (n,p) = data.shape
+    if p < n//3:
+        return apply_along_axis(_median1d, 0, data)
+    data.sort(axis=0)
+    counts = data.count(axis=0)
+    midx = (counts//2)
+    midx_even = (counts%2==0)
+    med = marray(numeric.empty((data.shape[-1],), dtype=data.dtype))
+    med[midx_even] = (data[midx-1]+data[midx])/2.
+    med[numpy.logical_not(midx_even)] = data[midx]
+    if not med._mask.any():
+        med._mask = nomask
+    return med
+             
+def _median2d_2(data):
+    return apply_along_axis(_median1d, 0, data)
+    
+
+def mmedian(data):
+    """Returns the median of data along the first axis. Missing data are discarded."""
+    data = marray(data, subok=True, copy=True)
+    if data._mask is nomask:
+        return numpy.median(data)
+    if data.ndim == 1:
+        return _median1d(data)
+#    elif data.ndim == 2:
+#        (n, p) = data.shape
+#        if p < n//3:
+#            return apply_along_axis(_median1d, 0, data)
+#        data.sort(axis=0)
+#        counts = data.count(axis=0)
+#        midx = (counts//2)
+#        midx_even = (counts%2==0)
+#        med = marray(numeric.empty((p,), dtype=data.dtype))
+#        med[midx_even] = (data[midx-1]+data[midx])/2.
+#        med[numpy.logical_not(midx_even)] = data[midx]
+#        if not med._mask.any():
+#            med._mask = nomask
+#        return med
+    return apply_along_axis(_median1d, 0, data)
+    
+
+
+################################################################################
+if __name__ == '__main__':
+    from maskedarray.testutils import assert_almost_equal, assert_equal
+    import timeit
+    import maskedarray
+    
+    if 1:
+        (n,p) = (101,30)
+        x = marray(numpy.linspace(-1.,1.,n),)
+        x[:10] = x[-10:] = masked
+        z = marray(numpy.empty((n,p), dtype=numpy.float_))
+        z[:,0] = x[:]
+        idx = numpy.arange(len(x))
+        for i in range(1,p):
+            numpy.random.shuffle(idx)
+            z[:,i] = x[idx]
+    
+        assert_equal(mmedian(z[:,0]), 0)
+        assert_equal(mmedian(z), numpy.zeros((p,)))
+        
+        x = maskedarray.arange(24).reshape(3,4,2)
+        x[x%3==0] = masked
+        assert_equal(mmedian(x), [[12,9],[6,15],[12,9],[18,15]])
+        x.shape = (4,3,2)
+        assert_equal(mmedian(x),[[99,10],[11,99],[13,14]])
+        x = maskedarray.arange(24).reshape(4,3,2)
+        x[x%5==0] = masked
+        assert_equal(mmedian(x), [[12,10],[8,9],[16,17]])
+        
+        
+        """  [[[0 1],  [2  3],  [4 5]]
+              [[6 7],  [8  9],  [10 11]]
+              [[9 13]  [14 15]  [16 17]]
+             [[18 19]  [20 21]  [22 23]]],
+
+ [[[-- 1]  [2 --]  [4 5]   [-- 7]]
+  [[8 --]  [10 11] [-- 13] [14 --]]
+ [[16 17]  [-- 19] [20 --] [22 23]]],
+
+        """
+    
+    if 0:
+        print "GO!"
+        med_setup1 = "from __main__ import _median2d_1,z"
+        med_setup3 = "from __main__ import mmedian,z"
+        med_setup2 = "from __main__ import _median2d_2,z"
+        (nrep, nloop) = (3,10)
+        med_r1 = timeit.Timer("_median2d_1(z)", med_setup1).repeat(nrep,nloop)
+        med_r2 = timeit.Timer("_median2d_2(z)", med_setup2).repeat(nrep,nloop)
+        med_r3 = timeit.Timer("mmedian(z)", med_setup3).repeat(nrep,nloop)
+        med_r1 = numpy.sort(med_r1)
+        med_r2 = numpy.sort(med_r2)
+        med_r3 = numpy.sort(med_r3)
+        print "median2d_1 : %s" % med_r1
+        print "median2d_2 : %s" % med_r2
+        print "median     : %s" % med_r3
\ No newline at end of file


Property changes on: trunk/Lib/sandbox/maskedarray/mstats.py
___________________________________________________________________
Name: svn:keywords
   + Date 
Author 
Revision
Id

Modified: trunk/Lib/sandbox/maskedarray/tests/test_core.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/tests/test_core.py	2007-03-27 18:10:50 UTC (rev 2876)
+++ trunk/Lib/sandbox/maskedarray/tests/test_core.py	2007-03-27 19:12:29 UTC (rev 2877)
@@ -1105,6 +1105,47 @@
         assert_equal(sortedx._data, [1,2,-2,-1,0])
         assert_equal(sortedx._mask, [1,1,0,0,0])
     
+    def check_sort_2d(self):
+        "Check sort of 2D array."
+        # 2D array w/o mask
+        a = masked_array([[8,4,1],[2,0,9]])
+        a.sort(0)
+        assert_equal(a, [[2,0,1],[8,4,9]])
+        a = masked_array([[8,4,1],[2,0,9]])
+        a.sort(1)
+        assert_equal(a, [[1,4,8],[0,2,9]])
+        # 2D array w/mask
+        a = masked_array([[8,4,1],[2,0,9]], mask=[[1,0,0],[0,0,1]])
+        a.sort(0)
+        assert_equal(a, [[2,0,1],[8,4,9]])
+        assert_equal(a._mask, [[0,0,0],[1,0,1]])
+        a = masked_array([[8,4,1],[2,0,9]], mask=[[1,0,0],[0,0,1]])
+        a.sort(1)
+        assert_equal(a, [[1,4,8],[0,2,9]])
+        assert_equal(a._mask, [[0,0,1],[0,0,1]])
+        # 3D
+        a = masked_array([[[7, 8, 9],[4, 5, 6],[1, 2, 3]],
+                          [[1, 2, 3],[7, 8, 9],[4, 5, 6]],
+                          [[7, 8, 9],[1, 2, 3],[4, 5, 6]],
+                          [[4, 5, 6],[1, 2, 3],[7, 8, 9]]])
+        a[a%4==0] = masked
+        am = a.copy()
+        an = a.filled(99)
+        am.sort(0)
+        an.sort(0)
+        assert_equal(am, an)
+        am = a.copy()
+        an = a.filled(99)
+        am.sort(1)
+        an.sort(1)
+        assert_equal(am, an)
+        am = a.copy()
+        an = a.filled(99)
+        am.sort(2)
+        an.sort(2)
+        assert_equal(am, an)
+        
+    
     def check_ravel(self):
         "Tests ravel"
         a = array([[1,2,3,4,5]], mask=[[0,1,0,0,0]])

Added: trunk/Lib/sandbox/maskedarray/tests/test_mstats.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/tests/test_mstats.py	2007-03-27 18:10:50 UTC (rev 2876)
+++ trunk/Lib/sandbox/maskedarray/tests/test_mstats.py	2007-03-27 19:12:29 UTC (rev 2877)
@@ -0,0 +1,117 @@
+# pylint: disable-msg=W0611, W0612, W0511,R0201
+"""Tests suite for maskedArray statistics.
+
+:author: Pierre Gerard-Marchant
+:contact: pierregm_at_uga_dot_edu
+:version: $Id$
+"""
+__author__ = "Pierre GF Gerard-Marchant ($Author$)"
+__version__ = '1.0'
+__revision__ = "$Revision$"
+__date__     = '$Date$'
+
+import numpy
+
+import maskedarray
+from maskedarray import masked, masked_array
+
+import maskedarray.testutils
+from maskedarray.testutils import *
+
+from maskedarray.mstats import mquantiles, mmedian
+
+#..............................................................................
+class test_quantiles(NumpyTestCase):
+    "Base test class for MaskedArrays."
+    def __init__(self, *args, **kwds):
+        NumpyTestCase.__init__(self, *args, **kwds)
+        self.a = maskedarray.arange(1,101)
+    #
+    def test_1d_nomask(self):
+        "Test quantiles 1D - w/o mask."
+        a = self.a
+        assert_almost_equal(mquantiles(a, alphap=1., betap=1.), 
+                            [25.75, 50.5, 75.25])
+        assert_almost_equal(mquantiles(a, alphap=0, betap=1.), 
+                            [25., 50., 75.])
+        assert_almost_equal(mquantiles(a, alphap=0.5, betap=0.5), 
+                            [25.5, 50.5, 75.5])
+        assert_almost_equal(mquantiles(a, alphap=0., betap=0.), 
+                            [25.25, 50.5, 75.75])
+        assert_almost_equal(mquantiles(a, alphap=1./3, betap=1./3), 
+                            [25.41666667, 50.5, 75.5833333])
+        assert_almost_equal(mquantiles(a, alphap=3./8, betap=3./8), 
+                            [25.4375, 50.5, 75.5625])
+        assert_almost_equal(mquantiles(a), [25.45, 50.5, 75.55])# 
+    #
+    def test_1d_mask(self):
+        "Test quantiles 1D - w/ mask."
+        a = self.a
+        a[1::2] = masked
+        assert_almost_equal(mquantiles(a, alphap=1., betap=1.), 
+                            [25.5, 50.0, 74.5])
+        assert_almost_equal(mquantiles(a, alphap=0, betap=1.), 
+                            [24., 49., 74.])
+        assert_almost_equal(mquantiles(a, alphap=0.5, betap=0.5), 
+                            [25., 50., 75.])
+        assert_almost_equal(mquantiles(a, alphap=0., betap=0.), 
+                            [24.5, 50.0, 75.5])
+        assert_almost_equal(mquantiles(a, alphap=1./3, betap=1./3), 
+                            [24.833333, 50.0, 75.166666])
+        assert_almost_equal(mquantiles(a, alphap=3./8, betap=3./8), 
+                            [24.875, 50., 75.125])
+        assert_almost_equal(mquantiles(a), [24.9, 50., 75.1])
+    #
+    def test_2d_nomask(self):
+        "Test quantiles 2D - w/o mask."
+        a = self.a
+        b = maskedarray.resize(a, (100,100))
+        assert_almost_equal(mquantiles(b), [25.45, 50.5, 75.55])
+        assert_almost_equal(mquantiles(b, axis=0), maskedarray.resize(a,(3,100)))
+        assert_almost_equal(mquantiles(b, axis=1), 
+                            maskedarray.resize([25.45, 50.5, 75.55], (100,3)))
+    #
+    def test_2d_mask(self):
+        "Test quantiles 2D - w/ mask."
+        a = self.a
+        a[1::2] = masked
+        b = maskedarray.resize(a, (100,100))
+        assert_almost_equal(mquantiles(b), [25., 50., 75.])
+        assert_almost_equal(mquantiles(b, axis=0), maskedarray.resize(a,(3,100)))
+        assert_almost_equal(mquantiles(b, axis=1), 
+                            maskedarray.resize([24.9, 50., 75.1], (100,3)))        
+        
+class test_median(NumpyTestCase):
+    def __init__(self, *args, **kwds):
+        NumpyTestCase.__init__(self, *args, **kwds)
+        
+    def test_2d(self):
+        "Tests median w/ 2D"
+        (n,p) = (101,30)
+        x = masked_array(numpy.linspace(-1.,1.,n),)
+        x[:10] = x[-10:] = masked
+        z = masked_array(numpy.empty((n,p), dtype=numpy.float_))
+        z[:,0] = x[:]
+        idx = numpy.arange(len(x))
+        for i in range(1,p):
+            numpy.random.shuffle(idx)
+            z[:,i] = x[idx]
+        assert_equal(mmedian(z[:,0]), 0)
+        assert_equal(mmedian(z), numpy.zeros((p,)))
+        
+    def test_3d(self):
+        "Tests median w/ 3D"
+        x = maskedarray.arange(24).reshape(3,4,2)
+        x[x%3==0] = masked
+        assert_equal(mmedian(x), [[12,9],[6,15],[12,9],[18,15]])
+        x.shape = (4,3,2)
+        assert_equal(mmedian(x),[[99,10],[11,99],[13,14]])
+        x = maskedarray.arange(24).reshape(4,3,2)
+        x[x%5==0] = masked
+        assert_equal(mmedian(x), [[12,10],[8,9],[16,17]])
+        
+###############################################################################
+#------------------------------------------------------------------------------
+if __name__ == "__main__":
+    NumpyTest().run()
+            
\ No newline at end of file


Property changes on: trunk/Lib/sandbox/maskedarray/tests/test_mstats.py
___________________________________________________________________
Name: svn:keywords
   + Date 
Author 
Revision
Id




More information about the Scipy-svn mailing list