[Scipy-svn] r2877 - in trunk/Lib/sandbox/maskedarray: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Tue Mar 27 15:12:33 EDT 2007
Author: pierregm
Date: 2007-03-27 14:12:29 -0500 (Tue, 27 Mar 2007)
New Revision: 2877
Added:
trunk/Lib/sandbox/maskedarray/mstats.py
trunk/Lib/sandbox/maskedarray/tests/test_mstats.py
Modified:
trunk/Lib/sandbox/maskedarray/core.py
trunk/Lib/sandbox/maskedarray/tests/test_core.py
Log:
core : fixed .sort for ndarrays
mstats : misc. statistics supporting masked arrays (quantiles + medians)
Modified: trunk/Lib/sandbox/maskedarray/core.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/core.py 2007-03-27 18:10:50 UTC (rev 2876)
+++ trunk/Lib/sandbox/maskedarray/core.py 2007-03-27 19:12:29 UTC (rev 2877)
@@ -1775,20 +1775,24 @@
|'heapsort' | 3 | O(n*log(n)) | 0 | no |
|------------------------------------------------------|
- All the sort algorithms make temporary copies of the data when the sort is
- not along the last axis. Consequently, sorts along the last axis are faster
- and use less space than sorts along other axis.
-
"""
- if fill_value is None:
- if endwith:
- filler = minimum_fill_value(self)
+ if self._mask is nomask:
+ ndarray.sort(self,axis=axis, kind=kind, order=order)
+ else:
+ if fill_value is None:
+ if endwith:
+ filler = minimum_fill_value(self)
+ else:
+ filler = maximum_fill_value(self)
else:
- filler = maximum_fill_value(self)
- else:
- filler = fill_value
- indx = self.filled(filler).argsort(axis=axis,kind=kind,order=order)
- self[:] = self[indx]
+ filler = fill_value
+ idx = numpy.indices(self.shape)
+ idx[axis] = self.filled(filler).argsort(axis=axis,kind=kind,order=order)
+ idx_l = idx.tolist()
+ tmp_mask = self._mask[idx_l].flat
+ tmp_data = self._data[idx_l].flat
+ self.flat = tmp_data
+ self._mask.flat = tmp_mask
return
#............................................
def min(self, axis=None, fill_value=None):
@@ -2620,18 +2624,35 @@
if __name__ == '__main__':
import numpy as N
from maskedarray.testutils import assert_equal, assert_array_equal
+ marray = masked_array
#
- a = arange(10)
- a[::3] = masked
- a.fill_value = 999
- a_pickled = cPickle.loads(a.dumps())
- assert_equal(a_pickled._mask, a._mask)
- assert_equal(a_pickled._data, a._data)
- assert_equal(a_pickled.fill_value, 999)
+ if 0:
+ a = arange(10)
+ a[::3] = masked
+ a.fill_value = 999
+ a_pickled = cPickle.loads(a.dumps())
+ assert_equal(a_pickled._mask, a._mask)
+ assert_equal(a_pickled._data, a._data)
+ assert_equal(a_pickled.fill_value, 999)
+ #
+ a = array(numpy.matrix(range(10)), mask=[1,0,1,0,0]*2)
+ a_pickled = cPickle.loads(a.dumps())
+ assert_equal(a_pickled._mask, a._mask)
+ assert_equal(a_pickled, a)
+ assert(isinstance(a_pickled._data,numpy.matrix))
#
- a = array(numpy.matrix(range(10)), mask=[1,0,1,0,0]*2)
- a_pickled = cPickle.loads(a.dumps())
- assert_equal(a_pickled._mask, a._mask)
- assert_equal(a_pickled, a)
- assert(isinstance(a_pickled._data,numpy.matrix))
+ #
+ if 1:
+ x = marray(numpy.linspace(-1.,1.,31),)
+ x[:10] = x[-10:] = masked
+ z = marray(numpy.empty((len(x),3), dtype=numpy.float_))
+ z[:,0] = x[:]
+ for i in range(1,3):
+ idx = numpy.arange(len(x))
+ numpy.random.shuffle(idx)
+ z[:,i] = x[idx]
+ #
+ z.sort(0)
+
+
Added: trunk/Lib/sandbox/maskedarray/mstats.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/mstats.py 2007-03-27 18:10:50 UTC (rev 2876)
+++ trunk/Lib/sandbox/maskedarray/mstats.py 2007-03-27 19:12:29 UTC (rev 2877)
@@ -0,0 +1,209 @@
+"""
+Generic statistics functions, with support to MA.
+
+:author: Pierre GF Gerard-Marchant
+:contact: pierregm_at_uga_edu
+:date: $Date$
+:version: $Id$
+"""
+__author__ = "Pierre GF Gerard-Marchant ($Author$)"
+__version__ = '1.0'
+__revision__ = "$Revision$"
+__date__ = '$Date$'
+
+
+import numpy
+from numpy import bool_, float_, int_
+from numpy import array as narray
+from numpy.core import numeric as numeric
+
+import maskedarray as MA
+from maskedarray.core import masked, nomask, MaskedArray
+from maskedarray.core import masked_array as marray
+from maskedarray.extras import apply_along_axis
+
+
+def _quantiles_1D(data,m,p):
+ """Returns quantiles for 1D compressed data.
+ Used internally by `mquantiles`.
+
+:Parameters:
+ data : ndarray
+ Array to quantize
+ m : Sequence
+ p : float ndarray
+ Quantiles to compute
+ """
+ n = data.count()
+ if n == 0:
+ return MA.resize(masked, len(p))
+ elif n == 1:
+ return MA.resize(data,len(p))
+ data = data.compressed()
+ aleph = (n*p + m)
+ k = numpy.floor(aleph.clip(1, n-1)).astype(int_)
+ gamma = (aleph-k).clip(0,1)
+ y = MA.sort(data)
+ return (1.-gamma)*y[(k-1).tolist()] + gamma*y[k.tolist()]
+
+def mquantiles(data, prob=list([.25,.5,.75]), alphap=.4, betap=.4, axis=None):
+ """Computes empirical quantiles for a *1xN* data array.
+Samples quantile are defined by:
+*Q(p) = (1-g).x[i] +g.x[i+1]*
+where *x[j]* is the jth order statistic,
+with *i = (floor(n*p+m))*, *m=alpha+p*(1-alpha-beta)* and *g = n*p + m - i)*.
+
+Typical values of (alpha,beta) are:
+
+ - (0,1) : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
+ - (.5,.5) : *p(k) = (k+1/2.)/n* : piecewise linear function (R, type 5)
+ - (0,0) : *p(k) = k/(n+1)* : (R type 6)
+ - (1,1) : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
+ That's R default (R type 7)
+ - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
+ The resulting quantile estimates are approximately median-unbiased
+ regardless of the distribution of x. (R type 8)
+ - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*. Blom.
+ The resulting quantile estimates are approximately unbiased
+ if x is normally distributed (R type 9)
+ - (.4,.4) : approximately quantile unbiased (Cunnane)
+ - (.35,.35): APL, used with PWM
+
+:Parameters:
+ x : Sequence
+ Input data, as a sequence or array of dimension at most 2.
+ prob : Sequence *[(0.25, 0.5, 0.75)]*
+ List of quantiles to compute.
+ alpha : Float (*[0.4]*)
+ Plotting positions parameter.
+ beta : Float (*[0.4]*)
+ Plotting positions parameter.
+ axis : Integer *[None]*
+ Axis along which to compute quantiles. If *None*, uses the whole
+ (flattened/compressed) dataset.
+ """
+
+ # Initialization & checks ---------
+ data = marray(data, copy=False)
+ assert data.ndim <= 2, "Array should be 2D at most !"
+ p = narray(prob, copy=False, ndmin=1)
+ m = alphap + p*(1.-alphap-betap)
+ # Computes quantiles along axis (or globally)
+ if (axis is None):
+ return _quantiles_1D(data, m, p)
+ else:
+ return apply_along_axis(_quantiles_1D, axis, data, m, p)
+
+
+def _median1d(data):
+ """Returns the median of a 1D masked array. Used internally by mmedian."""
+ datac = data.compressed()
+ if datac.size > 0:
+ return numpy.median(data.compressed())
+ return masked
+
+def _median2d_1(data):
+ data = marray(data, subok=True, copy=True)
+ if data._mask is nomask:
+ return numpy.median(data)
+ if data.ndim != 2 :
+ raise ValueError("Input array should be 2D!")
+ (n,p) = data.shape
+ if p < n//3:
+ return apply_along_axis(_median1d, 0, data)
+ data.sort(axis=0)
+ counts = data.count(axis=0)
+ midx = (counts//2)
+ midx_even = (counts%2==0)
+ med = marray(numeric.empty((data.shape[-1],), dtype=data.dtype))
+ med[midx_even] = (data[midx-1]+data[midx])/2.
+ med[numpy.logical_not(midx_even)] = data[midx]
+ if not med._mask.any():
+ med._mask = nomask
+ return med
+
+def _median2d_2(data):
+ return apply_along_axis(_median1d, 0, data)
+
+
+def mmedian(data):
+ """Returns the median of data along the first axis. Missing data are discarded."""
+ data = marray(data, subok=True, copy=True)
+ if data._mask is nomask:
+ return numpy.median(data)
+ if data.ndim == 1:
+ return _median1d(data)
+# elif data.ndim == 2:
+# (n, p) = data.shape
+# if p < n//3:
+# return apply_along_axis(_median1d, 0, data)
+# data.sort(axis=0)
+# counts = data.count(axis=0)
+# midx = (counts//2)
+# midx_even = (counts%2==0)
+# med = marray(numeric.empty((p,), dtype=data.dtype))
+# med[midx_even] = (data[midx-1]+data[midx])/2.
+# med[numpy.logical_not(midx_even)] = data[midx]
+# if not med._mask.any():
+# med._mask = nomask
+# return med
+ return apply_along_axis(_median1d, 0, data)
+
+
+
+################################################################################
+if __name__ == '__main__':
+ from maskedarray.testutils import assert_almost_equal, assert_equal
+ import timeit
+ import maskedarray
+
+ if 1:
+ (n,p) = (101,30)
+ x = marray(numpy.linspace(-1.,1.,n),)
+ x[:10] = x[-10:] = masked
+ z = marray(numpy.empty((n,p), dtype=numpy.float_))
+ z[:,0] = x[:]
+ idx = numpy.arange(len(x))
+ for i in range(1,p):
+ numpy.random.shuffle(idx)
+ z[:,i] = x[idx]
+
+ assert_equal(mmedian(z[:,0]), 0)
+ assert_equal(mmedian(z), numpy.zeros((p,)))
+
+ x = maskedarray.arange(24).reshape(3,4,2)
+ x[x%3==0] = masked
+ assert_equal(mmedian(x), [[12,9],[6,15],[12,9],[18,15]])
+ x.shape = (4,3,2)
+ assert_equal(mmedian(x),[[99,10],[11,99],[13,14]])
+ x = maskedarray.arange(24).reshape(4,3,2)
+ x[x%5==0] = masked
+ assert_equal(mmedian(x), [[12,10],[8,9],[16,17]])
+
+
+ """ [[[0 1], [2 3], [4 5]]
+ [[6 7], [8 9], [10 11]]
+ [[9 13] [14 15] [16 17]]
+ [[18 19] [20 21] [22 23]]],
+
+ [[[-- 1] [2 --] [4 5] [-- 7]]
+ [[8 --] [10 11] [-- 13] [14 --]]
+ [[16 17] [-- 19] [20 --] [22 23]]],
+
+ """
+
+ if 0:
+ print "GO!"
+ med_setup1 = "from __main__ import _median2d_1,z"
+ med_setup3 = "from __main__ import mmedian,z"
+ med_setup2 = "from __main__ import _median2d_2,z"
+ (nrep, nloop) = (3,10)
+ med_r1 = timeit.Timer("_median2d_1(z)", med_setup1).repeat(nrep,nloop)
+ med_r2 = timeit.Timer("_median2d_2(z)", med_setup2).repeat(nrep,nloop)
+ med_r3 = timeit.Timer("mmedian(z)", med_setup3).repeat(nrep,nloop)
+ med_r1 = numpy.sort(med_r1)
+ med_r2 = numpy.sort(med_r2)
+ med_r3 = numpy.sort(med_r3)
+ print "median2d_1 : %s" % med_r1
+ print "median2d_2 : %s" % med_r2
+ print "median : %s" % med_r3
\ No newline at end of file
Property changes on: trunk/Lib/sandbox/maskedarray/mstats.py
___________________________________________________________________
Name: svn:keywords
+ Date
Author
Revision
Id
Modified: trunk/Lib/sandbox/maskedarray/tests/test_core.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/tests/test_core.py 2007-03-27 18:10:50 UTC (rev 2876)
+++ trunk/Lib/sandbox/maskedarray/tests/test_core.py 2007-03-27 19:12:29 UTC (rev 2877)
@@ -1105,6 +1105,47 @@
assert_equal(sortedx._data, [1,2,-2,-1,0])
assert_equal(sortedx._mask, [1,1,0,0,0])
+ def check_sort_2d(self):
+ "Check sort of 2D array."
+ # 2D array w/o mask
+ a = masked_array([[8,4,1],[2,0,9]])
+ a.sort(0)
+ assert_equal(a, [[2,0,1],[8,4,9]])
+ a = masked_array([[8,4,1],[2,0,9]])
+ a.sort(1)
+ assert_equal(a, [[1,4,8],[0,2,9]])
+ # 2D array w/mask
+ a = masked_array([[8,4,1],[2,0,9]], mask=[[1,0,0],[0,0,1]])
+ a.sort(0)
+ assert_equal(a, [[2,0,1],[8,4,9]])
+ assert_equal(a._mask, [[0,0,0],[1,0,1]])
+ a = masked_array([[8,4,1],[2,0,9]], mask=[[1,0,0],[0,0,1]])
+ a.sort(1)
+ assert_equal(a, [[1,4,8],[0,2,9]])
+ assert_equal(a._mask, [[0,0,1],[0,0,1]])
+ # 3D
+ a = masked_array([[[7, 8, 9],[4, 5, 6],[1, 2, 3]],
+ [[1, 2, 3],[7, 8, 9],[4, 5, 6]],
+ [[7, 8, 9],[1, 2, 3],[4, 5, 6]],
+ [[4, 5, 6],[1, 2, 3],[7, 8, 9]]])
+ a[a%4==0] = masked
+ am = a.copy()
+ an = a.filled(99)
+ am.sort(0)
+ an.sort(0)
+ assert_equal(am, an)
+ am = a.copy()
+ an = a.filled(99)
+ am.sort(1)
+ an.sort(1)
+ assert_equal(am, an)
+ am = a.copy()
+ an = a.filled(99)
+ am.sort(2)
+ an.sort(2)
+ assert_equal(am, an)
+
+
def check_ravel(self):
"Tests ravel"
a = array([[1,2,3,4,5]], mask=[[0,1,0,0,0]])
Added: trunk/Lib/sandbox/maskedarray/tests/test_mstats.py
===================================================================
--- trunk/Lib/sandbox/maskedarray/tests/test_mstats.py 2007-03-27 18:10:50 UTC (rev 2876)
+++ trunk/Lib/sandbox/maskedarray/tests/test_mstats.py 2007-03-27 19:12:29 UTC (rev 2877)
@@ -0,0 +1,117 @@
+# pylint: disable-msg=W0611, W0612, W0511,R0201
+"""Tests suite for maskedArray statistics.
+
+:author: Pierre Gerard-Marchant
+:contact: pierregm_at_uga_dot_edu
+:version: $Id$
+"""
+__author__ = "Pierre GF Gerard-Marchant ($Author$)"
+__version__ = '1.0'
+__revision__ = "$Revision$"
+__date__ = '$Date$'
+
+import numpy
+
+import maskedarray
+from maskedarray import masked, masked_array
+
+import maskedarray.testutils
+from maskedarray.testutils import *
+
+from maskedarray.mstats import mquantiles, mmedian
+
+#..............................................................................
+class test_quantiles(NumpyTestCase):
+ "Base test class for MaskedArrays."
+ def __init__(self, *args, **kwds):
+ NumpyTestCase.__init__(self, *args, **kwds)
+ self.a = maskedarray.arange(1,101)
+ #
+ def test_1d_nomask(self):
+ "Test quantiles 1D - w/o mask."
+ a = self.a
+ assert_almost_equal(mquantiles(a, alphap=1., betap=1.),
+ [25.75, 50.5, 75.25])
+ assert_almost_equal(mquantiles(a, alphap=0, betap=1.),
+ [25., 50., 75.])
+ assert_almost_equal(mquantiles(a, alphap=0.5, betap=0.5),
+ [25.5, 50.5, 75.5])
+ assert_almost_equal(mquantiles(a, alphap=0., betap=0.),
+ [25.25, 50.5, 75.75])
+ assert_almost_equal(mquantiles(a, alphap=1./3, betap=1./3),
+ [25.41666667, 50.5, 75.5833333])
+ assert_almost_equal(mquantiles(a, alphap=3./8, betap=3./8),
+ [25.4375, 50.5, 75.5625])
+ assert_almost_equal(mquantiles(a), [25.45, 50.5, 75.55])#
+ #
+ def test_1d_mask(self):
+ "Test quantiles 1D - w/ mask."
+ a = self.a
+ a[1::2] = masked
+ assert_almost_equal(mquantiles(a, alphap=1., betap=1.),
+ [25.5, 50.0, 74.5])
+ assert_almost_equal(mquantiles(a, alphap=0, betap=1.),
+ [24., 49., 74.])
+ assert_almost_equal(mquantiles(a, alphap=0.5, betap=0.5),
+ [25., 50., 75.])
+ assert_almost_equal(mquantiles(a, alphap=0., betap=0.),
+ [24.5, 50.0, 75.5])
+ assert_almost_equal(mquantiles(a, alphap=1./3, betap=1./3),
+ [24.833333, 50.0, 75.166666])
+ assert_almost_equal(mquantiles(a, alphap=3./8, betap=3./8),
+ [24.875, 50., 75.125])
+ assert_almost_equal(mquantiles(a), [24.9, 50., 75.1])
+ #
+ def test_2d_nomask(self):
+ "Test quantiles 2D - w/o mask."
+ a = self.a
+ b = maskedarray.resize(a, (100,100))
+ assert_almost_equal(mquantiles(b), [25.45, 50.5, 75.55])
+ assert_almost_equal(mquantiles(b, axis=0), maskedarray.resize(a,(3,100)))
+ assert_almost_equal(mquantiles(b, axis=1),
+ maskedarray.resize([25.45, 50.5, 75.55], (100,3)))
+ #
+ def test_2d_mask(self):
+ "Test quantiles 2D - w/ mask."
+ a = self.a
+ a[1::2] = masked
+ b = maskedarray.resize(a, (100,100))
+ assert_almost_equal(mquantiles(b), [25., 50., 75.])
+ assert_almost_equal(mquantiles(b, axis=0), maskedarray.resize(a,(3,100)))
+ assert_almost_equal(mquantiles(b, axis=1),
+ maskedarray.resize([24.9, 50., 75.1], (100,3)))
+
+class test_median(NumpyTestCase):
+ def __init__(self, *args, **kwds):
+ NumpyTestCase.__init__(self, *args, **kwds)
+
+ def test_2d(self):
+ "Tests median w/ 2D"
+ (n,p) = (101,30)
+ x = masked_array(numpy.linspace(-1.,1.,n),)
+ x[:10] = x[-10:] = masked
+ z = masked_array(numpy.empty((n,p), dtype=numpy.float_))
+ z[:,0] = x[:]
+ idx = numpy.arange(len(x))
+ for i in range(1,p):
+ numpy.random.shuffle(idx)
+ z[:,i] = x[idx]
+ assert_equal(mmedian(z[:,0]), 0)
+ assert_equal(mmedian(z), numpy.zeros((p,)))
+
+ def test_3d(self):
+ "Tests median w/ 3D"
+ x = maskedarray.arange(24).reshape(3,4,2)
+ x[x%3==0] = masked
+ assert_equal(mmedian(x), [[12,9],[6,15],[12,9],[18,15]])
+ x.shape = (4,3,2)
+ assert_equal(mmedian(x),[[99,10],[11,99],[13,14]])
+ x = maskedarray.arange(24).reshape(4,3,2)
+ x[x%5==0] = masked
+ assert_equal(mmedian(x), [[12,10],[8,9],[16,17]])
+
+###############################################################################
+#------------------------------------------------------------------------------
+if __name__ == "__main__":
+ NumpyTest().run()
+
\ No newline at end of file
Property changes on: trunk/Lib/sandbox/maskedarray/tests/test_mstats.py
___________________________________________________________________
Name: svn:keywords
+ Date
Author
Revision
Id
More information about the Scipy-svn
mailing list