[Numpy-svn] r5613 - in trunk/numpy/ma: . tests
numpy-svn at scipy.org
numpy-svn at scipy.org
Tue Aug 5 19:21:33 EDT 2008
Author: pierregm
Date: 2008-08-05 18:21:31 -0500 (Tue, 05 Aug 2008)
New Revision: 5613
Modified:
trunk/numpy/ma/extras.py
trunk/numpy/ma/tests/test_extras.py
Log:
* added cov and corrcoef to ma.extras for compatibility
Modified: trunk/numpy/ma/extras.py
===================================================================
--- trunk/numpy/ma/extras.py 2008-08-05 18:01:14 UTC (rev 5612)
+++ trunk/numpy/ma/extras.py 2008-08-05 23:21:31 UTC (rev 5613)
@@ -14,7 +14,7 @@
__all__ = ['apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d',
'average',
'column_stack','compress_cols','compress_rowcols', 'compress_rows',
- 'count_masked',
+ 'count_masked', 'corrcoef', 'cov',
'dot','dstack',
'ediff1d','expand_dims',
'flatnotmasked_contiguous','flatnotmasked_edges',
@@ -30,7 +30,7 @@
from itertools import groupby
import warnings
-import core
+import core as ma
from core import MaskedArray, MAError, add, array, asarray, concatenate, count,\
filled, getmask, getmaskarray, masked, masked_array, mask_or, nomask, ones,\
sort, zeros
@@ -166,7 +166,7 @@
arr is an N-d array. i varies so as to apply the function along
the given axis for each 1-d subarray in arr.
"""
- arr = core.array(arr, copy=False, subok=True)
+ arr = array(arr, copy=False, subok=True)
nd = arr.ndim
if axis < 0:
axis += nd
@@ -213,7 +213,7 @@
dtypes.append(asarray(res).dtype)
k += 1
else:
- res = core.array(res, copy=False, subok=True)
+ res = array(res, copy=False, subok=True)
j = i.copy()
j[axis] = ([slice(None, None)] * res.ndim)
j.put(indlist, ind)
@@ -244,8 +244,8 @@
if not hasattr(arr, '_mask'):
result = np.asarray(outarr, dtype=max_dtypes)
else:
- result = core.asarray(outarr, dtype=max_dtypes)
- result.fill_value = core.default_fill_value(result)
+ result = asarray(outarr, dtype=max_dtypes)
+ result.fill_value = ma.default_fill_value(result)
return result
def average(a, axis=None, weights=None, returned=False):
@@ -548,21 +548,19 @@
def dot(a,b, strict=False):
"""Return the dot product of two 2D masked arrays a and b.
- Like the generic numpy equivalent, the product sum is over the
- last dimension of a and the second-to-last dimension of b. If
- strict is True, masked values are propagated: if a masked value
- appears in a row or column, the whole row or column is considered
- masked.
+ Like the generic numpy equivalent, the product sum is over the last
+ dimension of a and the second-to-last dimension of b. If strict is True,
+ masked values are propagated: if a masked value appears in a row or column,
+ the whole row or column is considered masked.
Parameters
----------
- strict : {boolean}
- Whether masked data are propagated (True) or set to 0 for
- the computation.
+ strict : {boolean}
+ Whether masked data are propagated (True) or set to 0 for the computation.
Notes
-----
- The first argument is not conjugated.
+ The first argument is not conjugated.
"""
#!!!: Works only with 2D arrays. There should be a way to get it to run with higher dimension
@@ -646,7 +644,132 @@
+def cov(x, y=None, rowvar=True, bias=False, allow_masked=True):
+ """Estimates the covariance matrix.
+ Normalization is by (N-1) where N is the number of observations (unbiased
+ estimate). If bias is True then normalization is by N.
+
+ By default, masked values are recognized as such. If x and y have the same
+ shape, a common mask is allocated: if x[i,j] is masked, then y[i,j] will also
+ be masked.
+ Setting `allow_masked` to False will raise an exception if values are missing
+ in either of the input arrays.
+
+ Parameters
+ ----------
+ x : array-like
+ Input data.
+ If x is a 1D array, returns the variance.
+ If x is a 2D array, returns the covariance matrix.
+ y : {None, array-like}, optional
+ Optional set of variables.
+ rowvar : {False, True} optional
+ If rowvar is true, then each row is a variable with observations in columns.
+ If rowvar is False, each column is a variable and the observations are in
+ the rows.
+ bias : {False, True} optional
+ Whether to use a biased (True) or unbiased (False) estimate of the covariance.
+ If bias is True, then the normalization is by N, the number of observations.
+ Otherwise, the normalization is by (N-1).
+ allow_masked : {True, False} optional
+ If True, masked values are propagated pair-wise: if a value is masked in x,
+ the corresponding value is masked in y.
+ If False, raises a ValueError exception when some values are missing.
+
+ Raises
+ ------
+ ValueError:
+ Raised if some values are missing and allow_masked is False.
+
+ """
+ x = array(x, ndmin=2, copy=True, dtype=float)
+ xmask = getmaskarray(x)
+ # Quick exit if we can't process masked data
+ if not allow_masked and xmask.any():
+ raise ValueError("Cannot process masked data...")
+ #
+ if x.shape[0] == 1:
+ rowvar = True
+ # Make sure that rowvar is either 0 or 1
+ rowvar = int(bool(rowvar))
+ axis = 1-rowvar
+ if rowvar:
+ tup = (slice(None), None)
+ else:
+ tup = (None, slice(None))
+ #
+ if y is None:
+ xnotmask = np.logical_not(xmask).astype(int)
+ else:
+ y = array(y, copy=False, ndmin=2, dtype=float)
+ ymask = getmaskarray(y)
+ if not allow_masked and ymask.any():
+ raise ValueError("Cannot process masked data...")
+ if xmask.any() or ymask.any():
+ if y.shape == x.shape:
+ # Define some common mask
+ common_mask = np.logical_or(xmask, ymask)
+ if common_mask is not nomask:
+ x.unshare_mask()
+ y.unshare_mask()
+ xmask = x._mask = y._mask = ymask = common_mask
+ x = concatenate((x,y),axis)
+ xnotmask = np.logical_not(np.concatenate((xmask, ymask), axis)).astype(int)
+
+ x -= x.mean(axis=rowvar)[tup]
+
+ if not rowvar:
+ fact = dot(xnotmask.T, xnotmask)*1. - (1 - bool(bias))
+ result = (dot(x.T, x.conj(), strict=False) / fact).squeeze()
+ else:
+ fact = dot(xnotmask, xnotmask.T)*1. - (1 - bool(bias))
+ result = (dot(x, x.T.conj(), strict=False) / fact).squeeze()
+ return result
+
+
+def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True):
+ """The correlation coefficients formed from the array x, where the
+ rows are the observations, and the columns are variables.
+
+ corrcoef(x,y) where x and y are 1d arrays is the same as
+ corrcoef(transpose([x,y]))
+
+ Parameters
+ ----------
+ x : ndarray
+ Input data. If x is a 1D array, returns the variance.
+ If x is a 2D array, returns the covariance matrix.
+ y : {None, ndarray} optional
+ Optional set of variables.
+ rowvar : {False, True} optional
+ If True, then each row is a variable with observations in columns.
+ If False, each column is a variable and the observations are in the rows.
+ bias : {False, True} optional
+ Whether to use a biased (True) or unbiased (False) estimate of the
+ covariance.
+ If True, then the normalization is by N, the number of non-missing
+ observations.
+ Otherwise, the normalization is by (N-1).
+ allow_masked : {True, False} optional
+ If True, masked values are propagated pair-wise: if a value is masked
+ in x, the corresponding value is masked in y.
+ If False, raises an exception.
+
+ See Also
+ --------
+ cov
+
+ """
+
+ c = cov(x, y, rowvar, bias, allow_masked)
+ try:
+ d = ma.diag(c)
+ except ValueError: # scalar covariance
+ return 1
+ return c/ma.sqrt(ma.multiply.outer(d,d))
+
+
#####--------------------------------------------------------------------------
#---- --- Concatenation helpers ---
#####--------------------------------------------------------------------------
Modified: trunk/numpy/ma/tests/test_extras.py
===================================================================
--- trunk/numpy/ma/tests/test_extras.py 2008-08-05 18:01:14 UTC (rev 5612)
+++ trunk/numpy/ma/tests/test_extras.py 2008-08-05 23:21:31 UTC (rev 5613)
@@ -12,7 +12,7 @@
__date__ = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $'
import numpy
-from numpy.testing import *
+from numpy.testing import TestCase, run_module_suite
from numpy.ma.testutils import *
from numpy.ma.core import *
from numpy.ma.extras import *
@@ -317,6 +317,7 @@
assert_equal(dx._mask, np.r_[1,0,0,0,0,1])
class TestApplyAlongAxis(TestCase):
+ #
"Tests 2D functions"
def test_3d(self):
a = arange(12.).reshape(2,2,3)
@@ -325,7 +326,9 @@
xa = apply_along_axis(myfunc,2,a)
assert_equal(xa,[[1,4],[7,10]])
+
class TestMedian(TestCase):
+ #
def test_2d(self):
"Tests median w/ 2D"
(n,p) = (101,30)
@@ -352,6 +355,71 @@
assert_equal(median(x,0), [[12,10],[8,9],[16,17]])
+class TestCov(TestCase):
+ #
+ def setUp(self):
+ self.data = array(np.random.rand(12))
+ #
+ def test_1d_wo_missing(self):
+ "Test cov on 1D variable w/o missing values"
+ x = self.data
+ assert_equal(np.cov(x), cov(x))
+ assert_equal(np.cov(x, rowvar=False), cov(x, rowvar=False))
+ assert_equal(np.cov(x, rowvar=False, bias=True),
+ cov(x, rowvar=False, bias=True))
+ #
+ def test_2d_wo_missing(self):
+ "Test cov on 1 2D variable w/o missing values"
+ x = self.data.reshape(3,4)
+ assert_equal(np.cov(x), cov(x))
+ assert_equal(np.cov(x, rowvar=False), cov(x, rowvar=False))
+ assert_equal(np.cov(x, rowvar=False, bias=True),
+ cov(x, rowvar=False, bias=True))
+ #
+ def test_1d_w_missing(self):
+ "Test cov 1 1D variable w/missing values"
+ x = self.data
+ x[-1] = masked
+ x -= x.mean()
+ nx = x.compressed()
+ assert_almost_equal(np.cov(nx), cov(x))
+ assert_almost_equal(np.cov(nx, rowvar=False), cov(x, rowvar=False))
+ assert_almost_equal(np.cov(nx, rowvar=False, bias=True),
+ cov(x, rowvar=False, bias=True))
+ #
+ try:
+ cov(x, allow_masked=False)
+ except ValueError:
+ pass
+ #
+ # 2 1D variables w/ missing values
+ nx = x[1:-1]
+ assert_almost_equal(np.cov(nx, nx[::-1]), cov(x, x[::-1]))
+ assert_equal(np.cov(nx, nx[::-1], rowvar=False),
+ cov(x, x[::-1], rowvar=False))
+ assert_equal(np.cov(nx, nx[::-1], rowvar=False, bias=True),
+ cov(x, x[::-1], rowvar=False, bias=True))
+ #
+ def test_2d_w_missing(self):
+ "Test cov on 2D variable w/ missing value"
+ x = self.data
+ x[-1] = masked
+ x = x.reshape(3,4)
+ valid = np.logical_not(getmaskarray(x)).astype(int)
+ frac = np.dot(valid, valid.T)
+ xf = (x - x.mean(1)[:,None]).filled(0)
+ assert_almost_equal(cov(x), np.cov(xf) * (x.shape[1]-1) / (frac - 1.))
+ assert_almost_equal(cov(x, bias=True),
+ np.cov(xf, bias=True) * x.shape[1] / frac)
+ frac = np.dot(valid.T, valid)
+ xf = (x - x.mean(0)).filled(0)
+ assert_almost_equal(cov(x, rowvar=False),
+ np.cov(xf, rowvar=False) * (x.shape[0]-1)/(frac - 1.))
+ assert_almost_equal(cov(x, rowvar=False, bias=True),
+ np.cov(xf, rowvar=False, bias=True) * x.shape[0]/frac)
+
+
+
class TestPolynomial(TestCase):
#
def test_polyfit(self):
More information about the Numpy-svn
mailing list