[Scipy-svn] r4758 - in trunk/scipy/cluster: . src tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Tue Sep 30 15:15:04 EDT 2008
Author: damian.eads
Date: 2008-09-30 14:14:59 -0500 (Tue, 30 Sep 2008)
New Revision: 4758
Modified:
trunk/scipy/cluster/distance.py
trunk/scipy/cluster/hierarchy.py
trunk/scipy/cluster/src/distance.c
trunk/scipy/cluster/src/distance.h
trunk/scipy/cluster/src/distance_wrap.c
trunk/scipy/cluster/tests/test_distance.py
Log:
Fixed minor bug in cophenet.
Modified: trunk/scipy/cluster/distance.py
===================================================================
--- trunk/scipy/cluster/distance.py 2008-09-30 07:00:07 UTC (rev 4757)
+++ trunk/scipy/cluster/distance.py 2008-09-30 19:14:59 UTC (rev 4758)
@@ -199,6 +199,35 @@
raise ValueError("p must be at least 1")
return (abs(u-v)**p).sum() ** (1.0 / p)
+def wminkowski(u, v, p, w):
+ """
+ Computes the weighted Minkowski distance between two vectors ``u``
+ and ``v``, defined as
+
+ .. math::
+
+ \sum {(w_i*|u_i - v_i|)^p})^(1/p).
+
+ :Parameters:
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+ p : ndarray
+ The norm of the difference :math:`${||u-v||}_p$`.
+ w : ndarray
+ The weight vector.
+
+ :Returns:
+ d : double
+ The Minkowski distance between vectors ``u`` and ``v``.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ if p < 1:
+ raise ValueError("p must be at least 1")
+ return ((w * abs(u-v))**p).sum() ** (1.0 / p)
+
def euclidean(u, v):
"""
Computes the Euclidean distance between two n-vectors ``u`` and ``v``,
@@ -817,6 +846,14 @@
'jaccard', 'kulsinski', 'mahalanobis', 'matching',
'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'.
+ w : ndarray
+ The weight vector (for weighted Minkowski).
+ p : double
+ The p-norm to apply (for Minkowski, weighted and unweighted)
+ V : ndarray
+ The variance vector (for standardized Euclidean).
+ VI : ndarray
+ The inverse of the covariance matrix (for Mahalanobis).
:Returns:
Y : ndarray
@@ -978,6 +1015,11 @@
Computes the Sokal-Sneath distance between each pair of
boolean vectors. (see sokalsneath function documentation)
+ 22. ``Y = pdist(X, 'wminkowski')``
+
+ Computes the weighted Minkowski distance between each pair of
+ vectors. (see wminkowski function documentation)
+
22. ``Y = pdist(X, f)``
Computes the distance between all pairs of vectors in X
@@ -1036,6 +1078,11 @@
for j in xrange(i+1, m):
dm[k] = minkowski(X[i, :], X[j, :], p)
k = k + 1
+ elif metric == wminkowski:
+ for i in xrange(0, m - 1):
+ for j in xrange(i+1, m):
+ dm[k] = wminkowski(X[i, :], X[j, :], p, w)
+ k = k + 1
elif metric == seuclidean:
for i in xrange(0, m - 1):
for j in xrange(i+1, m):
@@ -1079,6 +1126,9 @@
_distance_wrap.pdist_chebyshev_wrap(_convert_to_double(X), dm)
elif mstr in set(['minkowski', 'mi', 'm']):
_distance_wrap.pdist_minkowski_wrap(_convert_to_double(X), dm, p)
+ elif mstr in set(['wminkowski', 'wmi', 'wm', 'wpnorm']):
+ _distance_wrap.cdist_weighted_minkowski_wrap(_convert_to_double(X),
+ dm, p, w)
elif mstr in set(['seuclidean', 'se', 's']):
if V is not None:
V = np.asarray(V)
@@ -1174,7 +1224,9 @@
elif metric == 'test_cityblock':
dm = pdist(X, cityblock)
elif metric == 'test_minkowski':
- dm = pdist(X, minkowski, p)
+ dm = pdist(X, minkowski, p=p)
+ elif metric == 'test_wminkowski':
+ dm = pdist(X, wminkowski, p=p, w=w)
elif metric == 'test_cosine':
dm = pdist(X, cosine)
elif metric == 'test_correlation':
@@ -1502,7 +1554,7 @@
return d
-def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None):
+def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
"""
Computes distance between each pair of observations between two
collections of vectors. ``XA`` is a :math:`$m_A$` by :math:`$n$`
@@ -1529,8 +1581,18 @@
'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
'jaccard', 'kulsinski', 'mahalanobis', 'matching',
'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
- 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'.
+ 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski',
+ 'yule'.
+ w : ndarray
+ The weight vector (for weighted Minkowski).
+ p : double
+ The p-norm to apply (for Minkowski, weighted and unweighted)
+ V : ndarray
+ The variance vector (for standardized Euclidean).
+ VI : ndarray
+ The inverse of the covariance matrix (for Mahalanobis).
+
:Returns:
Y : ndarray
A :math:`$m_A$` by :math:`$m_B$` distance matrix.
@@ -1688,11 +1750,17 @@
21. ``Y = cdist(X, 'sokalsneath')``
- Computes the Sokal-Sneath distance between each pair of
- boolean vectors. (see sokalsneath function documentation)
+ Computes the Sokal-Sneath distance between the vectors. (see
+ sokalsneath function documentation)
- 22. ``Y = cdist(X, f)``
+ 22. ``Y = cdist(X, 'wminkowski')``
+
+ Computes the weighted Minkowski distance between the
+ vectors. (see sokalsneath function documentation)
+
+ 23. ``Y = cdist(X, f)``
+
Computes the distance between all pairs of vectors in X
using the user supplied 2-arity function f. For example,
Euclidean distance between the vectors could be computed
@@ -1755,6 +1823,10 @@
for i in xrange(0, mA):
for j in xrange(0, mB):
dm[i, j] = minkowski(XA[i, :], XB[j, :], p)
+ elif metric == wminkowski:
+ for i in xrange(0, mA):
+ for j in xrange(0, mB):
+ dm[i, j] = wminkowski(XA[i, :], XB[j, :], p, w)
elif metric == seuclidean:
for i in xrange(0, mA):
for j in xrange(0, mB):
@@ -1800,9 +1872,12 @@
elif mstr in set(['chebychev', 'chebyshev', 'cheby', 'cheb', 'ch']):
_distance_wrap.cdist_chebyshev_wrap(_convert_to_double(XA),
_convert_to_double(XB), dm)
- elif mstr in set(['minkowski', 'mi', 'm']):
+ elif mstr in set(['minkowski', 'mi', 'm', 'pnorm']):
_distance_wrap.cdist_minkowski_wrap(_convert_to_double(XA),
_convert_to_double(XB), dm, p)
+ elif mstr in set(['wminkowski', 'wmi', 'wm', 'wpnorm']):
+ _distance_wrap.cdist_weighted_minkowski_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm, p, _convert_to_double(w))
elif mstr in set(['seuclidean', 'se', 's']):
if V is not None:
V = np.asarray(V)
@@ -1921,7 +1996,9 @@
elif metric == 'test_cityblock':
dm = cdist(XA, XB, cityblock)
elif metric == 'test_minkowski':
- dm = cdist(XA, XB, minkowski, p)
+ dm = cdist(XA, XB, minkowski, p=p)
+ elif metric == 'test_wminkowski':
+ dm = cdist(XA, XB, wminkowski, p=p, w=w)
elif metric == 'test_cosine':
dm = cdist(XA, XB, cosine)
elif metric == 'test_correlation':
Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py 2008-09-30 07:00:07 UTC (rev 4757)
+++ trunk/scipy/cluster/hierarchy.py 2008-09-30 19:14:59 UTC (rev 4758)
@@ -910,14 +910,13 @@
correlation coefficient and ``d`` is the condensed cophentic
distance matrix (upper triangular form).
"""
- Z = np.asarray(Z)
-
nargs = len(args)
if nargs < 1:
raise ValueError('At least one argument must be passed to cophenet.')
Z = args[0]
+ Z = np.asarray(Z)
is_valid_linkage(Z, throw=True, name='Z')
Zs = Z.shape
n = Zs[0] + 1
@@ -932,6 +931,7 @@
return zz
Y = args[1]
+ Y = np.asarray(Y)
Ys = Y.shape
distance.is_valid_y(Y, throw=True, name='Y')
Modified: trunk/scipy/cluster/src/distance.c
===================================================================
--- trunk/scipy/cluster/src/distance.c 2008-09-30 07:00:07 UTC (rev 4757)
+++ trunk/scipy/cluster/src/distance.c 2008-09-30 19:14:59 UTC (rev 4758)
@@ -294,6 +294,16 @@
return pow(s, 1.0 / p);
}
+double weighted_minkowski_distance(const double *u, const double *v, int n, double p, const double *w) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]) * w[i];
+ s = s + pow(d, p);
+ }
+ return pow(s, 1.0 / p);
+}
+
void compute_mean_vector(double *res, const double *X, int m, int n) {
int i, j;
const double *v;
@@ -489,6 +499,19 @@
}
}
+void pdist_weighted_minkowski(const double *X, double *dm, int m, int n, double p, const double *w) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = weighted_minkowski_distance(u, v, n, p, w);
+ }
+ }
+}
+
void pdist_yule_bool(const char *X, double *dm, int m, int n) {
int i, j;
const char *u, *v;
@@ -813,6 +836,19 @@
}
}
+void cdist_weighted_minkowski(const double *XA, const double *XB, double *dm, int mA, int mB, int n, double p, const double *w) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = weighted_minkowski_distance(u, v, n, p, w);
+ }
+ }
+}
+
void cdist_yule_bool(const char *XA, const char *XB, double *dm, int mA, int mB, int n) {
int i, j;
const char *u, *v;
Modified: trunk/scipy/cluster/src/distance.h
===================================================================
--- trunk/scipy/cluster/src/distance.h 2008-09-30 07:00:07 UTC (rev 4757)
+++ trunk/scipy/cluster/src/distance.h 2008-09-30 19:14:59 UTC (rev 4758)
@@ -55,6 +55,7 @@
void pdist_jaccard_bool(const char *X, double *dm, int m, int n);
void pdist_kulsinski_bool(const char *X, double *dm, int m, int n);
void pdist_minkowski(const double *X, double *dm, int m, int n, double p);
+void pdist_weighted_minkowski(const double *X, double *dm, int m, int n, double p, const double *w);
void pdist_yule_bool(const char *X, double *dm, int m, int n);
void pdist_matching_bool(const char *X, double *dm, int m, int n);
void pdist_dice_bool(const char *X, double *dm, int m, int n);
@@ -93,6 +94,8 @@
int mA, int mB, int n);
void cdist_minkowski(const double *XA, const double *XB, double *dm,
int mA, int mB, int n, double p);
+void cdist_weighted_minkowski(const double *XA, const double *XB, double *dm,
+ int mA, int mB, int n, double p, const double *w);
void cdist_yule_bool(const char *XA, const char *XB, double *dm,
int mA, int mB, int n);
void cdist_matching_bool(const char *XA, const char *XB, double *dm,
Modified: trunk/scipy/cluster/src/distance_wrap.c
===================================================================
--- trunk/scipy/cluster/src/distance_wrap.c 2008-09-30 07:00:07 UTC (rev 4757)
+++ trunk/scipy/cluster/src/distance_wrap.c 2008-09-30 19:14:59 UTC (rev 4758)
@@ -347,12 +347,36 @@
mA = XA_->dimensions[0];
mB = XB_->dimensions[0];
n = XA_->dimensions[1];
-
cdist_minkowski(XA, XB, dm, mA, mB, n, p);
}
return Py_BuildValue("d", 0.0);
}
+extern PyObject *cdist_weighted_minkowski_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *XA_, *XB_, *dm_, *w_;
+ int mA, mB, n;
+ double *dm;
+ const double *XA, *XB, *w;
+ double p;
+ if (!PyArg_ParseTuple(args, "O!O!O!dO!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_,
+ &p,
+ &PyArray_Type, &w_)) {
+ return 0;
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ w = (const double*)w_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ cdist_weighted_minkowski(XA, XB, dm, mA, mB, n, p, w);
+ }
+ return Py_BuildValue("d", 0.0);
+}
extern PyObject *cdist_yule_bool_wrap(PyObject *self, PyObject *args) {
PyArrayObject *XA_, *XB_, *dm_;
@@ -824,7 +848,31 @@
return Py_BuildValue("d", 0.0);
}
+extern PyObject *pdist_weighted_minkowski_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_, *w_;
+ int m, n;
+ double *dm, *X, *w;
+ double p;
+ if (!PyArg_ParseTuple(args, "O!O!dO!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_,
+ &p,
+ &PyArray_Type, &w_)) {
+ return 0;
+ }
+ else {
+ X = (double*)X_->data;
+ dm = (double*)dm_->data;
+ w = (const double*)w_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ pdist_weighted_minkowski(X, dm, m, n, p, w);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+
extern PyObject *pdist_yule_bool_wrap(PyObject *self, PyObject *args) {
PyArrayObject *X_, *dm_;
int m, n;
@@ -1048,6 +1096,7 @@
{"cdist_mahalanobis_wrap", cdist_mahalanobis_wrap, METH_VARARGS},
{"cdist_matching_bool_wrap", cdist_matching_bool_wrap, METH_VARARGS},
{"cdist_minkowski_wrap", cdist_minkowski_wrap, METH_VARARGS},
+ {"cdist_weighted_minkowski_wrap", cdist_weighted_minkowski_wrap, METH_VARARGS},
{"cdist_rogerstanimoto_bool_wrap", cdist_rogerstanimoto_bool_wrap, METH_VARARGS},
{"cdist_russellrao_bool_wrap", cdist_russellrao_bool_wrap, METH_VARARGS},
{"cdist_seuclidean_wrap", cdist_seuclidean_wrap, METH_VARARGS},
@@ -1069,6 +1118,7 @@
{"pdist_mahalanobis_wrap", pdist_mahalanobis_wrap, METH_VARARGS},
{"pdist_matching_bool_wrap", pdist_matching_bool_wrap, METH_VARARGS},
{"pdist_minkowski_wrap", pdist_minkowski_wrap, METH_VARARGS},
+ {"pdist_weighted_minkowski_wrap", pdist_weighted_minkowski_wrap, METH_VARARGS},
{"pdist_rogerstanimoto_bool_wrap", pdist_rogerstanimoto_bool_wrap, METH_VARARGS},
{"pdist_russellrao_bool_wrap", pdist_russellrao_bool_wrap, METH_VARARGS},
{"pdist_seuclidean_wrap", pdist_seuclidean_wrap, METH_VARARGS},
Modified: trunk/scipy/cluster/tests/test_distance.py
===================================================================
--- trunk/scipy/cluster/tests/test_distance.py 2008-09-30 07:00:07 UTC (rev 4757)
+++ trunk/scipy/cluster/tests/test_distance.py 2008-09-30 19:14:59 UTC (rev 4758)
@@ -233,6 +233,44 @@
print (Y1-Y2).max()
self.failUnless(within_tol(Y1, Y2, eps))
+
+ def test_cdist_wminkowski_random_p3d8(self):
+ "Tests cdist(X, 'wminkowski') on random data. (p=3.8)"
+ eps = 1e-07
+ # Get the data: the input matrix and the right output.
+ X1 = eo['cdist-X1']
+ X2 = eo['cdist-X2']
+ w = 1.0 / X1.std(axis=0)
+ Y1 = cdist(X1, X2, 'wminkowski', p=3.8, w=w)
+ Y2 = cdist(X1, X2, 'test_wminkowski', p=3.8, w=w)
+ print (Y1-Y2).max()
+ self.failUnless(within_tol(Y1, Y2, eps))
+
+ def test_cdist_wminkowski_random_p4d6(self):
+ "Tests cdist(X, 'wminkowski') on random data. (p=4.6)"
+ eps = 1e-07
+ # Get the data: the input matrix and the right output.
+ X1 = eo['cdist-X1']
+ X2 = eo['cdist-X2']
+ w = 1.0 / X1.std(axis=0)
+ Y1 = cdist(X1, X2, 'wminkowski', p=4.6, w=w)
+ Y2 = cdist(X1, X2, 'test_wminkowski', p=4.6, w=w)
+ print (Y1-Y2).max()
+ self.failUnless(within_tol(Y1, Y2, eps))
+
+ def test_cdist_wminkowski_random_p1d23(self):
+ "Tests cdist(X, 'wminkowski') on random data. (p=1.23)"
+ eps = 1e-07
+ # Get the data: the input matrix and the right output.
+ X1 = eo['cdist-X1']
+ X2 = eo['cdist-X2']
+ w = 1.0 / X1.std(axis=0)
+ Y1 = cdist(X1, X2, 'wminkowski', p=1.23, w=w)
+ Y2 = cdist(X1, X2, 'test_wminkowski', p=1.23, w=w)
+ print (Y1-Y2).max()
+ self.failUnless(within_tol(Y1, Y2, eps))
+
+
def test_cdist_seuclidean_random(self):
"Tests cdist(X, 'seuclidean') on random data."
eps = 1e-07
More information about the Scipy-svn
mailing list