[Scipy-svn] r4396 - in trunk/scipy/cluster: . src tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Fri May 30 15:37:06 EDT 2008
Author: damian.eads
Date: 2008-05-30 14:36:47 -0500 (Fri, 30 May 2008)
New Revision: 4396
Modified:
trunk/scipy/cluster/hierarchy.py
trunk/scipy/cluster/src/hierarchy.c
trunk/scipy/cluster/tests/test_hierarchy.py
Log:
Added some initial tests for hierarchy.linkage and hierarchy.squareform
Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py 2008-05-30 02:23:09 UTC (rev 4395)
+++ trunk/scipy/cluster/hierarchy.py 2008-05-30 19:36:47 UTC (rev 4396)
@@ -735,7 +735,6 @@
# is indeed a binomial coefficient.
d = int(np.ceil(np.sqrt(X.shape[0] * 2)))
- print d, s[0]
# Check that v is of valid dimensions.
if d * (d - 1) / 2 != int(s[0]):
raise ValueError('Incompatible vector size. It must be a binomial coefficient n choose 2 for some integer n >= 2.')
@@ -760,9 +759,9 @@
raise ValueError('The matrix argument must be square.')
if checks:
if np.sum(np.sum(X == X.transpose())) != np.product(X.shape):
- raise ValueError('The distance matrix must be symmetrical.')
+ raise ValueError('The distance matrix array must be symmetrical.')
if (X.diagonal() != 0).any():
- raise ValueError('The distance matrix must have zeros along the diagonal.')
+ raise ValueError('The distance matrix array must have zeros along the diagonal.')
# One-side of the dimensions is set here.
d = s[0]
@@ -780,7 +779,7 @@
elif len(s) != 2 and force.lower() == 'tomatrix':
raise ValueError("Forcing 'tomatrix' but input X is not a distance vector.")
else:
- raise ValueError('The first argument must be a vector or matrix. A %d-dimensional array is not permitted' % len(s))
+ raise ValueError('The first argument must be one or two dimensional array. A %d-dimensional array is not permitted' % len(s))
def minkowski(u, v, p):
"""
@@ -1607,7 +1606,6 @@
the number of original observations (leaves) in the non-singleton
cluster i.
"""
- is_valid_linkage(Z, throw=True, name='Z')
Zs = Z.shape
Zpart = Z[:,0:2]
Zd = Z[:,2].reshape(Zs[0], 1)
Modified: trunk/scipy/cluster/src/hierarchy.c
===================================================================
--- trunk/scipy/cluster/src/hierarchy.c 2008-05-30 02:23:09 UTC (rev 4395)
+++ trunk/scipy/cluster/src/hierarchy.c 2008-05-30 19:36:47 UTC (rev 4396)
@@ -734,7 +734,7 @@
xi = inds[i];
cnode *xnd = info->nodes + xi;
xn = xnd->n;
- mply = 1.0 / (((double)xn) * rscnt);
+ mply = (double)1.0 / (((double)xn) * rscnt);
*bit = mply * ((drx * (rc * xn)) + (dsx * (sc * xn)));
}
for (i = mini + 1; i < minj; i++, bit++) {
@@ -743,7 +743,7 @@
xi = inds[i];
cnode *xnd = info->nodes + xi;
xn = xnd->n;
- mply = 1.0 / (((double)xn) * rscnt);
+ mply = (double)1.0 / (((double)xn) * rscnt);
*bit = mply * ((drx * (rc * xn)) + (dsx * (sc * xn)));
}
for (i = minj + 1; i < np; i++, bit++) {
@@ -752,7 +752,7 @@
xi = inds[i];
cnode *xnd = info->nodes + xi;
xn = xnd->n;
- mply = 1.0 / (((double)xn) * rscnt);
+ mply = (double)1.0 / (((double)xn) * rscnt);
*bit = mply * ((drx * (rc * xn)) + (dsx * (sc * xn)));
}
}
Modified: trunk/scipy/cluster/tests/test_hierarchy.py
===================================================================
--- trunk/scipy/cluster/tests/test_hierarchy.py 2008-05-30 02:23:09 UTC (rev 4395)
+++ trunk/scipy/cluster/tests/test_hierarchy.py 2008-05-30 19:36:47 UTC (rev 4396)
@@ -37,7 +37,7 @@
import sys
import os.path
from scipy.testing import *
-from scipy.cluster.hierarchy import pdist
+from scipy.cluster.hierarchy import pdist, squareform, linkage, from_mlab_linkage
import numpy
#import math
@@ -64,15 +64,21 @@
"pdist-euclidean-ml.txt",
"pdist-euclidean-ml-iris.txt",
"pdist-chebychev-ml.txt",
- "pdist-chebychev-ml-iris.txt"]
+ "pdist-chebychev-ml-iris.txt",
+ "linkage-single-tdist.txt",
+ "linkage-complete-tdist.txt",
+ "linkage-average-tdist.txt",
+ "linkage-weighted-tdist.txt"]
-_tdist = np.array([[0, 662, 877, 255, 412, 996],
- [662, 0, 295, 468, 268, 400],
- [877, 295, 0, 754, 564, 138],
- [255, 468, 754, 0, 219, 869],
- [412, 268, 564, 219, 0, 669],
- [996, 400, 138, 869, 669, 0 ]])
+_tdist = numpy.array([[0, 662, 877, 255, 412, 996],
+ [662, 0, 295, 468, 268, 400],
+ [877, 295, 0, 754, 564, 138],
+ [255, 468, 754, 0, 219, 869],
+ [412, 268, 564, 219, 0, 669],
+ [996, 400, 138, 869, 669, 0 ]], dtype='double')
+_ytdist = squareform(_tdist)
+
# A hashmap of expected output arrays for the tests. These arrays
# come from a list of text files, which are read prior to testing.
@@ -556,6 +562,104 @@
#print "test-chebychev-iris", numpy.abs(Y_test2 - Y_right).max()
self.failUnless(within_tol(Y_test2, Y_right, eps))
+ def test_pdist_chebychev_iris_nonC(self):
+ "Tests pdist(X, 'test_chebychev') [the non-C implementation] on the Iris data set."
+ eps = 1e-15
+ # Get the data: the input matrix and the right output.
+ X = eo['iris']
+ Y_right = eo['pdist-chebychev-iris']
+ Y_test2 = pdist(X, 'test_chebychev')
+ #print "test-chebychev-iris", numpy.abs(Y_test2 - Y_right).max()
+ self.failUnless(within_tol(Y_test2, Y_right, eps))
+
+ def test_squareform_empty_matrix(self):
+ "Tests squareform on an empty matrix."
+ A = numpy.zeros((0,0))
+ rA = squareform(numpy.array(A, dtype='double'))
+ self.failUnless(rA.shape == (0,))
+
+ def test_squareform_empty_vector(self):
+ v = numpy.zeros((0,))
+ rv = squareform(numpy.array(v, dtype='double'))
+ self.failUnless(rv.shape == (0,0))
+
+ def test_squareform_1by1_matrix(self):
+ "Tests squareform on a 1x1 matrix."
+ A = numpy.zeros((1,1))
+ rA = squareform(numpy.array(A, dtype='double'))
+ self.failUnless(rA.shape == (0,))
+
+ def test_squareform_one_vector(self):
+ "Tests squareform on a 1-D array, length=1."
+ v = numpy.ones((1,)) * 8.3
+ rv = squareform(numpy.array(v, dtype='double'))
+ self.failUnless(rv.shape == (2,2))
+ self.failUnless(rv[0,1] == 8.3)
+ self.failUnless(rv[1,0] == 8.3)
+
+ def test_squareform_2by2_matrix(self):
+ "Tests squareform on a 2x2 matrix."
+ A = numpy.zeros((2,2))
+ A[0,1]=0.8
+ A[1,0]=0.8
+ rA = squareform(numpy.array(A, dtype='double'))
+ self.failUnless(rA.shape == (1,))
+ self.failUnless(rA[0] == 0.8)
+
+ def test_squareform_multi_matrix(self):
+ "Tests squareform on a square matrices of multiple sizes."
+ for n in xrange(2, 5):
+ X = numpy.random.rand(n, 4)
+ Y = pdist(X)
+ A = squareform(Y)
+ Yr = squareform(A)
+ s = A.shape
+ k = 0
+ #print A.shape, Y.shape, Yr.shape
+ for i in xrange(0, s[0]):
+ for j in xrange(i+1, s[1]):
+ if i != j:
+ #print i, j, k, A[i, j], Y[k]
+ self.failUnless(A[i, j] == Y[k])
+ self.failUnless(Yr[k] == Y[k])
+ k += 1
+ else:
+ self.failUnless(A[i, j] == 0)
+
+ def test_linkage_single_tdist(self):
+ "Tests linkage(Y, 'single') on the tdist data set."
+ Z = linkage(_ytdist, 'single')
+ Zmlab = eo['linkage-single-tdist']
+ eps = 1e-10
+ expectedZ = from_mlab_linkage(Zmlab)
+ self.failUnless(within_tol(Z, expectedZ, eps))
+
+ def test_linkage_complete_tdist(self):
+ "Tests linkage(Y, 'complete') on the tdist data set."
+ Z = linkage(_ytdist, 'complete')
+ Zmlab = eo['linkage-complete-tdist']
+ eps = 1e-10
+ expectedZ = from_mlab_linkage(Zmlab)
+ self.failUnless(within_tol(Z, expectedZ, eps))
+
+ def test_linkage_average_tdist(self):
+ "Tests linkage(Y, 'average') on the tdist data set."
+ Z = linkage(_ytdist, 'average')
+ Zmlab = eo['linkage-average-tdist']
+ eps = 1e-05
+ expectedZ = from_mlab_linkage(Zmlab)
+ #print Z, expectedZ, numpy.abs(Z - expectedZ).max()
+ self.failUnless(within_tol(Z, expectedZ, eps))
+
+ def test_linkage_weighted_tdist(self):
+ "Tests linkage(Y, 'weighted') on the tdist data set."
+ Z = linkage(_ytdist, 'weighted')
+ Zmlab = eo['linkage-weighted-tdist']
+ eps = 1e-10
+ expectedZ = from_mlab_linkage(Zmlab)
+ #print Z, expectedZ, numpy.abs(Z - expectedZ).max()
+ self.failUnless(within_tol(Z, expectedZ, eps))
+
def within_tol(a, b, tol):
return numpy.abs(a - b).max() < tol
More information about the Scipy-svn mailing list