[Scipy-svn] r4396 - in trunk/scipy/cluster: . src tests

scipy-svn at scipy.org scipy-svn at scipy.org
Fri May 30 15:37:06 EDT 2008


Author: damian.eads
Date: 2008-05-30 14:36:47 -0500 (Fri, 30 May 2008)
New Revision: 4396

Modified:
   trunk/scipy/cluster/hierarchy.py
   trunk/scipy/cluster/src/hierarchy.c
   trunk/scipy/cluster/tests/test_hierarchy.py
Log:
Added some initial tests for hierarchy.linkage and hierarchy.squareform

Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py	2008-05-30 02:23:09 UTC (rev 4395)
+++ trunk/scipy/cluster/hierarchy.py	2008-05-30 19:36:47 UTC (rev 4396)
@@ -735,7 +735,6 @@
         # is indeed a binomial coefficient.
         d = int(np.ceil(np.sqrt(X.shape[0] * 2)))
 
-        print d, s[0]
         # Check that v is of valid dimensions.
         if d * (d - 1) / 2 != int(s[0]):
             raise ValueError('Incompatible vector size. It must be a binomial coefficient n choose 2 for some integer n >= 2.')
@@ -760,9 +759,9 @@
             raise ValueError('The matrix argument must be square.')
         if checks:
             if np.sum(np.sum(X == X.transpose())) != np.product(X.shape):
-                raise ValueError('The distance matrix must be symmetrical.')
+                raise ValueError('The distance matrix array must be symmetrical.')
             if (X.diagonal() != 0).any():
-                raise ValueError('The distance matrix must have zeros along the diagonal.')
+                raise ValueError('The distance matrix array must have zeros along the diagonal.')
 
         # One-side of the dimensions is set here.
         d = s[0]
@@ -780,7 +779,7 @@
     elif len(s) != 2 and force.lower() == 'tomatrix':
         raise ValueError("Forcing 'tomatrix' but input X is not a distance vector.")
     else:
-        raise ValueError('The first argument must be a vector or matrix. A %d-dimensional array is not permitted' % len(s))
+        raise ValueError('The first argument must be one or two dimensional array. A %d-dimensional array is not permitted' % len(s))
 
 def minkowski(u, v, p):
     """
@@ -1607,7 +1606,6 @@
        the number of original observations (leaves) in the non-singleton
        cluster i.
     """
-    is_valid_linkage(Z, throw=True, name='Z')
     Zs = Z.shape
     Zpart = Z[:,0:2]
     Zd = Z[:,2].reshape(Zs[0], 1)

Modified: trunk/scipy/cluster/src/hierarchy.c
===================================================================
--- trunk/scipy/cluster/src/hierarchy.c	2008-05-30 02:23:09 UTC (rev 4395)
+++ trunk/scipy/cluster/src/hierarchy.c	2008-05-30 19:36:47 UTC (rev 4396)
@@ -734,7 +734,7 @@
     xi = inds[i];
     cnode *xnd = info->nodes + xi;
     xn = xnd->n;
-    mply = 1.0 / (((double)xn) * rscnt);
+    mply = (double)1.0 / (((double)xn) * rscnt);
     *bit = mply * ((drx * (rc * xn)) + (dsx * (sc * xn)));
   }
   for (i = mini + 1; i < minj; i++, bit++) {
@@ -743,7 +743,7 @@
     xi = inds[i];
     cnode *xnd = info->nodes + xi;
     xn = xnd->n;
-    mply = 1.0 / (((double)xn) * rscnt);
+    mply = (double)1.0 / (((double)xn) * rscnt);
     *bit = mply * ((drx * (rc * xn)) + (dsx * (sc * xn)));
   }
   for (i = minj + 1; i < np; i++, bit++) {
@@ -752,7 +752,7 @@
     xi = inds[i];
     cnode *xnd = info->nodes + xi;
     xn = xnd->n;
-    mply = 1.0 / (((double)xn) * rscnt);
+    mply = (double)1.0 / (((double)xn) * rscnt);
     *bit = mply * ((drx * (rc * xn)) + (dsx * (sc * xn)));
   }
 }

Modified: trunk/scipy/cluster/tests/test_hierarchy.py
===================================================================
--- trunk/scipy/cluster/tests/test_hierarchy.py	2008-05-30 02:23:09 UTC (rev 4395)
+++ trunk/scipy/cluster/tests/test_hierarchy.py	2008-05-30 19:36:47 UTC (rev 4396)
@@ -37,7 +37,7 @@
 import sys
 import os.path
 from scipy.testing import *
-from scipy.cluster.hierarchy import pdist
+from scipy.cluster.hierarchy import pdist, squareform, linkage, from_mlab_linkage
 
 import numpy
 #import math
@@ -64,15 +64,21 @@
               "pdist-euclidean-ml.txt",
               "pdist-euclidean-ml-iris.txt",
               "pdist-chebychev-ml.txt",
-              "pdist-chebychev-ml-iris.txt"]
+              "pdist-chebychev-ml-iris.txt",
+              "linkage-single-tdist.txt",
+              "linkage-complete-tdist.txt",
+              "linkage-average-tdist.txt",
+              "linkage-weighted-tdist.txt"]
 
-_tdist = np.array([[0,    662,  877,  255,  412,  996],
-                   [662,  0,    295,  468,  268,  400],
-                   [877,  295,  0,    754,  564,  138],
-                   [255,  468,  754,  0,    219,  869],
-                   [412,  268,  564,  219,  0,    669],
-                   [996,  400,  138,  869,  669,  0  ]])
+_tdist = numpy.array([[0,    662,  877,  255,  412,  996],
+                      [662,  0,    295,  468,  268,  400],
+                      [877,  295,  0,    754,  564,  138],
+                      [255,  468,  754,  0,    219,  869],
+                      [412,  268,  564,  219,  0,    669],
+                      [996,  400,  138,  869,  669,  0  ]], dtype='double')
 
+_ytdist = squareform(_tdist)
+
 # A hashmap of expected output arrays for the tests. These arrays
 # come from a list of text files, which are read prior to testing.
 
@@ -556,6 +562,104 @@
         #print "test-chebychev-iris", numpy.abs(Y_test2 - Y_right).max()
         self.failUnless(within_tol(Y_test2, Y_right, eps))
 
+    def test_pdist_chebychev_iris_nonC(self):
+        "Tests pdist(X, 'test_chebychev') [the non-C implementation] on the Iris data set."
+        eps = 1e-15
+        # Get the data: the input matrix and the right output.
+        X = eo['iris']
+        Y_right = eo['pdist-chebychev-iris']
+        Y_test2 = pdist(X, 'test_chebychev')
+        #print "test-chebychev-iris", numpy.abs(Y_test2 - Y_right).max()
+        self.failUnless(within_tol(Y_test2, Y_right, eps))
+
+    def test_squareform_empty_matrix(self):
+        "Tests squareform on an empty matrix."
+        A = numpy.zeros((0,0))
+        rA = squareform(numpy.array(A, dtype='double'))
+        self.failUnless(rA.shape == (0,))
+
+    def test_squareform_empty_vector(self):
+        v = numpy.zeros((0,))
+        rv = squareform(numpy.array(v, dtype='double'))
+        self.failUnless(rv.shape == (0,0))
+
+    def test_squareform_1by1_matrix(self):
+        "Tests squareform on a 1x1 matrix."
+        A = numpy.zeros((1,1))
+        rA = squareform(numpy.array(A, dtype='double'))
+        self.failUnless(rA.shape == (0,))
+
+    def test_squareform_one_vector(self):
+        "Tests squareform on a 1-D array, length=1."
+        v = numpy.ones((1,)) * 8.3
+        rv = squareform(numpy.array(v, dtype='double'))
+        self.failUnless(rv.shape == (2,2))
+        self.failUnless(rv[0,1] == 8.3)
+        self.failUnless(rv[1,0] == 8.3)
+
+    def test_squareform_2by2_matrix(self):
+        "Tests squareform on a 2x2 matrix."
+        A = numpy.zeros((2,2))
+        A[0,1]=0.8
+        A[1,0]=0.8
+        rA = squareform(numpy.array(A, dtype='double'))
+        self.failUnless(rA.shape == (1,))
+        self.failUnless(rA[0] == 0.8)
+
+    def test_squareform_multi_matrix(self):
+        "Tests squareform on square matrices of multiple sizes."
+        for n in xrange(2, 5):
+            X = numpy.random.rand(n, 4)
+            Y = pdist(X)
+            A = squareform(Y)
+            Yr = squareform(A)
+            s = A.shape
+            k = 0
+            #print A.shape, Y.shape, Yr.shape
+            for i in xrange(0, s[0]):
+                for j in xrange(i+1, s[1]):
+                    if i != j:
+                        #print i, j, k, A[i, j], Y[k]
+                        self.failUnless(A[i, j] == Y[k])
+                        self.failUnless(Yr[k] == Y[k])
+                        k += 1
+                    else:
+                        self.failUnless(A[i, j] == 0)
+
+    def test_linkage_single_tdist(self):
+        "Tests linkage(Y, 'single') on the tdist data set."
+        Z = linkage(_ytdist, 'single')
+        Zmlab = eo['linkage-single-tdist']
+        eps = 1e-10
+        expectedZ = from_mlab_linkage(Zmlab)
+        self.failUnless(within_tol(Z, expectedZ, eps))
+
+    def test_linkage_complete_tdist(self):
+        "Tests linkage(Y, 'complete') on the tdist data set."
+        Z = linkage(_ytdist, 'complete')
+        Zmlab = eo['linkage-complete-tdist']
+        eps = 1e-10
+        expectedZ = from_mlab_linkage(Zmlab)
+        self.failUnless(within_tol(Z, expectedZ, eps))
+
+    def test_linkage_average_tdist(self):
+        "Tests linkage(Y, 'average') on the tdist data set."
+        Z = linkage(_ytdist, 'average')
+        Zmlab = eo['linkage-average-tdist']
+        eps = 1e-05
+        expectedZ = from_mlab_linkage(Zmlab)
+        #print Z, expectedZ, numpy.abs(Z - expectedZ).max()
+        self.failUnless(within_tol(Z, expectedZ, eps))
+
+    def test_linkage_weighted_tdist(self):
+        "Tests linkage(Y, 'weighted') on the tdist data set."
+        Z = linkage(_ytdist, 'weighted')
+        Zmlab = eo['linkage-weighted-tdist']
+        eps = 1e-10
+        expectedZ = from_mlab_linkage(Zmlab)
+        #print Z, expectedZ, numpy.abs(Z - expectedZ).max()
+        self.failUnless(within_tol(Z, expectedZ, eps))
+
 def within_tol(a, b, tol):
     return numpy.abs(a - b).max() < tol
 




More information about the Scipy-svn mailing list