[Scipy-svn] r5182 - in trunk/scipy/cluster: . src tests

scipy-svn at scipy.org scipy-svn at scipy.org
Mon Nov 24 17:09:46 EST 2008


Author: damian.eads
Date: 2008-11-24 16:09:34 -0600 (Mon, 24 Nov 2008)
New Revision: 5182

Modified:
   trunk/scipy/cluster/hierarchy.py
   trunk/scipy/cluster/src/hierarchy.c
   trunk/scipy/cluster/tests/test_hierarchy.py
Log:
Added regression tests for scipy.cluster.hierarchy.maxdists.

Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py	2008-11-24 09:54:25 UTC (rev 5181)
+++ trunk/scipy/cluster/hierarchy.py	2008-11-24 22:09:34 UTC (rev 5182)
@@ -1212,7 +1212,7 @@
             else:
                 raise ValueError('Linkage matrix must have 4 columns.')
         if Z.shape[0] == 0:
-            raise ValueError('Linkage must be over at least one observation.')
+            raise ValueError('Linkage must be computed on at least two observations.')
         n = Z.shape[0]
         if n > 1:
             if ((Z[:,0] < 0).any() or
@@ -2376,13 +2376,13 @@
           specifically, ``MD[i] = Z[Q(i)-n, 2].max()`` where ``Q(i)`` is the
           set of all node indices below and including node i.
     """
-    Z = np.asarray(Z, order='c')
+    Z = np.asarray(Z, order='c', dtype=np.double)
     is_valid_linkage(Z, throw=True, name='Z')
 
     n = Z.shape[0] + 1
     MD = np.zeros((n-1,))
     [Z] = _copy_arrays_if_base_present([Z])
-    _hierarchy_wrap.get_max_dist_for_each_hierarchy_wrap(Z, MD, int(n))
+    _hierarchy_wrap.get_max_dist_for_each_cluster_wrap(Z, MD, int(n))
     return MD
 
 def maxinconsts(Z, R):

Modified: trunk/scipy/cluster/src/hierarchy.c
===================================================================
--- trunk/scipy/cluster/src/hierarchy.c	2008-11-24 09:54:25 UTC (rev 5181)
+++ trunk/scipy/cluster/src/hierarchy.c	2008-11-24 22:09:34 UTC (rev 5182)
@@ -841,7 +841,6 @@
 
     /** Rows i+1 to j-1 lose one unit of space, so we move them up. */
     /** Rows j to np-1 lose no space. We do nothing to them. */
-
     /**    memcpy(rows[0], buf, sizeof(double) * rowsize[0] - k);*/
 
     for (i = 0; i < mini; i++) {
@@ -1418,7 +1417,8 @@
       max_dist = CPY_MAX(max_dist, max_dists[rid-n]);
     }
     max_dists[ndid-n] = max_dist;
-    CPY_DEBUG_MSG("i=%d maxdist[i]=%5.5f verif=%5.5f\n", ndid-n, max_dist, max_dists[ndid-n]);
+    CPY_DEBUG_MSG("i=%d maxdist[i]=%5.5f verif=%5.5f\n",
+		  ndid-n, max_dist, max_dists[ndid-n]);
     k--;
   }
   free(curNode);

Modified: trunk/scipy/cluster/tests/test_hierarchy.py
===================================================================
--- trunk/scipy/cluster/tests/test_hierarchy.py	2008-11-24 09:54:25 UTC (rev 5181)
+++ trunk/scipy/cluster/tests/test_hierarchy.py	2008-11-24 22:09:34 UTC (rev 5182)
@@ -38,7 +38,7 @@
 import numpy as np
 from numpy.testing import *
 
-from scipy.cluster.hierarchy import linkage, from_mlab_linkage, to_mlab_linkage, num_obs_linkage, inconsistent, cophenet, from_mlab_linkage, fclusterdata, fcluster, is_isomorphic, single, complete, average, weighted, centroid, median, ward, leaders, correspond, is_monotonic
+from scipy.cluster.hierarchy import linkage, from_mlab_linkage, to_mlab_linkage, num_obs_linkage, inconsistent, cophenet, from_mlab_linkage, fclusterdata, fcluster, is_isomorphic, single, complete, average, weighted, centroid, median, ward, leaders, correspond, is_monotonic, maxdists
 from scipy.spatial.distance import squareform, pdist
 
 _tdist = np.array([[0,    662,  877,  255,  412,  996],
@@ -685,15 +685,109 @@
         Z = linkage(X, 'single')
         self.failUnless(is_monotonic(Z) == True)
 
-def help_single_inconsistent_depth(self, i):
-    Y = squareform(_tdist)
-    Z = linkage(Y, 'single')
-    R = inconsistent(Z, i)
-    Rright = eo['inconsistent-single-tdist-depth-' + str(i)]
-    eps = 1e-05
-    print np.abs(R - Rright).max()
-    self.failUnless(within_tol(R, Rright, eps))
+class TestMaxDists(TestCase):
 
+    """Regression tests for scipy.cluster.hierarchy.maxdists.
+
+    Apart from the empty-linkage case, which only expects a ValueError,
+    each test compares maxdists(Z) with calculate_maximum_distances(Z),
+    a slow pure-Python reference, using an absolute tolerance of 1e-15.
+    The Q tests pass eo['Q-X'] directly to linkage(); the condensed
+    distance matrix is not needed and is therefore not computed.
+    """
+
+    def test_maxdists_empty_linkage(self):
+        "Tests maxdists(Z) on empty linkage. Expecting exception."
+        Z = np.zeros((0, 4), dtype=np.double)
+        self.failUnlessRaises(ValueError, maxdists, Z)
+
+    def test_maxdists_one_cluster_linkage(self):
+        "Tests maxdists(Z) on linkage with one cluster."
+        Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
+        MD = maxdists(Z)
+        eps = 1e-15
+        expectedMD = calculate_maximum_distances(Z)
+        self.failUnless(within_tol(MD, expectedMD, eps))
+
+    def test_maxdists_Q_linkage_single(self):
+        "Tests maxdists(Z) on the Q data set using single linkage."
+        X = eo['Q-X']
+        Z = linkage(X, 'single')
+        MD = maxdists(Z)
+        eps = 1e-15
+        expectedMD = calculate_maximum_distances(Z)
+        self.failUnless(within_tol(MD, expectedMD, eps))
+
+    def test_maxdists_Q_linkage_complete(self):
+        "Tests maxdists(Z) on the Q data set using complete linkage."
+        X = eo['Q-X']
+        Z = linkage(X, 'complete')
+        MD = maxdists(Z)
+        eps = 1e-15
+        expectedMD = calculate_maximum_distances(Z)
+        self.failUnless(within_tol(MD, expectedMD, eps))
+
+    def test_maxdists_Q_linkage_ward(self):
+        "Tests maxdists(Z) on the Q data set using Ward linkage."
+        X = eo['Q-X']
+        Z = linkage(X, 'ward')
+        MD = maxdists(Z)
+        eps = 1e-15
+        expectedMD = calculate_maximum_distances(Z)
+        self.failUnless(within_tol(MD, expectedMD, eps))
+
+    def test_maxdists_Q_linkage_centroid(self):
+        "Tests maxdists(Z) on the Q data set using centroid linkage."
+        X = eo['Q-X']
+        Z = linkage(X, 'centroid')
+        MD = maxdists(Z)
+        eps = 1e-15
+        expectedMD = calculate_maximum_distances(Z)
+        self.failUnless(within_tol(MD, expectedMD, eps))
+
+    def test_maxdists_Q_linkage_median(self):
+        "Tests maxdists(Z) on the Q data set using median linkage."
+        X = eo['Q-X']
+        Z = linkage(X, 'median')
+        MD = maxdists(Z)
+        eps = 1e-15
+        expectedMD = calculate_maximum_distances(Z)
+        self.failUnless(within_tol(MD, expectedMD, eps))
+
+def calculate_maximum_distances(Z):
+    "Slow reference for maxdists: B[i] = largest merge distance at or below row i of Z."
+    n = Z.shape[0] + 1
+    B = np.zeros((n-1,))
+    q = np.zeros((3,))
+    for i in range(n - 1):
+        q[:] = 0.0
+        left = int(Z[i, 0])  # Z holds doubles; array indices must be integers
+        right = int(Z[i, 1])
+        if left >= n:  # ids >= n refer to the cluster formed by row id - n
+            q[0] = B[left - n]
+        if right >= n:
+            q[1] = B[right - n]
+        q[2] = Z[i, 2]
+        B[i] = q.max()
+    return B
+
+def calculate_maximum_inconsistent(Z, R):
+    "Slow reference for maxinconsts: B[i] = largest R[:, 3] at or below row i of Z."
+    n = Z.shape[0] + 1
+    B = np.zeros((n-1,))
+    q = np.zeros((3,))
+    for i in range(n - 1):
+        q[:] = 0.0
+        left = int(Z[i, 0])  # Z holds doubles; array indices must be integers
+        right = int(Z[i, 1])  # not named "R": that is the inconsistency matrix argument
+        if left >= n:  # ids >= n refer to the cluster formed by row id - n
+            q[0] = B[left - n]
+        if right >= n:
+            q[1] = B[right - n]
+        q[2] = R[i, 3]  # column 3 of inconsistent() output is the coefficient
+        B[i] = q.max()
+    return B
+
 def within_tol(a, b, tol):
     return np.abs(a - b).max() < tol
 




More information about the Scipy-svn mailing list