[Scipy-svn] r5063 - in trunk/scipy/cluster: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Wed Nov 12 14:33:20 EST 2008
Author: damian.eads
Date: 2008-11-12 13:33:13 -0600 (Wed, 12 Nov 2008)
New Revision: 5063
Modified:
trunk/scipy/cluster/hierarchy.py
trunk/scipy/cluster/tests/test_hierarchy.py
Log:
Made cophenet return behavior less confusing.
Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py 2008-11-12 07:26:02 UTC (rev 5062)
+++ trunk/scipy/cluster/hierarchy.py 2008-11-12 19:33:13 UTC (rev 5063)
@@ -873,21 +873,22 @@
X = X.copy()
return X
-def cophenet(*args):
+def cophenet(Z, Y=None):
"""
Calculates the cophenetic distances between each observation in
the hierarchical clustering defined by the linkage ``Z``.
- Suppose :math:`$p$` and :math:`$q$` are original observations in
- disjoint clusters :math:`$s$` and :math:`$t$`, respectively and
- :math:`$s$` and :math:`$t$` are joined by a direct parent cluster
- :math:`$u$`. The cophenetic distance between observations
- :math:`$i$` and :math:`$j$` is simply the distance between
- clusters :math:`$s$` and :math:`$t$`.
+ Suppose ``p`` and ``q`` are original observations in
+ disjoint clusters ``s`` and ``t``, respectively, and
+ ``s`` and ``t`` are joined by a direct parent cluster
+ ``u``. The cophenetic distance between observations
+ ``p`` and ``q`` is simply the distance between
+ clusters ``s`` and ``t``.
:Parameters:
- Z : ndarray
- The encoded linkage matrix on which to perform the calculation.
+ The hierarchical clustering encoded as an array
+ (see ``linkage`` function).
- Y : ndarray (optional)
Calculates the cophenetic correlation coefficient ``c`` of a
@@ -902,16 +903,11 @@
- d : ndarray
The cophenetic distance matrix in condensed form. The
- :math:`$ij$`th entry is the cophenetic distance between
+ :math:`$ij$` th entry is the cophenetic distance between
original observations :math:`$i$` and :math:`$j$`.
"""
- nargs = len(args)
- if nargs < 1:
- raise ValueError('At least one argument must be passed to cophenet.')
-
- Z = args[0]
Z = np.asarray(Z, order='c')
is_valid_linkage(Z, throw=True, name='Z')
Zs = Z.shape
@@ -923,10 +919,9 @@
Z = _convert_to_double(Z)
_hierarchy_wrap.cophenetic_distances_wrap(Z, zz, int(n))
- if nargs == 1:
+ if Y is None:
return zz
- Y = args[1]
Y = np.asarray(Y, order='c')
Ys = Y.shape
distance.is_valid_y(Y, throw=True, name='Y')
@@ -941,12 +936,8 @@
denomB = Zz ** 2
c = numerator.sum() / np.sqrt((denomA.sum() * denomB.sum()))
#print c, numerator.sum()
- if nargs == 2:
- return c
+ return (c, zz)
- if nargs == 3:
- return (c, zz)
-
def inconsistent(Z, d=2):
"""
Calculates inconsistency statistics on a linkage.
@@ -2308,7 +2299,7 @@
n = Z.shape[0] + 1
MI = np.zeros((n-1,))
[Z, R] = _copy_arrays_if_base_present([Z, R])
- _hierarchy_wrap.get_max_Rfield_for_each_hierarchy_wrap(Z, R, MI, int(n), 3)
+ _hierarchy_wrap.get_max_Rfield_for_each_cluster_wrap(Z, R, MI, int(n), 3)
return MI
def maxRstat(Z, R, i):
@@ -2332,7 +2323,7 @@
n = Z.shape[0] + 1
MR = np.zeros((n-1,))
[Z, R] = _copy_arrays_if_base_present([Z, R])
- _hierarchy_wrap.get_max_Rfield_for_each_hierarchy_wrap(Z, R, MR, int(n), i)
+ _hierarchy_wrap.get_max_Rfield_for_each_cluster_wrap(Z, R, MR, int(n), i)
return MR
def leaders(Z, T):
Modified: trunk/scipy/cluster/tests/test_hierarchy.py
===================================================================
--- trunk/scipy/cluster/tests/test_hierarchy.py 2008-11-12 07:26:02 UTC (rev 5062)
+++ trunk/scipy/cluster/tests/test_hierarchy.py 2008-11-12 19:33:13 UTC (rev 5063)
@@ -350,19 +350,11 @@
def test_linkage_cophenet_tdist_Z_Y(self):
"Testing cophenet(Z, Y) on tdist data set."
Z = linkage(_ytdist, 'single')
- c = cophenet(Z, _ytdist)
+ (c, M) = cophenet(Z, _ytdist)
+ expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295, 295, 138, 219, 295, 295]);
expectedc = 0.639931296433393415057366837573
eps = 1e-10
self.failUnless(np.abs(c - expectedc) <= eps)
-
- def test_linkage_cophenet_tdist_Z_Y_EL(self):
- "Testing cophenet(Z, Y, []) on tdist data set."
- Z = linkage(_ytdist, 'single')
- (c, M) = cophenet(Z, _ytdist, [])
- eps = 1e-10
- expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295, 295, 138, 219, 295, 295]);
- expectedc = 0.639931296433393415057366837573
- self.failUnless(np.abs(c - expectedc) <= eps)
self.failUnless(within_tol(M, expectedM, eps))
class TestFromMLabLinkage(TestCase):
More information about the Scipy-svn
mailing list