[Scipy-svn] r4666 - in trunk/scipy/cluster: . src tests

scipy-svn at scipy.org scipy-svn at scipy.org
Sat Aug 23 01:21:16 EDT 2008


Author: damian.eads
Date: 2008-08-23 00:21:05 -0500 (Sat, 23 Aug 2008)
New Revision: 4666

Modified:
   trunk/scipy/cluster/distance.py
   trunk/scipy/cluster/hierarchy.py
   trunk/scipy/cluster/src/distance.c
   trunk/scipy/cluster/src/distance.h
   trunk/scipy/cluster/src/distance_wrap.c
   trunk/scipy/cluster/src/hierarchy.c
   trunk/scipy/cluster/src/hierarchy.h
   trunk/scipy/cluster/src/hierarchy_wrap.c
   trunk/scipy/cluster/tests/test_distance.py
   trunk/scipy/cluster/tests/test_hierarchy.py
Log:
Moved some remaining distance-related functions from scipy.distance to scipy.cluster.

Modified: trunk/scipy/cluster/distance.py
===================================================================
--- trunk/scipy/cluster/distance.py	2008-08-23 00:42:16 UTC (rev 4665)
+++ trunk/scipy/cluster/distance.py	2008-08-23 05:21:05 UTC (rev 4666)
@@ -7,36 +7,74 @@
 stored in a rectangular array.
 
 +------------------+-------------------------------------------------+
+|*Function*        | *Description*                                   |
++------------------+-------------------------------------------------+
 |pdist             | computes distances between observation pairs.   |
 +------------------+-------------------------------------------------+
+|squareform        | converts a square distance matrix to a          |
+|                  | condensed one and vice versa.                   |
++------------------+-------------------------------------------------+
 
+Predicates for checking the validity of distance matrices, both
+condensed and redundant.
+
++------------------+-------------------------------------------------+
+|*Function*        | *Description*                                   |
++------------------+-------------------------------------------------+
+|is_valid_dm       | checks for a valid distance matrix.             |
++------------------+-------------------------------------------------+
+|is_valid_y        | checks for a valid condensed distance matrix.
++------------------+-------------------------------------------------+
+
 Distance functions between two vectors ``u`` and ``v``. Computing
 distances over a large collection of vectors is inefficient for these
 functions. Use ``pdist`` for this purpose.
 
 +------------------+-------------------------------------------------+
-|braycurtis        | the Bray-Curtis distance.                       |
-|canberra          | the Canberra distance.                          |
-|chebyshev         | the Chebyshev distance.                         |
-|cityblock         | the Manhattan distance.                         |
-|correlation       | the Correlation distance.                       |
-|cosine            | the Cosine distance.                            |
-|dice              | the Dice dissimilarity (boolean).               |
-|euclidean         | the Euclidean distance.                         |
-|hamming           | the Hamming distance (boolean).                 |
-|jaccard           | the Jaccard distance (boolean).                 |
-|kulsinski         | the Kulsinski distance (boolean).               |
-|mahalanobis       | the Mahalanobis distance.                       |
-|matching          | the matching dissimilarity (boolean).           |
-|minkowski         | the Minkowski distance.                         |
-|rogerstanimoto    | the Rogers-Tanimoto dissimilarity (boolean).    |
-|russellrao        | the Russell-Rao dissimilarity (boolean).        |
-|seuclidean        | the normalized Euclidean distance.              |
-|sokalmichener     | the Sokal-Michener dissimilarity (boolean).     |
-|sokalsneath       | the Sokal-Sneath dissimilarity (boolean).       |
-|sqeuclidean       | the squared Euclidean distance.                 |
-|yule              | the Yule dissimilarity (boolean).               |
+|*Function*        | *Description*                                   |
 +------------------+-------------------------------------------------+
+| braycurtis       | the Bray-Curtis distance.                       |
++------------------+-------------------------------------------------+
+| canberra         | the Canberra distance.                          |
++------------------+-------------------------------------------------+
+| chebyshev        | the Chebyshev distance.                         |
++------------------+-------------------------------------------------+
+| cityblock        | the Manhattan distance.                         |
++------------------+-------------------------------------------------+
+| correlation      | the Correlation distance.                       |
++------------------+-------------------------------------------------+
+| cosine           | the Cosine distance.                            |
++------------------+-------------------------------------------------+
+| dice             | the Dice dissimilarity (boolean).               |
++------------------+-------------------------------------------------+
+| euclidean        | the Euclidean distance.                         |
++------------------+-------------------------------------------------+
+| hamming          | the Hamming distance (boolean).                 |
++------------------+-------------------------------------------------+
+| jaccard          | the Jaccard distance (boolean).                 |
++------------------+-------------------------------------------------+
+| kulsinski        | the Kulsinski distance (boolean).               |
++------------------+-------------------------------------------------+
+| mahalanobis      | the Mahalanobis distance.                       |
++------------------+-------------------------------------------------+
+| matching         | the matching dissimilarity (boolean).           |
++------------------+-------------------------------------------------+
+| minkowski        | the Minkowski distance.                         |
++------------------+-------------------------------------------------+
+| rogerstanimoto   | the Rogers-Tanimoto dissimilarity (boolean).    |
++------------------+-------------------------------------------------+
+| russellrao       | the Russell-Rao dissimilarity (boolean).        |
++------------------+-------------------------------------------------+
+| seuclidean       | the normalized Euclidean distance.              |
++------------------+-------------------------------------------------+
+| sokalmichener    | the Sokal-Michener dissimilarity (boolean).     |
++------------------+-------------------------------------------------+
+| sokalsneath      | the Sokal-Sneath dissimilarity (boolean).       |
++------------------+-------------------------------------------------+
+| sqeuclidean      | the squared Euclidean distance.                 |
++------------------+-------------------------------------------------+
+| yule             | the Yule dissimilarity (boolean).               |
++------------------+-------------------------------------------------+
 
 
 References
@@ -1161,3 +1199,296 @@
     else:
         raise TypeError('2nd argument metric must be a string identifier or a function.')
     return dm
+def squareform(X, force="no", checks=True):
+    """
+    Converts a vector-form distance vector to a square-form distance
+    matrix, and vice-versa.
+
+    :Parameters:
+       X : ndarray
+           Either a condensed or redundant distance matrix.
+
+    :Returns:
+       Y : ndarray
+           If a condensed distance matrix is passed, a redundant
+           one is returned, or if a redundant one is passed, a
+           condensed distance matrix is returned.
+
+       force : string
+           As with MATLAB(TM), if force is equal to 'tovector' or
+           'tomatrix', the input will be treated as a distance matrix
+           or distance vector respectively.
+
+       checks : bool
+           If ``checks`` is set to ``False``, no checks will be made
+           for matrix symmetry nor zero diagonals. This is useful if
+           it is known that ``X - X.T1`` is small and ``diag(X)`` is
+           close to zero. These values are ignored any way so they do
+           not disrupt the squareform transformation.
+       
+
+    Calling Conventions
+    -------------------
+
+    1. v = squareform(X)
+
+       Given a square d by d symmetric distance matrix ``X``,
+       ``v=squareform(X)`` returns a :math:`$d*(d-1)/2$` (or
+       `${n \choose 2}$`) sized vector v.
+
+      v[{n \choose 2}-{n-i \choose 2} + (j-i-1)] is the distance
+      between points i and j. If X is non-square or asymmetric, an error
+      is returned.
+
+    X = squareform(v)
+
+      Given a d*d(-1)/2 sized v for some integer d>=2 encoding distances
+      as described, X=squareform(v) returns a d by d distance matrix X. The
+      X[i, j] and X[j, i] values are set to
+      v[{n \choose 2}-{n-i \choose 2} + (j-u-1)] and all
+      diagonal elements are zero.
+
+    """
+
+    X = _convert_to_double(np.asarray(X))
+
+    if not np.issubsctype(X, np.double):
+        raise TypeError('A double array must be passed.')
+
+    s = X.shape
+
+    # X = squareform(v)
+    if len(s) == 1 and force != 'tomatrix':
+        if X.shape[0] == 0:
+            return np.zeros((1,1), dtype=np.double)
+
+        # Grab the closest value to the square root of the number
+        # of elements times 2 to see if the number of elements
+        # is indeed a binomial coefficient.
+        d = int(np.ceil(np.sqrt(X.shape[0] * 2)))
+
+        # Check that v is of valid dimensions.
+        if d * (d - 1) / 2 != int(s[0]):
+            raise ValueError('Incompatible vector size. It must be a binomial coefficient n choose 2 for some integer n >= 2.')
+
+        # Allocate memory for the distance matrix.
+        M = np.zeros((d, d), dtype=np.double)
+
+        # Since the C code does not support striding using strides.
+        # The dimensions are used instead.
+        [X] = _copy_arrays_if_base_present([X])
+
+        # Fill in the values of the distance matrix.
+        _distance_wrap.to_squareform_from_vector_wrap(M, X)
+
+        # Return the distance matrix.
+        M = M + M.transpose()
+        return M
+    elif len(s) != 1 and force.lower() == 'tomatrix':
+        raise ValueError("Forcing 'tomatrix' but input X is not a distance vector.")
+    elif len(s) == 2 and force.lower() != 'tovector':
+        if s[0] != s[1]:
+            raise ValueError('The matrix argument must be square.')
+        if checks:
+            if np.sum(np.sum(X == X.transpose())) != np.product(X.shape):
+                raise ValueError('The distance matrix array must be symmetrical.')
+            if (X.diagonal() != 0).any():
+                raise ValueError('The distance matrix array must have zeros along the diagonal.')
+
+        # One-side of the dimensions is set here.
+        d = s[0]
+
+        if d <= 1:
+            return np.array([], dtype=np.double)
+
+        # Create a vector.
+        v = np.zeros(((d * (d - 1) / 2),), dtype=np.double)
+
+        # Since the C code does not support striding using strides.
+        # The dimensions are used instead.
+        [X] = _copy_arrays_if_base_present([X])
+
+        # Convert the vector to squareform.
+        _distance_wrap.to_vector_from_squareform_wrap(X, v)
+        return v
+    elif len(s) != 2 and force.lower() == 'tomatrix':
+        raise ValueError("Forcing 'tomatrix' but input X is not a distance vector.")
+    else:
+        raise ValueError('The first argument must be one or two dimensional array. A %d-dimensional array is not permitted' % len(s))
+
+def is_valid_dm(D, tol=0.0, throw=False, name="D", warning=False):
+    """
+    Returns True if the variable D passed is a valid distance matrix.
+    Distance matrices must be 2-dimensional numpy arrays containing
+    doubles. They must have a zero-diagonal, and they must be symmetric.
+
+    :Parameters:
+       D : ndarray
+           The candidate object to test for validity.
+       tol : double
+           The distance matrix should be symmetric. tol is the maximum
+           difference between the :math:`$ij$`th entry and the
+           :math:`$ji$`th entry for the distance metric to be
+           considered symmetric.
+       throw : bool
+           An exception is thrown if the distance matrix passed is not
+           valid.
+       name : string
+           the name of the variable to checked. This is useful ifa
+           throw is set to ``True`` so the offending variable can be
+           identified in the exception message when an exception is
+           thrown.
+       warning : boolx
+           Instead of throwing an exception, a warning message is
+           raised.
+
+    :Returns:
+       Returns ``True`` if the variable ``D`` passed is a valid
+       distance matrix.  Small numerical differences in ``D`` and
+       ``D.T`` and non-zeroness of the diagonal are ignored if they are
+       within the tolerance specified by ``tol``.
+    """
+    D = np.asarray(D)
+    valid = True
+    try:
+        if type(D) != np.ndarray:
+            if name:
+                raise TypeError('\'%s\' passed as a distance matrix is not a numpy array.' % name)
+            else:
+                raise TypeError('Variable is not a numpy array.')
+        s = D.shape
+        if D.dtype != np.double:
+            if name:
+                raise TypeError('Distance matrix \'%s\' must contain doubles (double).' % name)
+            else:
+                raise TypeError('Distance matrix must contain doubles (double).')
+        if len(D.shape) != 2:
+            if name:
+                raise ValueError('Distance matrix \'%s\' must have shape=2 (i.e. be two-dimensional).' % name)
+            else:
+                raise ValueError('Distance matrix must have shape=2 (i.e. be two-dimensional).')
+        if tol == 0.0:
+            if not (D == D.T).all():
+                if name:
+                    raise ValueError('Distance matrix \'%s\' must be symmetric.' % name)
+                else:
+                    raise ValueError('Distance matrix must be symmetric.')
+            if not (D[xrange(0, s[0]), xrange(0, s[0])] == 0).all():
+                if name:
+                    raise ValueError('Distance matrix \'%s\' diagonal must be zero.' % name)
+                else:
+                    raise ValueError('Distance matrix diagonal must be zero.')
+        else:
+            if not (D - D.T <= tol).all():
+                if name:
+                    raise ValueError('Distance matrix \'%s\' must be symmetric within tolerance %d.' % (name, tol))
+                else:
+                    raise ValueError('Distance matrix must be symmetric within tolerance %5.5f.' % tol)
+            if not (D[xrange(0, s[0]), xrange(0, s[0])] <= tol).all():
+                if name:
+                    raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %5.5f.' % (name, tol))
+                else:
+                    raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %5.5f.' % tol)
+    except Exception, e:
+        if throw:
+            raise
+        if warning:
+            _warning(str(e))
+        valid = False
+    return valid
+
+def is_valid_y(y, warning=False, throw=False, name=None):
+    """
+    Returns ``True`` if the variable ``y`` passed is a valid condensed
+    distance matrix. Condensed distance matrices must be 1-dimensional
+    numpy arrays containing doubles. Their length must be a binomial
+    coefficient :math:`${n \choose 2}$` for some positive integer n.
+
+
+    :Parameters:
+       y : ndarray
+           The condensed distance matrix.
+
+       warning : bool
+           Invokes a warning if the variable passed is not a valid
+           condensed distance matrix. The warning message explains why
+           the distance matrix is not valid.  'name' is used when
+           referencing the offending variable.
+
+       throws : throw
+           Throws an exception if the variable passed is not a valid
+           condensed distance matrix.
+
+       name : bool
+           Used when referencing the offending variable in the
+           warning or exception message.
+
+    """
+    y = np.asarray(y)
+    valid = True
+    try:
+        if type(y) != np.ndarray:
+            if name:
+                raise TypeError('\'%s\' passed as a condensed distance matrix is not a numpy array.' % name)
+            else:
+                raise TypeError('Variable is not a numpy array.')
+        if y.dtype != np.double:
+            if name:
+                raise TypeError('Condensed distance matrix \'%s\' must contain doubles (double).' % name)
+            else:
+                raise TypeError('Condensed distance matrix must contain doubles (double).')
+        if len(y.shape) != 1:
+            if name:
+                raise ValueError('Condensed distance matrix \'%s\' must have shape=1 (i.e. be one-dimensional).' % name)
+            else:
+                raise ValueError('Condensed distance matrix must have shape=1 (i.e. be one-dimensional).')
+        n = y.shape[0]
+        d = int(np.ceil(np.sqrt(n * 2)))
+        if (d*(d-1)/2) != n:
+            if name:
+                raise ValueError('Length n of condensed distance matrix \'%s\' must be a binomial coefficient, i.e. there must be a k such that (k \choose 2)=n)!' % name)
+            else:
+                raise ValueError('Length n of condensed distance matrix must be a binomial coefficient, i.e. there must be a k such that (k \choose 2)=n)!')
+    except Exception, e:
+        if throw:
+            raise
+        if warning:
+            _warning(str(e))
+        valid = False
+    return valid
+
+def numobs_dm(D):
+    """
+    Returns the number of original observations that correspond to a
+    square, redudant distance matrix D.
+
+    :Parameters:
+       D : ndarray
+           The target distance matrix.
+
+    :Returns:
+       The number of observations in the redundant distance matrix.
+    """
+    D = np.asarray(D)
+    is_valid_dm(D, tol=np.inf, throw=True, name='D')
+    return D.shape[0]
+
+def numobs_y(Y):
+    """
+    Returns the number of original observations that correspond to a
+    condensed distance matrix Y.
+
+    :Parameters:
+       Y : ndarray
+           The number of original observations in the condensed
+           observation ``Y``.
+
+    :Returns:
+       n : int
+           The number of observations in the condensed distance matrix
+           passed.
+    """
+    Y = np.asarray(Y)
+    is_valid_y(Y, throw=True, name='Y')
+    d = int(np.ceil(np.sqrt(Y.shape[0] * 2)))
+    return d

Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py	2008-08-23 00:42:16 UTC (rev 4665)
+++ trunk/scipy/cluster/hierarchy.py	2008-08-23 05:21:05 UTC (rev 4666)
@@ -22,8 +22,6 @@
  median             the median/WPGMC algorithm. (alias)
  ward               the Ward/incremental algorithm. (alias)
 
- squareform         converts a sq. D.M. to a condensed one and vice versa.
-
 Statistic computations on hierarchies
 
  cophenet           computes the cophenetic distance between leaves.
@@ -46,10 +44,8 @@
 
 Predicates
 
- is_valid_dm        checks for a valid distance matrix.
  is_valid_im        checks for a valid inconsistency matrix.
  is_valid_linkage   checks for a valid hierarchical clustering.
- is_valid_y         checks for a valid condensed distance matrix.
  is_isomorphic      checks if two flat clusterings are isomorphic.
  is_monotonic       checks if a linkage is monotonic.
  Z_y_correspond     checks for validity of distance matrix given a linkage.
@@ -57,8 +53,6 @@
 Utility Functions
 
  numobs_dm          # of observations in a distance matrix.
- numobs_linkage     # of observations in a linkage.
- numobs_y           # of observations in a condensed distance matrix.
 
 Legal stuff
 
@@ -186,12 +180,6 @@
     l = [_copy_array_if_base_present(a) for a in T]
     return l
 
-
-def copying():
-    """ Displays the license for this package."""
-    print _copyingtxt
-    return None
-
 def _randdm(pnts):
     """ Generates a random distance matrix stored in condensed form. A
         pnts * (pnts - 1) / 2 sized vector is returned.
@@ -204,53 +192,68 @@
 
 def single(y):
     """
-    Z = single(y)
-
     Performs single/min/nearest linkage on the condensed distance
-    matrix Z. See linkage for more information on the return structure
-    and algorithm.
+    matrix ``y``. See ``linkage`` for more information on the return
+    structure and algorithm.
 
-      (a condensed alias for single)
+    :Parameters:
+        y : ndarray
+            The upper triangular of the distance matrix y. The result of
+            ``pdist`` is returned in this form.
+
+    :Returns:
+        Z : ndarray
+            The linkage matrix.
+
     """
     return linkage(y, method='single', metric='euclidean')
 
 def complete(y):
     """
-    Z = complete(y)
-
     Performs complete complete/max/farthest point linkage on the
-    condensed distance matrix Z. See linkage for more information
-    on the return structure and algorithm.
+    condensed distance matrix ``y``. See ``linkage`` for more
+    information on the return structure and algorithm.
 
-      (a condensed alias for linkage)
+    :Parameters:
+        y : ndarray
+            The upper triangular of the distance matrix y. The result of
+            ``pdist`` is returned in this form.
+
     """
     return linkage(y, method='complete', metric='euclidean')
 
 def average(y):
     """
-    Z = average(y)
+    Performs average/UPGMA linkage on the condensed distance matrix
+    ``y``. See ``linkage`` for more information on the return
+    structure and algorithm.
 
-    Performs average/UPGMA linkage on the condensed distance matrix Z. See
-    linkage for more information on the return structure and algorithm.
-
-      (a condensed alias for linkage)
+    :Parameters:
+        y : ndarray
+            The upper triangular of the distance matrix y. The result of
+            ``pdist`` is returned in this form.
     """
     return linkage(y, method='average', metric='euclidean')
 
 def weighted(y):
     """
-    Z = weighted(y)
+    Performs weighted/WPGMA linkage on the condensed distance matrix
+    ``y``.  See ``linkage`` for more information on the return
+    structure and algorithm.
 
-    Performs weighted/WPGMA linkage on the condensed distance matrix Z.
-    See linkage for more information on the return structure and
-    algorithm.
-
-      (a condensed alias for linkage)
+    :Parameters:
+        y : ndarray
+            The upper triangular of the distance matrix y. The result of
+            ``pdist`` is returned in this form.
     """
     return linkage(y, method='weighted', metric='euclidean')
 
 def centroid(y):
     """
+    Performs centroid/UPGMC linkage. See ``linkage`` for more
+    information on the return structure and algorithm.
+
+
     Z = centroid(y)
 
     Performs centroid/UPGMC linkage on the condensed distance matrix Z.
@@ -652,107 +655,6 @@
     else:
         return nd
 
-def squareform(X, force="no", checks=True):
-    """
-    ... = squareform(...)
-
-    Converts a vector-form distance vector to a square-form distance
-    matrix, and vice-versa.
-
-    v = squareform(X)
-
-      Given a square d by d symmetric distance matrix X, v=squareform(X)
-      returns a d*(d-1)/2 (or {n \choose 2}) sized vector v.
-
-      v[{n \choose 2}-{n-i \choose 2} + (j-i-1)] is the distance
-      between points i and j. If X is non-square or asymmetric, an error
-      is returned.
-
-    X = squareform(v)
-
-      Given a d*d(-1)/2 sized v for some integer d>=2 encoding distances
-      as described, X=squareform(v) returns a d by d distance matrix X. The
-      X[i, j] and X[j, i] values are set to
-      v[{n \choose 2}-{n-i \choose 2} + (j-u-1)] and all
-      diagonal elements are zero.
-
-    As with MATLAB(TM), if force is equal to 'tovector' or 'tomatrix',
-    the input will be treated as a distance matrix or distance vector
-    respectively.
-
-    If checks is set to False, no checks will be made for matrix
-    symmetry nor zero diagonals. This is useful if it is known that
-    X - X.T is small and diag(X) is close to zero. These values are
-    ignored any way so they do not disrupt the squareform
-    transformation.
-    """
-
-    X = _convert_to_double(np.asarray(X))
-
-    if not np.issubsctype(X, np.double):
-        raise TypeError('A double array must be passed.')
-
-    s = X.shape
-
-    # X = squareform(v)
-    if len(s) == 1 and force != 'tomatrix':
-        if X.shape[0] == 0:
-            return np.zeros((1,1), dtype=np.double)
-
-        # Grab the closest value to the square root of the number
-        # of elements times 2 to see if the number of elements
-        # is indeed a binomial coefficient.
-        d = int(np.ceil(np.sqrt(X.shape[0] * 2)))
-
-        # Check that v is of valid dimensions.
-        if d * (d - 1) / 2 != int(s[0]):
-            raise ValueError('Incompatible vector size. It must be a binomial coefficient n choose 2 for some integer n >= 2.')
-
-        # Allocate memory for the distance matrix.
-        M = np.zeros((d, d), dtype=np.double)
-
-        # Since the C code does not support striding using strides.
-        # The dimensions are used instead.
-        [X] = _copy_arrays_if_base_present([X])
-
-        # Fill in the values of the distance matrix.
-        _hierarchy_wrap.to_squareform_from_vector_wrap(M, X)
-
-        # Return the distance matrix.
-        M = M + M.transpose()
-        return M
-    elif len(s) != 1 and force.lower() == 'tomatrix':
-        raise ValueError("Forcing 'tomatrix' but input X is not a distance vector.")
-    elif len(s) == 2 and force.lower() != 'tovector':
-        if s[0] != s[1]:
-            raise ValueError('The matrix argument must be square.')
-        if checks:
-            if np.sum(np.sum(X == X.transpose())) != np.product(X.shape):
-                raise ValueError('The distance matrix array must be symmetrical.')
-            if (X.diagonal() != 0).any():
-                raise ValueError('The distance matrix array must have zeros along the diagonal.')
-
-        # One-side of the dimensions is set here.
-        d = s[0]
-
-        if d <= 1:
-            return np.array([], dtype=np.double)
-
-        # Create a vector.
-        v = np.zeros(((d * (d - 1) / 2),), dtype=np.double)
-
-        # Since the C code does not support striding using strides.
-        # The dimensions are used instead.
-        [X] = _copy_arrays_if_base_present([X])
-
-        # Convert the vector to squareform.
-        _hierarchy_wrap.to_vector_from_squareform_wrap(X, v)
-        return v
-    elif len(s) != 2 and force.lower() == 'tomatrix':
-        raise ValueError("Forcing 'tomatrix' but input X is not a distance vector.")
-    else:
-        raise ValueError('The first argument must be one or two dimensional array. A %d-dimensional array is not permitted' % len(s))
-
 def _convert_to_bool(X):
     if X.dtype != np.bool:
         X = np.bool_(X)
@@ -1035,140 +937,6 @@
         valid = False
     return valid
 
-def is_valid_y(y, warning=False, throw=False, name=None):
-    """
-    is_valid_y(y)
-
-      Returns True if the variable y passed is a valid condensed
-      distance matrix. Condensed distance matrices must be
-      1-dimensional numpy arrays containing doubles. Their length
-      must be a binomial coefficient {n \choose 2} for some positive
-      integer n.
-
-    is_valid_y(..., warning=True, name='V')
-
-      Invokes a warning if the variable passed is not a valid condensed distance
-      matrix. The warning message explains why the distance matrix is not valid.
-      'name' is used when referencing the offending variable.
-
-    is_valid_y(..., throw=True, name='V')
-
-      Throws an exception if the variable passed is not a valid condensed distance
-      matrix. The message explains why variable is not valid. 'name' is used when
-      referencing the offending variable.
-
-    """
-    y = np.asarray(y)
-    valid = True
-    try:
-        if type(y) != np.ndarray:
-            if name:
-                raise TypeError('\'%s\' passed as a condensed distance matrix is not a numpy array.' % name)
-            else:
-                raise TypeError('Variable is not a numpy array.')
-        if y.dtype != np.double:
-            if name:
-                raise TypeError('Condensed distance matrix \'%s\' must contain doubles (double).' % name)
-            else:
-                raise TypeError('Condensed distance matrix must contain doubles (double).')
-        if len(y.shape) != 1:
-            if name:
-                raise ValueError('Condensed distance matrix \'%s\' must have shape=1 (i.e. be one-dimensional).' % name)
-            else:
-                raise ValueError('Condensed distance matrix must have shape=1 (i.e. be one-dimensional).')
-        n = y.shape[0]
-        d = int(np.ceil(np.sqrt(n * 2)))
-        if (d*(d-1)/2) != n:
-            if name:
-                raise ValueError('Length n of condensed distance matrix \'%s\' must be a binomial coefficient, i.e. there must be a k such that (k \choose 2)=n)!' % name)
-            else:
-                raise ValueError('Length n of condensed distance matrix must be a binomial coefficient, i.e. there must be a k such that (k \choose 2)=n)!')
-    except Exception, e:
-        if throw:
-            raise
-        if warning:
-            _warning(str(e))
-        valid = False
-    return valid
-
-
-def is_valid_dm(D, tol=0.0, throw=False, name="D"):
-    """
-    is_valid_dm(D)
-
-      Returns True if the variable D passed is a valid distance matrix.
-      Distance matrices must be 2-dimensional numpy arrays containing
-      doubles. They must have a zero-diagonal, and they must be symmetric.
-
-    is_valid_dm(D, tol)
-
-      Returns True if the variable D passed is a valid distance matrix.
-      Small numerical differences in D and D.T and non-zeroness of the
-      diagonal are ignored if they are within the tolerance specified
-      by tol.
-
-    is_valid_dm(..., warning=True, name='V')
-
-      Invokes a warning if the variable passed is not a valid distance matrix.
-      The warning message explains why the distance matrix is not valid. 'name'
-      is used when referencing the offending variable.
-
-    is_valid_dm(..., throw=True, name='V')
-
-      Throws an exception if the varible passed is not valid. The message
-      explains why the variable is not valid. 'name' is used when referencing
-      the offending variable.
-
-    """
-    D = np.asarray(D)
-    valid = True
-    try:
-        if type(D) != np.ndarray:
-            if name:
-                raise TypeError('\'%s\' passed as a distance matrix is not a numpy array.' % name)
-            else:
-                raise TypeError('Variable is not a numpy array.')
-        s = D.shape
-        if D.dtype != np.double:
-            if name:
-                raise TypeError('Distance matrix \'%s\' must contain doubles (double).' % name)
-            else:
-                raise TypeError('Distance matrix must contain doubles (double).')
-        if len(D.shape) != 2:
-            if name:
-                raise ValueError('Distance matrix \'%s\' must have shape=2 (i.e. be two-dimensional).' % name)
-            else:
-                raise ValueError('Distance matrix must have shape=2 (i.e. be two-dimensional).')
-        if tol == 0.0:
-            if not (D == D.T).all():
-                if name:
-                    raise ValueError('Distance matrix \'%s\' must be symmetric.' % name)
-                else:
-                    raise ValueError('Distance matrix must be symmetric.')
-            if not (D[xrange(0, s[0]), xrange(0, s[0])] == 0).all():
-                if name:
-                    raise ValueError('Distance matrix \'%s\' diagonal must be zero.' % name)
-                else:
-                    raise ValueError('Distance matrix diagonal must be zero.')
-        else:
-            if not (D - D.T <= tol).all():
-                if name:
-                    raise ValueError('Distance matrix \'%s\' must be symmetric within tolerance %d.' % (name, tol))
-                else:
-                    raise ValueError('Distance matrix must be symmetric within tolerance %5.5f.' % tol)
-            if not (D[xrange(0, s[0]), xrange(0, s[0])] <= tol).all():
-                if name:
-                    raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %5.5f.' % (name, tol))
-                else:
-                    raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %5.5f.' % tol)
-    except Exception, e:
-        if throw:
-            raise
-        if warning:
-            _warning(str(e))
-        valid = False
-    return valid
-
 def numobs_linkage(Z):
     """
     Returns the number of original observations that correspond to a
@@ -1178,29 +946,6 @@
     is_valid_linkage(Z, throw=True, name='Z')
     return (Z.shape[0] + 1)
 
-def numobs_dm(D):
-    """
-    numobs_dm(D)
-
-      Returns the number of original observations that correspond to a
-      square, non-condensed distance matrix D.
-    """
-    D = np.asarray(D)
-    is_valid_dm(D, tol=np.inf, throw=True, name='D')
-    return D.shape[0]
-
-def numobs_y(Y):
-    """
-    numobs_y(Y)
-
-      Returns the number of original observations that correspond to a
-      condensed distance matrix Y.
-    """
-    Y = np.asarray(Y)
-    is_valid_y(Y, throw=True, name='Y')
-    d = int(np.ceil(np.sqrt(Y.shape[0] * 2)))
-    return d
-
 def Z_y_correspond(Z, Y):
     """
     yesno = Z_y_correspond(Z, Y)
@@ -1213,7 +958,7 @@
     """
     Z = np.asarray(Z)
     Y = np.asarray(Y)
-    return numobs_y(Y) == numobs_Z(Z)
+    return numobs_y(Y) == numobs_linkage(Z)
 
 def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None):
     """

Modified: trunk/scipy/cluster/src/distance.c
===================================================================
--- trunk/scipy/cluster/src/distance.c	2008-08-23 00:42:16 UTC (rev 4665)
+++ trunk/scipy/cluster/src/distance.c	2008-08-23 05:21:05 UTC (rev 4666)
@@ -590,3 +590,29 @@
     }
   }
 }
+
+void dist_to_squareform_from_vector(double *M, const double *v, int n) {
+  double *it;
+  const double *cit;
+  int i, j;
+  cit = v;
+  for (i = 0; i < n - 1; i++) {
+    it = M + (i * n) + i + 1;
+    for (j = i + 1; j < n; j++, it++, cit++) {
+      *it = *cit;
+    }
+  }
+}
+
+void dist_to_vector_from_squareform(const double *M, double *v, int n) {
+  double *it;
+  const double *cit;
+  int i, j;
+  it = v;
+  for (i = 0; i < n - 1; i++) {
+    cit = M + (i * n) + i + 1;
+    for (j = i + 1; j < n; j++, it++, cit++) {
+      *it = *cit;
+    }
+  }
+}

Modified: trunk/scipy/cluster/src/distance.h
===================================================================
--- trunk/scipy/cluster/src/distance.h	2008-08-23 00:42:16 UTC (rev 4665)
+++ trunk/scipy/cluster/src/distance.h	2008-08-23 05:21:05 UTC (rev 4666)
@@ -37,6 +37,8 @@
 #ifndef _CPY_DISTANCE_H
 #define _CPY_DISTANCE_H
 
+void dist_to_squareform_from_vector(double *M, const double *v, int n);
+void dist_to_vector_from_squareform(const double *M, double *v, int n);
 void pdist_euclidean(const double *X, double *dm, int m, int n);
 void pdist_seuclidean(const double *X,
 		      const double *var, double *dm, int m, int n);

Modified: trunk/scipy/cluster/src/distance_wrap.c
===================================================================
--- trunk/scipy/cluster/src/distance_wrap.c	2008-08-23 00:42:16 UTC (rev 4665)
+++ trunk/scipy/cluster/src/distance_wrap.c	2008-08-23 05:21:05 UTC (rev 4666)
@@ -493,7 +493,45 @@
   return Py_BuildValue("");
 }
 
+extern PyObject *to_squareform_from_vector_wrap(PyObject *self, PyObject *args) {
+  PyArrayObject *M_, *v_;
+  int n;
+  const double *v;
+  double *M;
+  if (!PyArg_ParseTuple(args, "O!O!",
+			&PyArray_Type, &M_,
+			&PyArray_Type, &v_)) {
+    return 0;
+  }
+  else {
+    M = (double*)M_->data;
+    v = (const double*)v_->data;
+    n = M_->dimensions[0];
+    dist_to_squareform_from_vector(M, v, n);
+  }
+  return Py_BuildValue("d", 0.0);
+}
 
+extern PyObject *to_vector_from_squareform_wrap(PyObject *self, PyObject *args) {
+  PyArrayObject *M_, *v_;
+  int n;
+  double *v;
+  const double *M;
+  if (!PyArg_ParseTuple(args, "O!O!",
+			&PyArray_Type, &M_,
+			&PyArray_Type, &v_)) {
+    return 0;
+  }
+  else {
+    M = (const double*)M_->data;
+    v = (double*)v_->data;
+    n = M_->dimensions[0];
+    dist_to_vector_from_squareform(M, v, n);
+  }
+  return Py_BuildValue("d", 0.0);
+}
+
+
 static PyMethodDef _distanceWrapMethods[] = {
   {"pdist_bray_curtis_wrap", pdist_bray_curtis_wrap, METH_VARARGS},
   {"pdist_canberra_wrap", pdist_canberra_wrap, METH_VARARGS},
@@ -516,6 +554,10 @@
   {"pdist_sokalmichener_bool_wrap", pdist_sokalmichener_bool_wrap, METH_VARARGS},
   {"pdist_sokalsneath_bool_wrap", pdist_sokalsneath_bool_wrap, METH_VARARGS},
   {"pdist_yule_bool_wrap", pdist_yule_bool_wrap, METH_VARARGS},
+  {"to_squareform_from_vector_wrap",
+   to_squareform_from_vector_wrap, METH_VARARGS},
+  {"to_vector_from_squareform_wrap",
+   to_vector_from_squareform_wrap, METH_VARARGS},
   {NULL, NULL}     /* Sentinel - marks the end of this structure */
 };
 

Modified: trunk/scipy/cluster/src/hierarchy.c
===================================================================
--- trunk/scipy/cluster/src/hierarchy.c	2008-08-23 00:42:16 UTC (rev 4665)
+++ trunk/scipy/cluster/src/hierarchy.c	2008-08-23 05:21:05 UTC (rev 4666)
@@ -871,32 +871,6 @@
   free(centroids);
 }
 
-void dist_to_squareform_from_vector(double *M, const double *v, int n) {
-  double *it;
-  const double *cit;
-  int i, j;
-  cit = v;
-  for (i = 0; i < n - 1; i++) {
-    it = M + (i * n) + i + 1;
-    for (j = i + 1; j < n; j++, it++, cit++) {
-      *it = *cit;
-    }
-  }
-}
-
-void dist_to_vector_from_squareform(const double *M, double *v, int n) {
-  double *it;
-  const double *cit;
-  int i, j;
-  it = v;
-  for (i = 0; i < n - 1; i++) {
-    cit = M + (i * n) + i + 1;
-    for (j = i + 1; j < n; j++, it++, cit++) {
-      *it = *cit;
-    }
-  }
-}
-
 void cpy_to_tree(const double *Z, cnode **tnodes, int n) {
   const double *row;
   cnode *node;

Modified: trunk/scipy/cluster/src/hierarchy.h
===================================================================
--- trunk/scipy/cluster/src/hierarchy.h	2008-08-23 00:42:16 UTC (rev 4665)
+++ trunk/scipy/cluster/src/hierarchy.h	2008-08-23 05:21:05 UTC (rev 4666)
@@ -86,9 +86,6 @@
 
 typedef void (distfunc) (cinfo *info, int mini, int minj, int np, int n); 
 
-void dist_to_squareform_from_vector(double *M, const double *v, int n);
-void dist_to_vector_from_squareform(const double *M, double *v, int n);
-
 void inconsistency_calculation(const double *Z, double *R, int n, int d);
 void inconsistency_calculation_alt(const double *Z, double *R, int n, int d);
 

Modified: trunk/scipy/cluster/src/hierarchy_wrap.c
===================================================================
--- trunk/scipy/cluster/src/hierarchy_wrap.c	2008-08-23 00:42:16 UTC (rev 4665)
+++ trunk/scipy/cluster/src/hierarchy_wrap.c	2008-08-23 05:21:05 UTC (rev 4666)
@@ -304,7 +304,6 @@
   return Py_BuildValue("d", 0.0);
 }
 
-
 extern PyObject *chopmin_ns_i_wrap(PyObject *self, PyObject *args) {
   int mini, n;
   PyArrayObject *row;
@@ -332,44 +331,7 @@
   return Py_BuildValue("d", 0.0);
 }
 
-extern PyObject *to_squareform_from_vector_wrap(PyObject *self, PyObject *args) {
-  PyArrayObject *M_, *v_;
-  int n;
-  const double *v;
-  double *M;
-  if (!PyArg_ParseTuple(args, "O!O!",
-			&PyArray_Type, &M_,
-			&PyArray_Type, &v_)) {
-    return 0;
-  }
-  else {
-    M = (double*)M_->data;
-    v = (const double*)v_->data;
-    n = M_->dimensions[0];
-    dist_to_squareform_from_vector(M, v, n);
-  }
-  return Py_BuildValue("d", 0.0);
-}
 
-extern PyObject *to_vector_from_squareform_wrap(PyObject *self, PyObject *args) {
-  PyArrayObject *M_, *v_;
-  int n;
-  double *v;
-  const double *M;
-  if (!PyArg_ParseTuple(args, "O!O!",
-			&PyArray_Type, &M_,
-			&PyArray_Type, &v_)) {
-    return 0;
-  }
-  else {
-    M = (const double*)M_->data;
-    v = (double*)v_->data;
-    n = M_->dimensions[0];
-    dist_to_vector_from_squareform(M, v, n);
-  }
-  return Py_BuildValue("d", 0.0);
-}
-
 extern PyObject *leaders_wrap(PyObject *self, PyObject *args) {
   PyArrayObject *Z_, *T_, *L_, *M_;
   int kk, n, res;
@@ -408,10 +370,6 @@
   {"linkage_euclid_wrap", linkage_euclid_wrap, METH_VARARGS},
   {"linkage_wrap", linkage_wrap, METH_VARARGS},
   {"prelist_wrap", prelist_wrap, METH_VARARGS},
-  {"to_squareform_from_vector_wrap",
-   to_squareform_from_vector_wrap, METH_VARARGS},
-  {"to_vector_from_squareform_wrap",
-   to_vector_from_squareform_wrap, METH_VARARGS},
   {NULL, NULL}     /* Sentinel - marks the end of this structure */
 };
 

Modified: trunk/scipy/cluster/tests/test_distance.py
===================================================================
--- trunk/scipy/cluster/tests/test_distance.py	2008-08-23 00:42:16 UTC (rev 4665)
+++ trunk/scipy/cluster/tests/test_distance.py	2008-08-23 05:21:05 UTC (rev 4666)
@@ -39,8 +39,8 @@
 
 import numpy as np
 from numpy.testing import *
-from scipy.cluster.hierarchy import squareform, linkage, from_mlab_linkage, numobs_dm, numobs_y, numobs_linkage
-from scipy.cluster.distance import pdist, matching, jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule
+from scipy.cluster.hierarchy import linkage, from_mlab_linkage, numobs_dm, numobs_y, numobs_linkage
+from scipy.cluster.distance import squareform, pdist, matching, jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule
 
 #from scipy.cluster.hierarchy import pdist, euclidean
 
@@ -946,3 +946,69 @@
 
 def within_tol(a, b, tol):
     return np.abs(a - b).max() < tol
+
+
+class TestSquareForm(TestCase):
+
+    ################### squareform
+    def test_squareform_empty_matrix(self):
+        "Tests squareform on an empty matrix."
+        A = np.zeros((0,0))
+        rA = squareform(np.array(A, dtype='double'))
+        self.failUnless(rA.shape == (0,))
+
+    def test_squareform_empty_vector(self):
+        v = np.zeros((0,))
+        rv = squareform(np.array(v, dtype='double'))
+        self.failUnless(rv.shape == (1,1))
+        self.failUnless(rv[0, 0] == 0)
+
+    def test_squareform_1by1_matrix(self):
+        "Tests squareform on a 1x1 matrix."
+        A = np.zeros((1,1))
+        rA = squareform(np.array(A, dtype='double'))
+        self.failUnless(rA.shape == (0,))
+
+    def test_squareform_one_vector(self):
+        "Tests squareform on a 1-D array, length=1."
+        v = np.ones((1,)) * 8.3
+        rv = squareform(np.array(v, dtype='double'))
+        self.failUnless(rv.shape == (2,2))
+        self.failUnless(rv[0,1] == 8.3)
+        self.failUnless(rv[1,0] == 8.3)
+
+    def test_squareform_2by2_matrix(self):
+        "Tests squareform on a 2x2 matrix."
+        A = np.zeros((2,2))
+        A[0,1]=0.8
+        A[1,0]=0.8
+        rA = squareform(np.array(A, dtype='double'))
+        self.failUnless(rA.shape == (1,))
+        self.failUnless(rA[0] == 0.8)
+
+    def test_squareform_multi_matrix(self):
+        "Tests squareform on a square matrices of multiple sizes."
+        for n in xrange(2, 5):
+            yield self.check_squareform_multi_matrix(n)
+
+    def check_squareform_multi_matrix(self, n):
+        X = np.random.rand(n, 4)
+        Y = pdist(X)
+        self.failUnless(len(Y.shape) == 1)
+        A = squareform(Y)
+        Yr = squareform(A)
+        s = A.shape
+        k = 0
+        if verbose >= 3:
+            print A.shape, Y.shape, Yr.shape
+        self.failUnless(len(s) == 2)
+        self.failUnless(len(Yr.shape) == 1)
+        self.failUnless(s[0] == s[1])
+        for i in xrange(0, s[0]):
+            for j in xrange(i+1, s[1]):
+                if i != j:
+                    #print i, j, k, A[i, j], Y[k]
+                    self.failUnless(A[i, j] == Y[k])
+                    k += 1
+                else:
+                    self.failUnless(A[i, j] == 0)

Modified: trunk/scipy/cluster/tests/test_hierarchy.py
===================================================================
--- trunk/scipy/cluster/tests/test_hierarchy.py	2008-08-23 00:42:16 UTC (rev 4665)
+++ trunk/scipy/cluster/tests/test_hierarchy.py	2008-08-23 05:21:05 UTC (rev 4666)
@@ -39,8 +39,8 @@
 import numpy as np
 from numpy.testing import *
 
-from scipy.cluster.hierarchy import squareform, linkage, from_mlab_linkage, numobs_dm, numobs_y, numobs_linkage, inconsistent
-from scipy.cluster.distance import pdist, matching, jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule
+from scipy.cluster.hierarchy import linkage, from_mlab_linkage, numobs_dm, numobs_y, numobs_linkage, inconsistent
+from scipy.cluster.distance import squareform, pdist, matching, jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule
 
 _tdist = np.array([[0,    662,  877,  255,  412,  996],
                       [662,  0,    295,  468,  268,  400],
@@ -86,71 +86,6 @@
 
 load_testing_files()
 
-class TestSquareForm(TestCase):
-
-    ################### squareform
-    def test_squareform_empty_matrix(self):
-        "Tests squareform on an empty matrix."
-        A = np.zeros((0,0))
-        rA = squareform(np.array(A, dtype='double'))
-        self.failUnless(rA.shape == (0,))
-
-    def test_squareform_empty_vector(self):
-        v = np.zeros((0,))
-        rv = squareform(np.array(v, dtype='double'))
-        self.failUnless(rv.shape == (1,1))
-        self.failUnless(rv[0, 0] == 0)
-
-    def test_squareform_1by1_matrix(self):
-        "Tests squareform on a 1x1 matrix."
-        A = np.zeros((1,1))
-        rA = squareform(np.array(A, dtype='double'))
-        self.failUnless(rA.shape == (0,))
-
-    def test_squareform_one_vector(self):
-        "Tests squareform on a 1-D array, length=1."
-        v = np.ones((1,)) * 8.3
-        rv = squareform(np.array(v, dtype='double'))
-        self.failUnless(rv.shape == (2,2))
-        self.failUnless(rv[0,1] == 8.3)
-        self.failUnless(rv[1,0] == 8.3)
-
-    def test_squareform_2by2_matrix(self):
-        "Tests squareform on a 2x2 matrix."
-        A = np.zeros((2,2))
-        A[0,1]=0.8
-        A[1,0]=0.8
-        rA = squareform(np.array(A, dtype='double'))
-        self.failUnless(rA.shape == (1,))
-        self.failUnless(rA[0] == 0.8)
-
-    def test_squareform_multi_matrix(self):
-        "Tests squareform on a square matrices of multiple sizes."
-        for n in xrange(2, 5):
-            yield self.check_squareform_multi_matrix(n)
-
-    def check_squareform_multi_matrix(self, n):
-        X = np.random.rand(n, 4)
-        Y = pdist(X)
-        self.failUnless(len(Y.shape) == 1)
-        A = squareform(Y)
-        Yr = squareform(A)
-        s = A.shape
-        k = 0
-        if verbose >= 3:
-            print A.shape, Y.shape, Yr.shape
-        self.failUnless(len(s) == 2)
-        self.failUnless(len(Yr.shape) == 1)
-        self.failUnless(s[0] == s[1])
-        for i in xrange(0, s[0]):
-            for j in xrange(i+1, s[1]):
-                if i != j:
-                    #print i, j, k, A[i, j], Y[k]
-                    self.failUnless(A[i, j] == Y[k])
-                    k += 1
-                else:
-                    self.failUnless(A[i, j] == 0)
-
 class TestNumObs(TestCase):
 
     ############## numobs_dm




More information about the Scipy-svn mailing list