[Scipy-svn] r4417 - in trunk/scipy/cluster: . src tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Mon Jun 9 01:55:52 EDT 2008
Author: damian.eads
Date: 2008-06-09 00:55:44 -0500 (Mon, 09 Jun 2008)
New Revision: 4417
Added:
trunk/scipy/cluster/distance.py
trunk/scipy/cluster/src/common.h
trunk/scipy/cluster/src/distance.c
trunk/scipy/cluster/src/distance.h
trunk/scipy/cluster/src/distance_wrap.c
Modified:
trunk/scipy/cluster/__init__.py
trunk/scipy/cluster/hierarchy.py
trunk/scipy/cluster/setup.py
trunk/scipy/cluster/src/hierarchy.c
trunk/scipy/cluster/src/hierarchy.h
trunk/scipy/cluster/src/hierarchy_wrap.c
trunk/scipy/cluster/tests/test_hierarchy.py
Log:
Moved distance functions to new module.
Modified: trunk/scipy/cluster/__init__.py
===================================================================
--- trunk/scipy/cluster/__init__.py 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/__init__.py 2008-06-09 05:55:44 UTC (rev 4417)
@@ -4,7 +4,7 @@
from info import __doc__
-__all__ = ['vq', 'hierarchy']
+__all__ = ['vq', 'hierarchy', 'distance']
import vq, hierarchy
from scipy.testing.pkgtester import Tester
Added: trunk/scipy/cluster/distance.py
===================================================================
--- trunk/scipy/cluster/distance.py 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/distance.py 2008-06-09 05:55:44 UTC (rev 4417)
@@ -0,0 +1,839 @@
+"""
+Distance matrix computation from a collection of raw observation vectors
+
+ pdist computes distances between each observation pair.
+
+Distance functions between two vectors u and v
+
+ braycurtis the Bray-Curtis distance.
+ canberra the Canberra distance.
+ chebyshev the Chebyshev distance.
+ cityblock the Manhattan distance.
+ correlation the Correlation distance.
+ cosine the Cosine distance.
+ dice the Dice dissimilarity (boolean).
+ euclidean the Euclidean distance.
+ hamming the Hamming distance (boolean).
+ jaccard the Jaccard distance (boolean).
+ kulsinski the Kulsinski distance (boolean).
+ mahalanobis the Mahalanobis distance.
+ matching the matching dissimilarity (boolean).
+ minkowski the Minkowski distance.
+ rogerstanimoto the Rogers-Tanimoto dissimilarity (boolean).
+ russellrao the Russell-Rao dissimilarity (boolean).
+ seuclidean the normalized Euclidean distance.
+ sokalmichener the Sokal-Michener dissimilarity (boolean).
+ sokalsneath the Sokal-Sneath dissimilarity (boolean).
+ sqeuclidean the squared Euclidean distance.
+ yule the Yule dissimilarity (boolean).
+
+Copyright (C) Damian Eads, 2007-2008. New BSD License.
+
+"""
+
+import numpy as np
+import _distance_wrap
+import types
+
+def _copy_array_if_base_present(a):
+ """
+ Copies the array if its base points to a parent array.
+ """
+ if a.base is not None:
+ return a.copy()
+ elif np.issubsctype(a, np.float32):
+ return array(a, dtype=np.double)
+ else:
+ return a
+
+def _copy_arrays_if_base_present(T):
+ """
+ Accepts a tuple of arrays T. Copies the array T[i] if its base array
+ points to an actual array. Otherwise, the reference is just copied.
+ This is useful if the arrays are being passed to a C function that
+ does not do proper striding.
+ """
+ l = [_copy_array_if_base_present(a) for a in T]
+ return l
+
+def _convert_to_bool(X):
+ if X.dtype != np.bool:
+ X = np.bool_(X)
+ if not X.flags.contiguous:
+ X = X.copy()
+ return X
+
+def _convert_to_double(X):
+ if X.dtype != np.double:
+ X = np.double(X)
+ if not X.flags.contiguous:
+ X = X.copy()
+ return X
+
+def minkowski(u, v, p):
+ """
+ d = minkowski(u, v, p)
+
+ Returns the Minkowski distance between two vectors u and v,
+
+ ||u-v||_p = (\sum {|u_i - v_i|^p})^(1/p).
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ if p < 1:
+ raise ValueError("p must be at least 1")
+ return (abs(u-v)**p).sum() ** (1.0 / p)
+
+def euclidean(u, v):
+ """
+ d = euclidean(u, v)
+
+ Computes the Euclidean distance between two n-vectors u and v, ||u-v||_2
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ q=np.matrix(u-v)
+ return np.sqrt((q*q.T).sum())
+
+def sqeuclidean(u, v):
+ """
+ d = sqeuclidean(u, v)
+
+ Computes the squared Euclidean distance between two n-vectors u and v,
+ (||u-v||_2)^2.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ return ((u-v)*(u-v).T).sum()
+
+def cosine(u, v):
+ """
+ d = cosine(u, v)
+
+ Computes the Cosine distance between two n-vectors u and v,
+ (1-uv^T)/(||u||_2 * ||v||_2).
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ return (1.0 - (np.dot(u, v.T) / \
+ (np.sqrt(np.dot(u, u.T)) * np.sqrt(np.dot(v, v.T)))))
+
+def correlation(u, v):
+ """
+ d = correlation(u, v)
+
+ Computes the correlation distance between two n-vectors u and v,
+
+ 1 - (u - n|u|_1)(v - n|v|_1)^T
+ --------------------------------- ,
+ |(u - n|u|_1)|_2 |(v - n|v|_1)|^T
+
+ where |*|_1 is the Manhattan norm and n is the common dimensionality
+ of the vectors.
+ """
+ umu = u.mean()
+ vmu = v.mean()
+ um = u - umu
+ vm = v - vmu
+ return 1.0 - (np.dot(um, vm) /
+ (np.sqrt(np.dot(um, um)) \
+ * np.sqrt(np.dot(vm, vm))))
+
+def hamming(u, v):
+ """
+ d = hamming(u, v)
+
+ Computes the Hamming distance between two n-vectors u and v,
+ which is simply the proportion of disagreeing components in u
+ and v. If u and v are boolean vectors, the hamming distance is
+
+ (c_{01} + c_{10}) / n
+
+ where c_{ij} is the number of occurrences of
+
+ u[k] == i and v[k] == j
+
+ for k < n.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ return (u != v).mean()
+
+def jaccard(u, v):
+ """
+ d = jaccard(u, v)
+
+ Computes the Jaccard-Needham dissimilarity between two boolean
+ n-vectors u and v, which is
+
+ c_{TF} + c_{FT}
+ ------------------------
+ c_{TT} + c_{FT} + c_{TF}
+
+ where c_{ij} is the number of occurrences of
+
+ u[k] == i and v[k] == j
+
+ for k < n.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ return (np.double(np.bitwise_and((u != v),
+ np.bitwise_or(u != 0, v != 0)).sum())
+ / np.double(np.bitwise_or(u != 0, v != 0).sum()))
+
+def kulsinski(u, v):
+ """
+ d = kulsinski(u, v)
+
+ Computes the Kulsinski dissimilarity between two boolean n-vectors
+ u and v, which is
+
+ c_{TF} + c_{FT} - c_{TT} + n
+ ----------------------------
+ c_{FT} + c_{TF} + n
+
+ where c_{ij} is the number of occurrences of
+
+ u[k] == i and v[k] == j
+
+ for k < n.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ n = len(u)
+ (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
+
+ return (ntf + nft - ntt + n) / (ntf + nft + n)
+
+def seuclidean(u, v, V):
+ """
+ d = seuclidean(u, v, V)
+
+ Returns the standardized Euclidean distance between two
+ n-vectors u and v. V is a m-dimensional vector of component
+ variances. It is usually computed among a larger collection vectors.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ V = np.asarray(V)
+ if len(V.shape) != 1 or V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
+ raise TypeError('V must be a 1-D array of the same dimension as u and v.')
+ return np.sqrt(((u-v)**2 / V).sum())
+
+def cityblock(u, v):
+ """
+ d = cityblock(u, v)
+
+ Computes the Manhattan distance between two n-vectors u and v,
+ \sum {u_i-v_i}.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ return abs(u-v).sum()
+
+def mahalanobis(u, v, VI):
+ """
+ d = mahalanobis(u, v, VI)
+
+ Computes the Mahalanobis distance between two n-vectors u and v,
+ (u-v)VI(u-v)^T
+ where VI is the inverse covariance matrix.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ VI = np.asarray(VI)
+ return np.sqrt(np.dot(np.dot((u-v),VI),(u-v).T).sum())
+
+def chebyshev(u, v):
+ """
+ d = chebyshev(u, v)
+
+ Computes the Chebyshev distance between two n-vectors u and v,
+ \max {|u_i-v_i|}.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ return max(abs(u-v))
+
+def braycurtis(u, v):
+ """
+ d = braycurtis(u, v)
+
+ Computes the Bray-Curtis distance between two n-vectors u and v,
+ \sum{|u_i-v_i|} / \sum{|u_i+v_i|}.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ return abs(u-v).sum() / abs(u+v).sum()
+
+def canberra(u, v):
+ """
+ d = canberra(u, v)
+
+ Computes the Canberra distance between two n-vectors u and v,
+ \sum{|u_i-v_i|} / \sum{|u_i|+|v_i}.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ return abs(u-v).sum() / (abs(u).sum() + abs(v).sum())
+
+def _nbool_correspond_all(u, v):
+ if u.dtype != v.dtype:
+ raise TypeError("Arrays being compared must be of the same data type.")
+
+ if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
+ not_u = 1.0 - u
+ not_v = 1.0 - v
+ nff = (not_u * not_v).sum()
+ nft = (not_u * v).sum()
+ ntf = (u * not_v).sum()
+ ntt = (u * v).sum()
+ elif u.dtype == np.bool:
+ not_u = ~u
+ not_v = ~v
+ nff = (not_u & not_v).sum()
+ nft = (not_u & v).sum()
+ ntf = (u & not_v).sum()
+ ntt = (u & v).sum()
+ else:
+ raise TypeError("Arrays being compared have unknown type.")
+
+ return (nff, nft, ntf, ntt)
+
+def _nbool_correspond_ft_tf(u, v):
+ if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
+ not_u = 1.0 - u
+ not_v = 1.0 - v
+ nff = (not_u * not_v).sum()
+ nft = (not_u * v).sum()
+ ntf = (u * not_v).sum()
+ ntt = (u * v).sum()
+ else:
+ not_u = ~u
+ not_v = ~v
+ nft = (not_u & v).sum()
+ ntf = (u & not_v).sum()
+ return (nft, ntf)
+
+def yule(u, v):
+ """
+ d = yule(u, v)
+ Computes the Yule dissimilarity between two boolean n-vectors u and v,
+
+ R
+ ---------------------
+ c_{TT} + c_{FF} + R/2
+
+ where c_{ij} is the number of occurrences of
+
+ u[k] == i and v[k] == j
+
+ for k < n, and
+
+ R = 2.0 * (c_{TF} + c_{FT}).
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
+ print nff, nft, ntf, ntt
+ return float(2.0 * ntf * nft) / float(ntt * nff + ntf * nft)
+
+def matching(u, v):
+ """
+ d = matching(u, v)
+
+ Computes the Matching dissimilarity between two boolean n-vectors
+ u and v, which is
+
+ (c_{TF} + c_{FT}) / n
+
+ where c_{ij} is the number of occurrences of
+
+ u[k] == i and v[k] == j
+
+ for k < n.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+ return float(nft + ntf) / float(len(u))
+
+def dice(u, v):
+ """
+ d = dice(u, v)
+
+ Computes the Dice dissimilarity between two boolean n-vectors
+ u and v, which is
+
+ c_{TF} + c_{FT}
+ ----------------------------
+ 2 * c_{TT} + c_{FT} + c_{TF}
+
+ where c_{ij} is the number of occurrences of
+
+ u[k] == i and v[k] == j
+
+ for k < n.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ if u.dtype == np.bool:
+ ntt = (u & v).sum()
+ else:
+ ntt = (u * v).sum()
+ (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+ return float(ntf + nft) / float(2.0 * ntt + ntf + nft)
+
+def rogerstanimoto(u, v):
+ """
+ d = rogerstanimoto(u, v)
+
+ Computes the Rogers-Tanimoto dissimilarity between two boolean
+ n-vectors u and v,
+
+ R
+ -------------------
+ c_{TT} + c_{FF} + R
+
+ where c_{ij} is the number of occurrences of
+
+ u[k] == i and v[k] == j
+
+ for k < n, and
+
+ R = 2.0 * (c_{TF} + c_{FT}).
+
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
+ return float(2.0 * (ntf + nft)) / float(ntt + nff + (2.0 * (ntf + nft)))
+
+def russellrao(u, v):
+ """
+ d = russellrao(u, v)
+
+ Computes the Russell-Rao dissimilarity between two boolean n-vectors
+ u and v, (n - c_{TT}) / n where c_{ij} is the number of occurrences
+ of u[k] == i and v[k] == j for k < n.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ if u.dtype == np.bool:
+ ntt = (u & v).sum()
+ else:
+ ntt = (u * v).sum()
+ return float(len(u) - ntt) / float(len(u))
+
+def sokalmichener(u, v):
+ """
+ d = sokalmichener(u, v)
+
+ Computes the Sokal-Michener dissimilarity between two boolean vectors
+ u and v, 2R / (S + 2R) where c_{ij} is the number of occurrences of
+ u[k] == i and v[k] == j for k < n and R = 2 * (c_{TF} + c{FT}) and
+ S = c_{FF} + c_{TT}.
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ if u.dtype == np.bool:
+ ntt = (u & v).sum()
+ nff = (~u & ~v).sum()
+ else:
+ ntt = (u * v).sum()
+ nff = ((1.0 - u) * (1.0 - v)).sum()
+ (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+ return float(2.0 * (ntf + nft))/float(ntt + nff + 2.0 * (ntf + nft))
+
+def sokalsneath(u, v):
+ """
+ d = sokalsneath(u, v)
+
+ Computes the Sokal-Sneath dissimilarity between two boolean vectors
+ u and v, 2R / (c_{TT} + 2R) where c_{ij} is the number of occurrences
+ of u[k] == i and v[k] == j for k < n and R = 2 * (c_{TF} + c{FT}).
+ """
+ u = np.asarray(u)
+ v = np.asarray(v)
+ if u.dtype == np.bool:
+ ntt = (u & v).sum()
+ else:
+ ntt = (u * v).sum()
+ (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+ return float(2.0 * (ntf + nft))/float(ntt + 2.0 * (ntf + nft))
+
+
+def pdist(X, metric='euclidean', p=2, V=None, VI=None):
+ """ Y = pdist(X, method='euclidean', p=2)
+
+ Computes the distance between m original observations in
+ n-dimensional space. Returns a condensed distance matrix Y.
+ For each i and j (i<j), the metric dist(u=X[i], v=X[j]) is
+ computed and stored in the ij'th entry. See squareform
+ to learn how to retrieve this entry.
+
+ 1. Y = pdist(X)
+
+ Computes the distance between m points using Euclidean distance
+ (2-norm) as the distance metric between the points. The points
+ are arranged as m n-dimensional row vectors in the matrix X.
+
+ 2. Y = pdist(X, 'minkowski', p)
+
+ Computes the distances using the Minkowski distance ||u-v||_p
+ (p-norm) where p>=1.
+
+ 3. Y = pdist(X, 'cityblock')
+
+ Computes the city block or Manhattan distance between the
+ points.
+
+ 4. Y = pdist(X, 'seuclidean', V=None)
+
+ Computes the standardized Euclidean distance. The standardized
+ Euclidean distance between two n-vectors u and v is
+
+ sqrt(\sum {(u_i-v_i)^2 / V[x_i]}).
+
+ V is the variance vector; V[i] is the variance computed over all
+ the i'th components of the points. If not passed, it is
+ automatically computed.
+
+ 5. Y = pdist(X, 'sqeuclidean')
+
+ Computes the squared Euclidean distance ||u-v||_2^2 between
+ the vectors.
+
+ 6. Y = pdist(X, 'cosine')
+
+ Computes the cosine distance between vectors u and v,
+
+ 1 - uv^T
+ -----------
+ |u|_2 |v|_2
+
+ where |*|_2 is the 2 norm of its argument *.
+
+ 7. Y = pdist(X, 'correlation')
+
+ Computes the correlation distance between vectors u and v. This is
+
+ 1 - (u - n|u|_1)(v - n|v|_1)^T
+ --------------------------------- ,
+ |(u - n|u|_1)|_2 |(v - n|v|_1)|^T
+
+ where |*|_1 is the Manhattan (or 1-norm) of its argument *,
+ and n is the common dimensionality of the vectors.
+
+ 8. Y = pdist(X, 'hamming')
+
+ Computes the normalized Hamming distance, or the proportion
+ of those vector elements between two n-vectors u and v which
+ disagree. To save memory, the matrix X can be of type boolean.
+
+ 9. Y = pdist(X, 'jaccard')
+
+ Computes the Jaccard distance between the points. Given two
+ vectors, u and v, the Jaccard distance is the proportion of
+ those elements u_i and v_i that disagree where at least one
+ of them is non-zero.
+
+ 10. Y = pdist(X, 'chebyshev')
+
+ Computes the Chebyshev distance between the points. The
+ Chebyshev distance between two n-vectors u and v is the maximum
+ norm-1 distance between their respective elements. More
+ precisely, the distance is given by
+
+ d(u,v) = max {|u_i-v_i|}.
+
+ 11. Y = pdist(X, 'canberra')
+
+ Computes the Canberra distance between the points. The
+ Canberra distance between two points u and v is
+
+ |u_1-v_1| |u_2-v_2| |u_n-v_n|
+ d(u,v) = ----------- + ----------- + ... + -----------
+ |u_1|+|v_1| |u_2|+|v_2| |u_n|+|v_n|
+
+ 12. Y = pdist(X, 'braycurtis')
+
+ Computes the Bray-Curtis distance between the points. The
+ Bray-Curtis distance between two points u and v is
+
+ |u_1-v_1| + |u_2-v_2| + ... + |u_n-v_n|
+ d(u,v) = ---------------------------------------
+ |u_1+v_1| + |u_2+v_2| + ... + |u_n+v_n|
+
+ 13. Y = pdist(X, 'mahalanobis', VI=None)
+
+ Computes the Mahalanobis distance between the points. The
+ Mahalanobis distance between two points u and v is
+ (u-v)(1/V)(u-v)^T
+ where (1/V) is the inverse covariance. If VI is not None,
+ VI will be used as the inverse covariance matrix.
+
+ 14. Y = pdist(X, 'yule')
+
+ Computes the Yule distance between each pair of boolean
+ vectors. (see yule function documentation)
+
+ 15. Y = pdist(X, 'matching')
+
+ Computes the matching distance between each pair of boolean
+ vectors. (see matching function documentation)
+
+ 16. Y = pdist(X, 'dice')
+
+ Computes the Dice distance between each pair of boolean
+ vectors. (see dice function documentation)
+
+ 17. Y = pdist(X, 'kulsinski')
+
+ Computes the Kulsinski distance between each pair of
+ boolean vectors. (see kulsinski function documentation)
+
+ 17. Y = pdist(X, 'rogerstanimoto')
+
+ Computes the Rogers-Tanimoto distance between each pair of
+ boolean vectors. (see rogerstanimoto function documentation)
+
+ 18. Y = pdist(X, 'russellrao')
+
+ Computes the Russell-Rao distance between each pair of
+ boolean vectors. (see russellrao function documentation)
+
+ 19. Y = pdist(X, 'sokalmichener')
+
+ Computes the Sokal-Michener distance between each pair of
+ boolean vectors. (see sokalmichener function documentation)
+
+ 20. Y = pdist(X, 'sokalsneath')
+
+ Computes the Sokal-Sneath distance between each pair of
+ boolean vectors. (see sokalsneath function documentation)
+
+ 21. Y = pdist(X, f)
+
+ Computes the distance between all pairs of vectors in X
+ using the user supplied 2-arity function f. For example,
+ Euclidean distance between the vectors could be computed
+ as follows,
+
+ dm = pdist(X, (lambda u, v: np.sqrt(((u-v)*(u-v).T).sum())))
+
+ Note that you should avoid passing a reference to one of
+ the distance functions defined in this library. For example,
+
+ dm = pdist(X, sokalsneath)
+
+ would calculate the pair-wise distances between the vectors
+ in X using the Python function sokalsneath. This would result
+ in sokalsneath being called {n \choose 2} times, which is
+ inefficient. Instead, the optimized C version is more
+ efficient, and we call it using the following syntax.
+
+ dm = pdist(X, 'sokalsneath')
+ """
+# 21. Y = pdist(X, 'test_Y')
+#
+# Computes the distance between all pairs of vectors in X
+# using the distance metric Y but with a more succint,
+# verifiable, but less efficient implementation.
+
+
+ X = np.asarray(X)
+
+ #if np.issubsctype(X, np.floating) and not np.issubsctype(X, np.double):
+ # raise TypeError('Floating point arrays must be 64-bit (got %r).' %
+ # (X.dtype.type,))
+
+ # The C code doesn't do striding.
+ [X] = _copy_arrays_if_base_present([_convert_to_double(X)])
+
+ s = X.shape
+
+ if len(s) != 2:
+ raise ValueError('A 2-dimensional array must be passed.');
+
+ m = s[0]
+ n = s[1]
+ dm = np.zeros((m * (m - 1) / 2,), dtype=np.double)
+
+ mtype = type(metric)
+ if mtype is types.FunctionType:
+ k = 0
+ if metric == minkowski:
+ for i in xrange(0, m - 1):
+ for j in xrange(i+1, m):
+ dm[k] = minkowski(X[i, :], X[j, :], p)
+ k = k + 1
+ elif metric == seuclidean:
+ for i in xrange(0, m - 1):
+ for j in xrange(i+1, m):
+ dm[k] = seuclidean(X[i, :], X[j, :], V)
+ k = k + 1
+ elif metric == mahalanobis:
+ for i in xrange(0, m - 1):
+ for j in xrange(i+1, m):
+ dm[k] = mahalanobis(X[i, :], X[j, :], V)
+ k = k + 1
+ else:
+ for i in xrange(0, m - 1):
+ for j in xrange(i+1, m):
+ dm[k] = metric(X[i, :], X[j, :])
+ k = k + 1
+
+ elif mtype is types.StringType:
+ mstr = metric.lower()
+
+ #if X.dtype != np.double and \
+ # (mstr != 'hamming' and mstr != 'jaccard'):
+ # TypeError('A double array must be passed.')
+ if mstr in set(['euclidean', 'euclid', 'eu', 'e']):
+ _distance_wrap.pdist_euclidean_wrap(_convert_to_double(X), dm)
+ elif mstr in set(['sqeuclidean', 'sqe', 'sqeuclid']):
+ _distance_wrap.pdist_euclidean_wrap(_convert_to_double(X), dm)
+ dm = dm ** 2.0
+ elif mstr in set(['cityblock', 'cblock', 'cb', 'c']):
+ _distance_wrap.pdist_city_block_wrap(X, dm)
+ elif mstr in set(['hamming', 'hamm', 'ha', 'h']):
+ if X.dtype == np.bool:
+ _distance_wrap.pdist_hamming_bool_wrap(_convert_to_bool(X), dm)
+ else:
+ _distance_wrap.pdist_hamming_wrap(_convert_to_double(X), dm)
+ elif mstr in set(['jaccard', 'jacc', 'ja', 'j']):
+ if X.dtype == np.bool:
+ _distance_wrap.pdist_jaccard_bool_wrap(_convert_to_bool(X), dm)
+ else:
+ _distance_wrap.pdist_jaccard_wrap(_convert_to_double(X), dm)
+ elif mstr in set(['chebychev', 'chebyshev', 'cheby', 'cheb', 'ch']):
+ _distance_wrap.pdist_chebyshev_wrap(_convert_to_double(X), dm)
+ elif mstr in set(['minkowski', 'mi', 'm']):
+ _distance_wrap.pdist_minkowski_wrap(_convert_to_double(X), dm, p)
+ elif mstr in set(['seuclidean', 'se', 's']):
+ if V is not None:
+ V = np.asarray(V)
+ if type(V) != np.ndarray:
+ raise TypeError('Variance vector V must be a numpy array')
+ if V.dtype != np.double:
+ raise TypeError('Variance vector V must contain doubles.')
+ if len(V.shape) != 1:
+ raise ValueError('Variance vector V must be one-dimensional.')
+ if V.shape[0] != n:
+ raise ValueError('Variance vector V must be of the same dimension as the vectors on which the distances are computed.')
+ # The C code doesn't do striding.
+ [VV] = _copy_arrays_if_base_present([_convert_to_double(V)])
+ else:
+ VV = np.var(X, axis=0, ddof=1)
+ _distance_wrap.pdist_seuclidean_wrap(_convert_to_double(X), VV, dm)
+ # Need to test whether vectorized cosine works better.
+ # Find out: Is there a dot subtraction operator so I can
+ # subtract matrices in a similar way to multiplying them?
+ # Need to get rid of as much unnecessary C code as possible.
+ elif mstr in set(['cosine', 'cos']):
+ norms = np.sqrt(np.sum(X * X, axis=1))
+ _distance_wrap.pdist_cosine_wrap(_convert_to_double(X), dm, norms)
+ elif mstr in set(['old_cosine', 'old_cos']):
+ norms = np.sqrt(np.sum(X * X, axis=1))
+ nV = norms.reshape(m, 1)
+ # The numerator u * v
+ nm = np.dot(X, X.T)
+ # The denom. ||u||*||v||
+ de = np.dot(nV, nV.T);
+ dm = 1.0 - (nm / de)
+ dm[xrange(0,m),xrange(0,m)] = 0.0
+ dm = squareform(dm)
+ elif mstr in set(['correlation', 'co']):
+ X2 = X - X.mean(1)[:,np.newaxis]
+ #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n)
+ norms = np.sqrt(np.sum(X2 * X2, axis=1))
+ _distance_wrap.pdist_cosine_wrap(_convert_to_double(X2), _convert_to_double(dm), _convert_to_double(norms))
+ elif mstr in set(['mahalanobis', 'mahal', 'mah']):
+ if VI is not None:
+ VI = _convert_to_double(np.asarray(VI))
+ if type(VI) != np.ndarray:
+ raise TypeError('VI must be a numpy array.')
+ if VI.dtype != np.double:
+ raise TypeError('The array must contain 64-bit floats.')
+ [VI] = _copy_arrays_if_base_present([VI])
+ else:
+ V = np.cov(X.T)
+ VI = _convert_to_double(np.linalg.inv(V).T.copy())
+ # (u-v)V^(-1)(u-v)^T
+ _distance_wrap.pdist_mahalanobis_wrap(_convert_to_double(X), VI, dm)
+ elif mstr == 'canberra':
+ _distance_wrap.pdist_canberra_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'braycurtis':
+ _distance_wrap.pdist_bray_curtis_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'yule':
+ _distance_wrap.pdist_yule_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'matching':
+ _distance_wrap.pdist_matching_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'kulsinski':
+ _distance_wrap.pdist_kulsinski_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'dice':
+ _distance_wrap.pdist_dice_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'rogerstanimoto':
+ _distance_wrap.pdist_rogerstanimoto_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'russellrao':
+ _distance_wrap.pdist_russellrao_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'sokalmichener':
+ _distance_wrap.pdist_sokalmichener_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'sokalsneath':
+ _distance_wrap.pdist_sokalsneath_bool_wrap(_convert_to_bool(X), dm)
+ elif metric == 'test_euclidean':
+ dm = pdist(X, euclidean)
+ elif metric == 'test_sqeuclidean':
+ if V is None:
+ V = np.var(X, axis=0, ddof=1)
+ else:
+ V = np.asarray(V)
+ dm = pdist(X, lambda u, v: seuclidean(u, v, V))
+ elif metric == 'test_braycurtis':
+ dm = pdist(X, braycurtis)
+ elif metric == 'test_mahalanobis':
+ if VI is None:
+ V = np.cov(X.T)
+ VI = np.linalg.inv(V)
+ else:
+ VI = np.asarray(VI)
+ [VI] = _copy_arrays_if_base_present([VI])
+ # (u-v)V^(-1)(u-v)^T
+ dm = pdist(X, (lambda u, v: mahalanobis(u, v, VI)))
+ elif metric == 'test_cityblock':
+ dm = pdist(X, cityblock)
+ elif metric == 'test_minkowski':
+ dm = pdist(X, minkowski, p)
+ elif metric == 'test_cosine':
+ dm = pdist(X, cosine)
+ elif metric == 'test_correlation':
+ dm = pdist(X, correlation)
+ elif metric == 'test_hamming':
+ dm = pdist(X, hamming)
+ elif metric == 'test_jaccard':
+ dm = pdist(X, jaccard)
+ elif metric == 'test_chebyshev' or metric == 'test_chebychev':
+ dm = pdist(X, chebyshev)
+ elif metric == 'test_yule':
+ dm = pdist(X, yule)
+ elif metric == 'test_matching':
+ dm = pdist(X, matching)
+ elif metric == 'test_dice':
+ dm = pdist(X, dice)
+ elif metric == 'test_kulsinski':
+ dm = pdist(X, kulsinski)
+ elif metric == 'test_rogerstanimoto':
+ dm = pdist(X, rogerstanimoto)
+ elif metric == 'test_russellrao':
+ dm = pdist(X, russellrao)
+ elif metric == 'test_sokalsneath':
+ dm = pdist(X, sokalsneath)
+ elif metric == 'test_sokalmichener':
+ dm = pdist(X, sokalmichener)
+ else:
+ raise ValueError('Unknown Distance Metric: %s' % mstr)
+ else:
+ raise TypeError('2nd argument metric must be a string identifier or a function.')
+ return dm
Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/hierarchy.py 2008-06-09 05:55:44 UTC (rev 4417)
@@ -22,9 +22,6 @@
median the median/WPGMC algorithm. (alias)
ward the Ward/incremental algorithm. (alias)
-Distance matrix computation from a collection of raw observation vectors
-
- pdist computes distances between each observation pair.
squareform converts a sq. D.M. to a condensed one and vice versa.
Statistic computations on hierarchies
@@ -47,30 +44,6 @@
lvlist a left-to-right traversal of the leaves.
totree represents a linkage matrix as a tree object.
-Distance functions between two vectors u and v
-
- braycurtis the Bray-Curtis distance.
- canberra the Canberra distance.
- chebyshev the Chebyshev distance.
- cityblock the Manhattan distance.
- correlation the Correlation distance.
- cosine the Cosine distance.
- dice the Dice dissimilarity (boolean).
- euclidean the Euclidean distance.
- hamming the Hamming distance (boolean).
- jaccard the Jaccard distance (boolean).
- kulsinski the Kulsinski distance (boolean).
- mahalanobis the Mahalanobis distance.
- matching the matching dissimilarity (boolean).
- minkowski the Minkowski distance.
- rogerstanimoto the Rogers-Tanimoto dissimilarity (boolean).
- russellrao the Russell-Rao dissimilarity (boolean).
- seuclidean the normalized Euclidean distance.
- sokalmichener the Sokal-Michener dissimilarity (boolean).
- sokalsneath the Sokal-Sneath dissimilarity (boolean).
- sqeuclidean the squared Euclidean distance.
- yule the Yule dissimilarity (boolean).
-
Predicates
is_valid_dm checks for a valid distance matrix.
@@ -176,6 +149,7 @@
import numpy as np
import _hierarchy_wrap, types
+from distance import pdist
_cpy_non_euclid_methods = {'single': 0, 'complete': 1, 'average': 2,
'weighted': 6}
@@ -191,15 +165,6 @@
def _warning(s):
print ('[WARNING] scipy.cluster: %s' % s)
-def _unbiased_variance(X):
- """
- Computes the unbiased variance of each dimension of a collection of
- observation vectors, represented by a matrix where the rows are the
- observations.
- """
- #n = np.double(X.shape[1])
- return np.var(X, axis=0, ddof=1) # * n / (n - 1.0)
-
def _copy_array_if_base_present(a):
"""
Copies the array if its base points to a parent array.
@@ -788,399 +753,6 @@
else:
raise ValueError('The first argument must be one or two dimensional array. A %d-dimensional array is not permitted' % len(s))
-def minkowski(u, v, p):
- """
- d = minkowski(u, v, p)
-
- Returns the Minkowski distance between two vectors u and v,
-
- ||u-v||_p = (\sum {|u_i - v_i|^p})^(1/p).
- """
- u = np.asarray(u)
- v = np.asarray(v)
- if p < 1:
- raise ValueError("p must be at least 1")
- return (abs(u-v)**p).sum() ** (1.0 / p)
-
-def euclidean(u, v):
- """
- d = euclidean(u, v)
-
- Computes the Euclidean distance between two n-vectors u and v, ||u-v||_2
- """
- u = np.asarray(u)
- v = np.asarray(v)
- q=np.matrix(u-v)
- return np.sqrt((q*q.T).sum())
-
-def sqeuclidean(u, v):
- """
- d = sqeuclidean(u, v)
-
- Computes the squared Euclidean distance between two n-vectors u and v,
- (||u-v||_2)^2.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- return ((u-v)*(u-v).T).sum()
-
-def cosine(u, v):
- """
- d = cosine(u, v)
-
- Computes the Cosine distance between two n-vectors u and v,
- (1-uv^T)/(||u||_2 * ||v||_2).
- """
- u = np.asarray(u)
- v = np.asarray(v)
- return (1.0 - (np.dot(u, v.T) / \
- (np.sqrt(np.dot(u, u.T)) * np.sqrt(np.dot(v, v.T)))))
-
-def correlation(u, v):
- """
- d = correlation(u, v)
-
- Computes the correlation distance between two n-vectors u and v,
-
- 1 - (u - n|u|_1)(v - n|v|_1)^T
- --------------------------------- ,
- |(u - n|u|_1)|_2 |(v - n|v|_1)|^T
-
- where |*|_1 is the Manhattan norm and n is the common dimensionality
- of the vectors.
- """
- umu = u.mean()
- vmu = v.mean()
- um = u - umu
- vm = v - vmu
- return 1.0 - (np.dot(um, vm) /
- (np.sqrt(np.dot(um, um)) \
- * np.sqrt(np.dot(vm, vm))))
-
-def hamming(u, v):
- """
- d = hamming(u, v)
-
- Computes the Hamming distance between two n-vectors u and v,
- which is simply the proportion of disagreeing components in u
- and v. If u and v are boolean vectors, the hamming distance is
-
- (c_{01} + c_{10}) / n
-
- where c_{ij} is the number of occurrences of
-
- u[k] == i and v[k] == j
-
- for k < n.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- return (u != v).mean()
-
-def jaccard(u, v):
- """
- d = jaccard(u, v)
-
- Computes the Jaccard-Needham dissimilarity between two boolean
- n-vectors u and v, which is
-
- c_{TF} + c_{FT}
- ------------------------
- c_{TT} + c_{FT} + c_{TF}
-
- where c_{ij} is the number of occurrences of
-
- u[k] == i and v[k] == j
-
- for k < n.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- return (np.double(np.bitwise_and((u != v),
- np.bitwise_or(u != 0, v != 0)).sum())
- / np.double(np.bitwise_or(u != 0, v != 0).sum()))
-
-def kulsinski(u, v):
- """
- d = kulsinski(u, v)
-
- Computes the Kulsinski dissimilarity between two boolean n-vectors
- u and v, which is
-
- c_{TF} + c_{FT} - c_{TT} + n
- ----------------------------
- c_{FT} + c_{TF} + n
-
- where c_{ij} is the number of occurrences of
-
- u[k] == i and v[k] == j
-
- for k < n.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- n = len(u)
- (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
-
- return (ntf + nft - ntt + n) / (ntf + nft + n)
-
-def seuclidean(u, v, V):
- """
- d = seuclidean(u, v, V)
-
- Returns the standardized Euclidean distance between two
- n-vectors u and v. V is a m-dimensional vector of component
- variances. It is usually computed among a larger collection vectors.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- V = np.asarray(V)
- if len(V.shape) != 1 or V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
- raise TypeError('V must be a 1-D array of the same dimension as u and v.')
- return np.sqrt(((u-v)**2 / V).sum())
-
-def cityblock(u, v):
- """
- d = cityblock(u, v)
-
- Computes the Manhattan distance between two n-vectors u and v,
- \sum {u_i-v_i}.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- return abs(u-v).sum()
-
-def mahalanobis(u, v, VI):
- """
- d = mahalanobis(u, v, VI)
-
- Computes the Mahalanobis distance between two n-vectors u and v,
- (u-v)VI(u-v)^T
- where VI is the inverse covariance matrix.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- VI = np.asarray(VI)
- return np.sqrt(np.dot(np.dot((u-v),VI),(u-v).T).sum())
-
-def chebyshev(u, v):
- """
- d = chebyshev(u, v)
-
- Computes the Chebyshev distance between two n-vectors u and v,
- \max {|u_i-v_i|}.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- return max(abs(u-v))
-
-def braycurtis(u, v):
- """
- d = braycurtis(u, v)
-
- Computes the Bray-Curtis distance between two n-vectors u and v,
- \sum{|u_i-v_i|} / \sum{|u_i+v_i|}.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- return abs(u-v).sum() / abs(u+v).sum()
-
-def canberra(u, v):
- """
- d = canberra(u, v)
-
- Computes the Canberra distance between two n-vectors u and v,
- \sum{|u_i-v_i|} / \sum{|u_i|+|v_i}.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- return abs(u-v).sum() / (abs(u).sum() + abs(v).sum())
-
-def _nbool_correspond_all(u, v):
- if u.dtype != v.dtype:
- raise TypeError("Arrays being compared must be of the same data type.")
-
- if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
- not_u = 1.0 - u
- not_v = 1.0 - v
- nff = (not_u * not_v).sum()
- nft = (not_u * v).sum()
- ntf = (u * not_v).sum()
- ntt = (u * v).sum()
- elif u.dtype == np.bool:
- not_u = ~u
- not_v = ~v
- nff = (not_u & not_v).sum()
- nft = (not_u & v).sum()
- ntf = (u & not_v).sum()
- ntt = (u & v).sum()
- else:
- raise TypeError("Arrays being compared have unknown type.")
-
- return (nff, nft, ntf, ntt)
-
-def _nbool_correspond_ft_tf(u, v):
- if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
- not_u = 1.0 - u
- not_v = 1.0 - v
- nff = (not_u * not_v).sum()
- nft = (not_u * v).sum()
- ntf = (u * not_v).sum()
- ntt = (u * v).sum()
- else:
- not_u = ~u
- not_v = ~v
- nft = (not_u & v).sum()
- ntf = (u & not_v).sum()
- return (nft, ntf)
-
-def yule(u, v):
- """
- d = yule(u, v)
- Computes the Yule dissimilarity between two boolean n-vectors u and v,
-
- R
- ---------------------
- c_{TT} + c_{FF} + R/2
-
- where c_{ij} is the number of occurrences of
-
- u[k] == i and v[k] == j
-
- for k < n, and
-
- R = 2.0 * (c_{TF} + c_{FT}).
- """
- u = np.asarray(u)
- v = np.asarray(v)
- (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
- print nff, nft, ntf, ntt
- return float(2.0 * ntf * nft) / float(ntt * nff + ntf * nft)
-
-def matching(u, v):
- """
- d = matching(u, v)
-
- Computes the Matching dissimilarity between two boolean n-vectors
- u and v, which is
-
- (c_{TF} + c_{FT}) / n
-
- where c_{ij} is the number of occurrences of
-
- u[k] == i and v[k] == j
-
- for k < n.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- (nft, ntf) = _nbool_correspond_ft_tf(u, v)
- return float(nft + ntf) / float(len(u))
-
-def dice(u, v):
- """
- d = dice(u, v)
-
- Computes the Dice dissimilarity between two boolean n-vectors
- u and v, which is
-
- c_{TF} + c_{FT}
- ----------------------------
- 2 * c_{TT} + c_{FT} + c_{TF}
-
- where c_{ij} is the number of occurrences of
-
- u[k] == i and v[k] == j
-
- for k < n.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- if u.dtype == np.bool:
- ntt = (u & v).sum()
- else:
- ntt = (u * v).sum()
- (nft, ntf) = _nbool_correspond_ft_tf(u, v)
- return float(ntf + nft) / float(2.0 * ntt + ntf + nft)
-
-def rogerstanimoto(u, v):
- """
- d = rogerstanimoto(u, v)
-
- Computes the Rogers-Tanimoto dissimilarity between two boolean
- n-vectors u and v,
-
- R
- -------------------
- c_{TT} + c_{FF} + R
-
- where c_{ij} is the number of occurrences of
-
- u[k] == i and v[k] == j
-
- for k < n, and
-
- R = 2.0 * (c_{TF} + c_{FT}).
-
- """
- u = np.asarray(u)
- v = np.asarray(v)
- (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
- return float(2.0 * (ntf + nft)) / float(ntt + nff + (2.0 * (ntf + nft)))
-
-def russellrao(u, v):
- """
- d = russellrao(u, v)
-
- Computes the Russell-Rao dissimilarity between two boolean n-vectors
- u and v, (n - c_{TT}) / n where c_{ij} is the number of occurrences
- of u[k] == i and v[k] == j for k < n.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- if u.dtype == np.bool:
- ntt = (u & v).sum()
- else:
- ntt = (u * v).sum()
- return float(len(u) - ntt) / float(len(u))
-
-def sokalmichener(u, v):
- """
- d = sokalmichener(u, v)
-
- Computes the Sokal-Michener dissimilarity between two boolean vectors
- u and v, 2R / (S + 2R) where c_{ij} is the number of occurrences of
- u[k] == i and v[k] == j for k < n and R = 2 * (c_{TF} + c{FT}) and
- S = c_{FF} + c_{TT}.
- """
- u = np.asarray(u)
- v = np.asarray(v)
- if u.dtype == np.bool:
- ntt = (u & v).sum()
- nff = (~u & ~v).sum()
- else:
- ntt = (u * v).sum()
- nff = ((1.0 - u) * (1.0 - v)).sum()
- (nft, ntf) = _nbool_correspond_ft_tf(u, v)
- return float(2.0 * (ntf + nft))/float(ntt + nff + 2.0 * (ntf + nft))
-
-def sokalsneath(u, v):
- """
- d = sokalsneath(u, v)
-
- Computes the Sokal-Sneath dissimilarity between two boolean vectors
- u and v, 2R / (c_{TT} + 2R) where c_{ij} is the number of occurrences
- of u[k] == i and v[k] == j for k < n and R = 2 * (c_{TF} + c{FT}).
- """
- u = np.asarray(u)
- v = np.asarray(v)
- if u.dtype == np.bool:
- ntt = (u & v).sum()
- else:
- ntt = (u * v).sum()
- (nft, ntf) = _nbool_correspond_ft_tf(u, v)
- return float(2.0 * (ntf + nft))/float(ntt + 2.0 * (ntf + nft))
-
def _convert_to_bool(X):
if X.dtype != np.bool:
X = np.bool_(X)
@@ -1195,380 +767,6 @@
X = X.copy()
return X
-def pdist(X, metric='euclidean', p=2, V=None, VI=None):
- """ Y = pdist(X, method='euclidean', p=2)
-
- Computes the distance between m original observations in
- n-dimensional space. Returns a condensed distance matrix Y.
- For each i and j (i<j), the metric dist(u=X[i], v=X[j]) is
- computed and stored in the ij'th entry. See squareform
- to learn how to retrieve this entry.
-
- 1. Y = pdist(X)
-
- Computes the distance between m points using Euclidean distance
- (2-norm) as the distance metric between the points. The points
- are arranged as m n-dimensional row vectors in the matrix X.
-
- 2. Y = pdist(X, 'minkowski', p)
-
- Computes the distances using the Minkowski distance ||u-v||_p
- (p-norm) where p>=1.
-
- 3. Y = pdist(X, 'cityblock')
-
- Computes the city block or Manhattan distance between the
- points.
-
- 4. Y = pdist(X, 'seuclidean', V=None)
-
- Computes the standardized Euclidean distance. The standardized
- Euclidean distance between two n-vectors u and v is
-
- sqrt(\sum {(u_i-v_i)^2 / V[x_i]}).
-
- V is the variance vector; V[i] is the variance computed over all
- the i'th components of the points. If not passed, it is
- automatically computed.
-
- 5. Y = pdist(X, 'sqeuclidean')
-
- Computes the squared Euclidean distance ||u-v||_2^2 between
- the vectors.
-
- 6. Y = pdist(X, 'cosine')
-
- Computes the cosine distance between vectors u and v,
-
- 1 - uv^T
- -----------
- |u|_2 |v|_2
-
- where |*|_2 is the 2 norm of its argument *.
-
- 7. Y = pdist(X, 'correlation')
-
- Computes the correlation distance between vectors u and v. This is
-
- 1 - (u - n|u|_1)(v - n|v|_1)^T
- --------------------------------- ,
- |(u - n|u|_1)|_2 |(v - n|v|_1)|^T
-
- where |*|_1 is the Manhattan (or 1-norm) of its argument *,
- and n is the common dimensionality of the vectors.
-
- 8. Y = pdist(X, 'hamming')
-
- Computes the normalized Hamming distance, or the proportion
- of those vector elements between two n-vectors u and v which
- disagree. To save memory, the matrix X can be of type boolean.
-
- 9. Y = pdist(X, 'jaccard')
-
- Computes the Jaccard distance between the points. Given two
- vectors, u and v, the Jaccard distance is the proportion of
- those elements u_i and v_i that disagree where at least one
- of them is non-zero.
-
- 10. Y = pdist(X, 'chebyshev')
-
- Computes the Chebyshev distance between the points. The
- Chebyshev distance between two n-vectors u and v is the maximum
- norm-1 distance between their respective elements. More
- precisely, the distance is given by
-
- d(u,v) = max {|u_i-v_i|}.
-
- 11. Y = pdist(X, 'canberra')
-
- Computes the Canberra distance between the points. The
- Canberra distance between two points u and v is
-
- |u_1-v_1| |u_2-v_2| |u_n-v_n|
- d(u,v) = ----------- + ----------- + ... + -----------
- |u_1|+|v_1| |u_2|+|v_2| |u_n|+|v_n|
-
- 12. Y = pdist(X, 'braycurtis')
-
- Computes the Bray-Curtis distance between the points. The
- Bray-Curtis distance between two points u and v is
-
- |u_1-v_1| + |u_2-v_2| + ... + |u_n-v_n|
- d(u,v) = ---------------------------------------
- |u_1+v_1| + |u_2+v_2| + ... + |u_n+v_n|
-
- 13. Y = pdist(X, 'mahalanobis', VI=None)
-
- Computes the Mahalanobis distance between the points. The
- Mahalanobis distance between two points u and v is
- (u-v)(1/V)(u-v)^T
- where (1/V) is the inverse covariance. If VI is not None,
- VI will be used as the inverse covariance matrix.
-
- 14. Y = pdist(X, 'yule')
-
- Computes the Yule distance between each pair of boolean
- vectors. (see yule function documentation)
-
- 15. Y = pdist(X, 'matching')
-
- Computes the matching distance between each pair of boolean
- vectors. (see matching function documentation)
-
- 16. Y = pdist(X, 'dice')
-
- Computes the Dice distance between each pair of boolean
- vectors. (see dice function documentation)
-
- 17. Y = pdist(X, 'kulsinski')
-
- Computes the Kulsinski distance between each pair of
- boolean vectors. (see kulsinski function documentation)
-
- 17. Y = pdist(X, 'rogerstanimoto')
-
- Computes the Rogers-Tanimoto distance between each pair of
- boolean vectors. (see rogerstanimoto function documentation)
-
- 18. Y = pdist(X, 'russellrao')
-
- Computes the Russell-Rao distance between each pair of
- boolean vectors. (see russellrao function documentation)
-
- 19. Y = pdist(X, 'sokalmichener')
-
- Computes the Sokal-Michener distance between each pair of
- boolean vectors. (see sokalmichener function documentation)
-
- 20. Y = pdist(X, 'sokalsneath')
-
- Computes the Sokal-Sneath distance between each pair of
- boolean vectors. (see sokalsneath function documentation)
-
- 21. Y = pdist(X, f)
-
- Computes the distance between all pairs of vectors in X
- using the user supplied 2-arity function f. For example,
- Euclidean distance between the vectors could be computed
- as follows,
-
- dm = pdist(X, (lambda u, v: np.sqrt(((u-v)*(u-v).T).sum())))
-
- Note that you should avoid passing a reference to one of
- the distance functions defined in this library. For example,
-
- dm = pdist(X, sokalsneath)
-
- would calculate the pair-wise distances between the vectors
- in X using the Python function sokalsneath. This would result
- in sokalsneath being called {n \choose 2} times, which is
- inefficient. Instead, the optimized C version is more
- efficient, and we call it using the following syntax.
-
- dm = pdist(X, 'sokalsneath')
- """
-# 21. Y = pdist(X, 'test_Y')
-#
-# Computes the distance between all pairs of vectors in X
-# using the distance metric Y but with a more succint,
-# verifiable, but less efficient implementation.
-
-
- X = np.asarray(X)
-
- #if np.issubsctype(X, np.floating) and not np.issubsctype(X, np.double):
- # raise TypeError('Floating point arrays must be 64-bit (got %r).' %
- # (X.dtype.type,))
-
- # The C code doesn't do striding.
- [X] = _copy_arrays_if_base_present([_convert_to_double(X)])
-
- s = X.shape
-
- if len(s) != 2:
- raise ValueError('A 2-dimensional array must be passed.');
-
- m = s[0]
- n = s[1]
- dm = np.zeros((m * (m - 1) / 2,), dtype=np.double)
-
- mtype = type(metric)
- if mtype is types.FunctionType:
- k = 0
- if metric == minkowski:
- for i in xrange(0, m - 1):
- for j in xrange(i+1, m):
- dm[k] = minkowski(X[i, :], X[j, :], p)
- k = k + 1
- elif metric == seuclidean:
- for i in xrange(0, m - 1):
- for j in xrange(i+1, m):
- dm[k] = seuclidean(X[i, :], X[j, :], V)
- k = k + 1
- elif metric == mahalanobis:
- for i in xrange(0, m - 1):
- for j in xrange(i+1, m):
- dm[k] = mahalanobis(X[i, :], X[j, :], V)
- k = k + 1
- else:
- for i in xrange(0, m - 1):
- for j in xrange(i+1, m):
- dm[k] = metric(X[i, :], X[j, :])
- k = k + 1
-
- elif mtype is types.StringType:
- mstr = metric.lower()
-
- #if X.dtype != np.double and \
- # (mstr != 'hamming' and mstr != 'jaccard'):
- # TypeError('A double array must be passed.')
- if mstr in set(['euclidean', 'euclid', 'eu', 'e']):
- _hierarchy_wrap.pdist_euclidean_wrap(_convert_to_double(X), dm)
- elif mstr in set(['sqeuclidean', 'sqe', 'sqeuclid']):
- _hierarchy_wrap.pdist_euclidean_wrap(_convert_to_double(X), dm)
- dm = dm ** 2.0
- elif mstr in set(['cityblock', 'cblock', 'cb', 'c']):
- _hierarchy_wrap.pdist_city_block_wrap(X, dm)
- elif mstr in set(['hamming', 'hamm', 'ha', 'h']):
- if X.dtype == np.bool:
- _hierarchy_wrap.pdist_hamming_bool_wrap(_convert_to_bool(X), dm)
- else:
- _hierarchy_wrap.pdist_hamming_wrap(_convert_to_double(X), dm)
- elif mstr in set(['jaccard', 'jacc', 'ja', 'j']):
- if X.dtype == np.bool:
- _hierarchy_wrap.pdist_jaccard_bool_wrap(_convert_to_bool(X), dm)
- else:
- _hierarchy_wrap.pdist_jaccard_wrap(_convert_to_double(X), dm)
- elif mstr in set(['chebychev', 'chebyshev', 'cheby', 'cheb', 'ch']):
- _hierarchy_wrap.pdist_chebyshev_wrap(_convert_to_double(X), dm)
- elif mstr in set(['minkowski', 'mi', 'm']):
- _hierarchy_wrap.pdist_minkowski_wrap(_convert_to_double(X), dm, p)
- elif mstr in set(['seuclidean', 'se', 's']):
- if V is not None:
- V = np.asarray(V)
- if type(V) != np.ndarray:
- raise TypeError('Variance vector V must be a numpy array')
- if V.dtype != np.double:
- raise TypeError('Variance vector V must contain doubles.')
- if len(V.shape) != 1:
- raise ValueError('Variance vector V must be one-dimensional.')
- if V.shape[0] != n:
- raise ValueError('Variance vector V must be of the same dimension as the vectors on which the distances are computed.')
- # The C code doesn't do striding.
- [VV] = _copy_arrays_if_base_present([_convert_to_double(V)])
- else:
- VV = _unbiased_variance(X)
- _hierarchy_wrap.pdist_seuclidean_wrap(_convert_to_double(X), VV, dm)
- # Need to test whether vectorized cosine works better.
- # Find out: Is there a dot subtraction operator so I can
- # subtract matrices in a similar way to multiplying them?
- # Need to get rid of as much unnecessary C code as possible.
- elif mstr in set(['cosine', 'cos']):
- norms = np.sqrt(np.sum(X * X, axis=1))
- _hierarchy_wrap.pdist_cosine_wrap(_convert_to_double(X), dm, norms)
- elif mstr in set(['old_cosine', 'old_cos']):
- norms = np.sqrt(np.sum(X * X, axis=1))
- nV = norms.reshape(m, 1)
- # The numerator u * v
- nm = np.dot(X, X.T)
- # The denom. ||u||*||v||
- de = np.dot(nV, nV.T);
- dm = 1.0 - (nm / de)
- dm[xrange(0,m),xrange(0,m)] = 0.0
- dm = squareform(dm)
- elif mstr in set(['correlation', 'co']):
- X2 = X - X.mean(1)[:,np.newaxis]
- #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n)
- norms = np.sqrt(np.sum(X2 * X2, axis=1))
- _hierarchy_wrap.pdist_cosine_wrap(_convert_to_double(X2), _convert_to_double(dm), _convert_to_double(norms))
- elif mstr in set(['mahalanobis', 'mahal', 'mah']):
- if VI is not None:
- VI = _convert_to_double(np.asarray(VI))
- if type(VI) != np.ndarray:
- raise TypeError('VI must be a numpy array.')
- if VI.dtype != np.double:
- raise TypeError('The array must contain 64-bit floats.')
- [VI] = _copy_arrays_if_base_present([VI])
- else:
- V = np.cov(X.T)
- VI = _convert_to_double(np.linalg.inv(V).T.copy())
- # (u-v)V^(-1)(u-v)^T
- _hierarchy_wrap.pdist_mahalanobis_wrap(_convert_to_double(X), VI, dm)
- elif mstr == 'canberra':
- _hierarchy_wrap.pdist_canberra_wrap(_convert_to_bool(X), dm)
- elif mstr == 'braycurtis':
- _hierarchy_wrap.pdist_bray_curtis_wrap(_convert_to_bool(X), dm)
- elif mstr == 'yule':
- _hierarchy_wrap.pdist_yule_bool_wrap(_convert_to_bool(X), dm)
- elif mstr == 'matching':
- _hierarchy_wrap.pdist_matching_bool_wrap(_convert_to_bool(X), dm)
- elif mstr == 'kulsinski':
- _hierarchy_wrap.pdist_kulsinski_bool_wrap(_convert_to_bool(X), dm)
- elif mstr == 'dice':
- _hierarchy_wrap.pdist_dice_bool_wrap(_convert_to_bool(X), dm)
- elif mstr == 'rogerstanimoto':
- _hierarchy_wrap.pdist_rogerstanimoto_bool_wrap(_convert_to_bool(X), dm)
- elif mstr == 'russellrao':
- _hierarchy_wrap.pdist_russellrao_bool_wrap(_convert_to_bool(X), dm)
- elif mstr == 'sokalmichener':
- _hierarchy_wrap.pdist_sokalmichener_bool_wrap(_convert_to_bool(X), dm)
- elif mstr == 'sokalsneath':
- _hierarchy_wrap.pdist_sokalsneath_bool_wrap(_convert_to_bool(X), dm)
- elif metric == 'test_euclidean':
- dm = pdist(X, euclidean)
- elif metric == 'test_sqeuclidean':
- if V is None:
- V = _unbiased_variance(X)
- else:
- V = np.asarray(V)
- dm = pdist(X, lambda u, v: seuclidean(u, v, V))
- elif metric == 'test_braycurtis':
- dm = pdist(X, braycurtis)
- elif metric == 'test_mahalanobis':
- if VI is None:
- V = np.cov(X.T)
- VI = np.linalg.inv(V)
- else:
- VI = np.asarray(VI)
- [VI] = _copy_arrays_if_base_present([VI])
- # (u-v)V^(-1)(u-v)^T
- dm = pdist(X, (lambda u, v: mahalanobis(u, v, VI)))
- elif metric == 'test_cityblock':
- dm = pdist(X, cityblock)
- elif metric == 'test_minkowski':
- dm = pdist(X, minkowski, p)
- elif metric == 'test_cosine':
- dm = pdist(X, cosine)
- elif metric == 'test_correlation':
- dm = pdist(X, correlation)
- elif metric == 'test_hamming':
- dm = pdist(X, hamming)
- elif metric == 'test_jaccard':
- dm = pdist(X, jaccard)
- elif metric == 'test_chebyshev' or metric == 'test_chebychev':
- dm = pdist(X, chebyshev)
- elif metric == 'test_yule':
- dm = pdist(X, yule)
- elif metric == 'test_matching':
- dm = pdist(X, matching)
- elif metric == 'test_dice':
- dm = pdist(X, dice)
- elif metric == 'test_kulsinski':
- dm = pdist(X, kulsinski)
- elif metric == 'test_rogerstanimoto':
- dm = pdist(X, rogerstanimoto)
- elif metric == 'test_russellrao':
- dm = pdist(X, russellrao)
- elif metric == 'test_sokalsneath':
- dm = pdist(X, sokalsneath)
- elif metric == 'test_sokalmichener':
- dm = pdist(X, sokalmichener)
- else:
- raise ValueError('Unknown Distance Metric: %s' % mstr)
- else:
- raise TypeError('2nd argument metric must be a string identifier or a function.')
- return dm
-
def cophenet(*args, **kwargs):
"""
d = cophenet(Z)
Modified: trunk/scipy/cluster/setup.py
===================================================================
--- trunk/scipy/cluster/setup.py 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/setup.py 2008-06-09 05:55:44 UTC (rev 4417)
@@ -12,6 +12,10 @@
sources=[join('src', 'vq_module.c'), join('src', 'vq.c')],
include_dirs = [get_numpy_include_dirs()])
+ config.add_extension('_distance_wrap',
+ sources=[join('src', 'distance_wrap.c'), join('src', 'distance.c')],
+ include_dirs = [get_numpy_include_dirs()])
+
config.add_extension('_hierarchy_wrap',
sources=[join('src', 'hierarchy_wrap.c'), join('src', 'hierarchy.c')],
include_dirs = [get_numpy_include_dirs()])
Added: trunk/scipy/cluster/src/common.h
===================================================================
--- trunk/scipy/cluster/src/common.h 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/src/common.h 2008-06-09 05:55:44 UTC (rev 4417)
@@ -0,0 +1,70 @@
+/**
+ * common.h
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved into new file on June 8, 2008)
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CLUSTER_COMMON_H
+#define _CLUSTER_COMMON_H
+
+#define CPY_MAX(_x, _y) ((_x > _y) ? (_x) : (_y))
+#define CPY_MIN(_x, _y) ((_x < _y) ? (_x) : (_y))
+
+#define NCHOOSE2(_n) ((_n)*(_n-1)/2)
+
+#define CPY_BITS_PER_CHAR (sizeof(unsigned char) * 8)
+#define CPY_FLAG_ARRAY_SIZE_BYTES(num_bits) (CPY_CEIL_DIV((num_bits), \
+ CPY_BITS_PER_CHAR))
+#define CPY_GET_BIT(_xx, i) (((_xx)[(i) / CPY_BITS_PER_CHAR] >> \
+ ((CPY_BITS_PER_CHAR-1) - \
+ ((i) % CPY_BITS_PER_CHAR))) & 0x1)
+#define CPY_SET_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] |= \
+ ((0x1) << ((CPY_BITS_PER_CHAR-1) \
+ -((i) % CPY_BITS_PER_CHAR))))
+#define CPY_CLEAR_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] &= \
+ ~((0x1) << ((CPY_BITS_PER_CHAR-1) \
+ -((i) % CPY_BITS_PER_CHAR))))
+
+#ifndef CPY_CEIL_DIV
+#define CPY_CEIL_DIV(x, y) ((((double)x)/(double)y) == \
+ ((double)((x)/(y))) ? ((x)/(y)) : ((x)/(y) + 1))
+#endif
+
+
+#ifdef CPY_DEBUG
+#define CPY_DEBUG_MSG(...) fprintf(stderr, __VA_ARGS__)
+#else
+#define CPY_DEBUG_MSG(...)
+#endif
+
+#endif
Added: trunk/scipy/cluster/src/distance.c
===================================================================
--- trunk/scipy/cluster/src/distance.c 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/src/distance.c 2008-06-09 05:55:44 UTC (rev 4417)
@@ -0,0 +1,592 @@
+/**
+ * distance.c
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved to new file on June 8, 2008)
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include "common.h"
+#include "distance.h"
+
+static inline double euclidean_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = u[i] - v[i];
+ s = s + d * d;
+ }
+ return sqrt(s);
+}
+
+static inline double ess_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ s = s + d * d;
+ }
+ return s;
+}
+
+static inline double chebyshev_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double d, maxv = 0.0;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ if (d > maxv) {
+ maxv = d;
+ }
+ }
+ return maxv;
+}
+
+static inline double canberra_distance(const double *u, const double *v, int n) {
+ int i;
+ double s = 0.0;
+ for (i = 0; i < n; i++) {
+ s += (fabs(u[i] - v[i]) / (fabs(u[i]) + fabs(v[i])));
+ }
+ return s;
+}
+
+static inline double bray_curtis_distance(const double *u, const double *v, int n) {
+ int i;
+ double s1 = 0.0, s2 = 0.0;
+ for (i = 0; i < n; i++) {
+ s1 += fabs(u[i] - v[i]);
+ s2 += fabs(u[i] + v[i]);
+ }
+ return s1 / s2;
+}
+
+static inline double mahalanobis_distance(const double *u, const double *v,
+ const double *covinv, double *dimbuf1,
+ double *dimbuf2, int n) {
+ int i, j;
+ double s;
+ const double *covrow = covinv;
+ for (i = 0; i < n; i++) {
+ dimbuf1[i] = u[i] - v[i];
+ }
+ for (i = 0; i < n; i++) {
+ covrow = covinv + (i * n);
+ s = 0.0;
+ for (j = 0; j < n; j++) {
+ s += dimbuf1[j] * covrow[j];
+ }
+ dimbuf2[i] = s;
+ }
+ s = 0.0;
+ for (i = 0; i < n; i++) {
+ s += dimbuf1[i] * dimbuf2[i];
+ }
+ return sqrt(s);
+}
+
+double hamming_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0;
+ for (i = 0; i < n; i++) {
+ s = s + (u[i] != v[i]);
+ }
+ return s / (double)n;
+}
+
+static inline double hamming_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ double s = 0.0;
+ for (i = 0; i < n; i++) {
+ s = s + (u[i] != v[i]);
+ }
+ return s / (double)n;
+}
+
+static inline double yule_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int ntt = 0, nff = 0, nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntt += (u[i] && v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ nff += (!u[i] && !v[i]);
+ }
+ return (2.0 * ntf * nft) / (double)(ntt * nff + ntf * nft);
+}
+
+static inline double matching_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ }
+ return (double)(ntf + nft) / (double)(n);
+}
+
+static inline double dice_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int ntt = 0, nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntt += (u[i] && v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ }
+ return (double)(nft + ntf) / (double)(2.0 * ntt + ntf + nft);
+}
+
+
+static inline double rogerstanimoto_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int ntt = 0, nff = 0, nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntt += (u[i] && v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ nff += (!u[i] && !v[i]);
+ }
+ return (2.0 * (ntf + nft)) / ((double)ntt + nff + (2.0 * (ntf + nft)));
+}
+
+static inline double russellrao_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ /** int nff = 0, nft = 0, ntf = 0;**/
+ int ntt = 0;
+ for (i = 0; i < n; i++) {
+ /** nff += (!u[i] && !v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);**/
+ ntt += (u[i] && v[i]);
+ }
+ /** return (double)(ntf + nft + nff) / (double)n;**/
+ return (double) (n - ntt) / (double) n;
+}
+
+static inline double kulsinski_distance_bool(const char *u, const char *v, int n) {
+ int _i = 0;
+ int ntt = 0, nft = 0, ntf = 0, nff = 0;
+ for (_i = 0; _i < n; _i++) {
+ ntt += (u[_i] && v[_i]);
+ ntf += (u[_i] && !v[_i]);
+ nft += (!u[_i] && v[_i]);
+ nff += (!u[_i] && !v[_i]);
+ }
+ return ((double)(ntf + nft - ntt + n)) / ((double)(ntf + nft + n));
+}
+
+static inline double sokalsneath_distance_bool(const char *u, const char *v, int n) {
+ int _i = 0;
+ int ntt = 0, nft = 0, ntf = 0;
+ for (_i = 0; _i < n; _i++) {
+ ntt += (u[_i] && v[_i]);
+ ntf += (u[_i] && !v[_i]);
+ nft += (!u[_i] && v[_i]);
+ }
+ return (2.0 * (ntf + nft))/(2.0 * (ntf + nft) + ntt);
+}
+
+static inline double sokalmichener_distance_bool(const char *u, const char *v, int n) {
+ int _i = 0;
+ int ntt = 0, nft = 0, ntf = 0, nff = 0;
+ for (_i = 0; _i < n; _i++) {
+ ntt += (u[_i] && v[_i]);
+ nff += (!u[_i] && !v[_i]);
+ ntf += (u[_i] && !v[_i]);
+ nft += (!u[_i] && v[_i]);
+ }
+ return (2.0 * (ntf + nft))/(2.0 * (ntf + nft) + ntt + nff);
+}
+
+static inline double jaccard_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double denom = 0.0, num = 0.0;
+ for (i = 0; i < n; i++) {
+ num += (u[i] != v[i]) && ((u[i] != 0.0) || (v[i] != 0.0));
+ denom += (u[i] != 0.0) || (v[i] != 0.0);
+ }
+ return num / denom;
+}
+
+static inline double jaccard_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ double num = 0.0, denom = 0.0;
+ for (i = 0; i < n; i++) {
+ num += (u[i] != v[i]) && ((u[i] != 0) || (v[i] != 0));
+ denom += (u[i] != 0) || (v[i] != 0);
+ }
+ return num / denom;
+}
+
+static inline double dot_product(const double *u, const double *v, int n) {
+ int i;
+ double s = 0.0;
+ for (i = 0; i < n; i++) {
+ s += u[i] * v[i];
+ }
+ return s;
+}
+
+static inline double cosine_distance(const double *u, const double *v, int n,
+ const double nu, const double nv) {
+ return 1.0 - (dot_product(u, v, n) / (nu * nv));
+}
+
+static inline double seuclidean_distance(const double *var,
+ const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = u[i] - v[i];
+ s = s + (d * d) / var[i];
+ }
+ return sqrt(s);
+}
+
+static inline double city_block_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ s = s + d;
+ }
+ return s;
+}
+
+double minkowski_distance(const double *u, const double *v, int n, double p) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ s = s + pow(d, p);
+ }
+ return pow(s, 1.0 / p);
+}
+
+void compute_mean_vector(double *res, const double *X, int m, int n) {
+ int i, j;
+ const double *v;
+ for (i = 0; i < n; i++) {
+ res[i] = 0.0;
+ }
+ for (j = 0; j < m; j++) {
+
+ v = X + (j * n);
+ for (i = 0; i < n; i++) {
+ res[i] += v[i];
+ }
+ }
+ for (i = 0; i < n; i++) {
+ res[i] /= (double)m;
+ }
+}
+
+void pdist_euclidean(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = euclidean_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_mahalanobis(const double *X, const double *covinv,
+ double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ double *dimbuf1, *dimbuf2;
+ dimbuf1 = (double*)malloc(sizeof(double) * 2 * n);
+ dimbuf2 = dimbuf1 + n;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = mahalanobis_distance(u, v, covinv, dimbuf1, dimbuf2, n);
+ }
+ }
+ dimbuf2 = 0;
+ free(dimbuf1);
+}
+
+void pdist_bray_curtis(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = bray_curtis_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_canberra(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = canberra_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_hamming(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = hamming_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_hamming_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = hamming_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_jaccard(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = jaccard_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_jaccard_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = jaccard_distance_bool(u, v, n);
+ }
+ }
+}
+
+
+void pdist_chebyshev(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = chebyshev_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_cosine(const double *X, double *dm, int m, int n, const double *norms) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = cosine_distance(u, v, n, norms[i], norms[j]);
+ }
+ }
+}
+
+void pdist_seuclidean(const double *X, const double *var,
+ double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = seuclidean_distance(var, u, v, n);
+ }
+ }
+}
+
+void pdist_city_block(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = city_block_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_minkowski(const double *X, double *dm, int m, int n, double p) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = minkowski_distance(u, v, n, p);
+ }
+ }
+}
+
+void pdist_yule_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = yule_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_matching_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = matching_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_dice_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = dice_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_rogerstanimoto_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = rogerstanimoto_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_russellrao_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = russellrao_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_kulsinski_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = kulsinski_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_sokalsneath_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = sokalsneath_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_sokalmichener_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = sokalmichener_distance_bool(u, v, n);
+ }
+ }
+}
Added: trunk/scipy/cluster/src/distance.h
===================================================================
--- trunk/scipy/cluster/src/distance.h 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/src/distance.h 2008-06-09 05:55:44 UTC (rev 4417)
@@ -0,0 +1,64 @@
+/**
+ * distance.h
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved to new file on June 8, 2008)
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CPY_DISTANCE_H
+#define _CPY_DISTANCE_H
+
+void pdist_euclidean(const double *X, double *dm, int m, int n);
+void pdist_seuclidean(const double *X,
+ const double *var, double *dm, int m, int n);
+void pdist_mahalanobis(const double *X, const double *covinv,
+ double *dm, int m, int n);
+void pdist_bray_curtis(const double *X, double *dm, int m, int n);
+void pdist_canberra(const double *X, double *dm, int m, int n);
+void pdist_hamming(const double *X, double *dm, int m, int n);
+void pdist_hamming_bool(const char *X, double *dm, int m, int n);
+void pdist_city_block(const double *X, double *dm, int m, int n);
+void pdist_cosine(const double *X, double *dm, int m, int n, const double *norms);
+void pdist_chebyshev(const double *X, double *dm, int m, int n);
+void pdist_jaccard(const double *X, double *dm, int m, int n);
+void pdist_jaccard_bool(const char *X, double *dm, int m, int n);
+void pdist_kulsinski_bool(const char *X, double *dm, int m, int n);
+void pdist_minkowski(const double *X, double *dm, int m, int n, double p);
+void pdist_yule_bool(const char *X, double *dm, int m, int n);
+void pdist_matching_bool(const char *X, double *dm, int m, int n);
+void pdist_dice_bool(const char *X, double *dm, int m, int n);
+void pdist_rogerstanimoto_bool(const char *X, double *dm, int m, int n);
+void pdist_russellrao_bool(const char *X, double *dm, int m, int n);
+void pdist_sokalmichener_bool(const char *X, double *dm, int m, int n);
+void pdist_sokalsneath_bool(const char *X, double *dm, int m, int n);
+
+#endif
Added: trunk/scipy/cluster/src/distance_wrap.c
===================================================================
--- trunk/scipy/cluster/src/distance_wrap.c 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/src/distance_wrap.c 2008-06-09 05:55:44 UTC (rev 4417)
@@ -0,0 +1,525 @@
+/**
+ * distance_wrap.c
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved to new file on June 8, 2008)
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Copyright (c) 2007, Damian Eads. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <math.h>
+#include "distance.h"
+#include "Python.h"
+#include <numpy/arrayobject.h>
+#include <stdio.h>
+
+extern PyObject *pdist_euclidean_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_euclidean(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_canberra_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_canberra(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_bray_curtis_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_bray_curtis(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+
+extern PyObject *pdist_mahalanobis_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *covinv_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ const double *covinv;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &covinv_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ covinv = (const double*)covinv_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_mahalanobis(X, covinv, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+
+extern PyObject *pdist_chebyshev_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_chebyshev(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+
+extern PyObject *pdist_cosine_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_, *norms_;
+ int m, n;
+ double *dm;
+ const double *X, *norms;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_,
+ &PyArray_Type, &norms_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ norms = (const double*)norms_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_cosine(X, dm, m, n, norms);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_seuclidean_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_, *var_;
+ int m, n;
+ double *dm;
+ const double *X, *var;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &var_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (double*)X_->data;
+ dm = (double*)dm_->data;
+ var = (double*)var_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_seuclidean(X, var, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_city_block_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_city_block(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_hamming_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_hamming(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_hamming_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_hamming_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_jaccard_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_jaccard(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_jaccard_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_jaccard_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_minkowski_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm, *X;
+ double p;
+ if (!PyArg_ParseTuple(args, "O!O!d",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_,
+ &p)) {
+ return 0;
+ }
+ else {
+ X = (double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_minkowski(X, dm, m, n, p);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+
+extern PyObject *pdist_yule_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_yule_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_matching_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_matching_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_dice_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_dice_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_rogerstanimoto_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_rogerstanimoto_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_russellrao_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_russellrao_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_kulsinski_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_kulsinski_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_sokalmichener_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_sokalmichener_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_sokalsneath_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_sokalsneath_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+
+static PyMethodDef _distanceWrapMethods[] = {
+ {"pdist_bray_curtis_wrap", pdist_bray_curtis_wrap, METH_VARARGS},
+ {"pdist_canberra_wrap", pdist_canberra_wrap, METH_VARARGS},
+ {"pdist_chebyshev_wrap", pdist_chebyshev_wrap, METH_VARARGS},
+ {"pdist_city_block_wrap", pdist_city_block_wrap, METH_VARARGS},
+ {"pdist_cosine_wrap", pdist_cosine_wrap, METH_VARARGS},
+ {"pdist_dice_bool_wrap", pdist_dice_bool_wrap, METH_VARARGS},
+ {"pdist_euclidean_wrap", pdist_euclidean_wrap, METH_VARARGS},
+ {"pdist_hamming_wrap", pdist_hamming_wrap, METH_VARARGS},
+ {"pdist_hamming_bool_wrap", pdist_hamming_bool_wrap, METH_VARARGS},
+ {"pdist_jaccard_wrap", pdist_jaccard_wrap, METH_VARARGS},
+ {"pdist_jaccard_bool_wrap", pdist_jaccard_bool_wrap, METH_VARARGS},
+ {"pdist_kulsinski_bool_wrap", pdist_kulsinski_bool_wrap, METH_VARARGS},
+ {"pdist_mahalanobis_wrap", pdist_mahalanobis_wrap, METH_VARARGS},
+ {"pdist_matching_bool_wrap", pdist_matching_bool_wrap, METH_VARARGS},
+ {"pdist_minkowski_wrap", pdist_minkowski_wrap, METH_VARARGS},
+ {"pdist_rogerstanimoto_bool_wrap", pdist_rogerstanimoto_bool_wrap, METH_VARARGS},
+ {"pdist_russellrao_bool_wrap", pdist_russellrao_bool_wrap, METH_VARARGS},
+ {"pdist_seuclidean_wrap", pdist_seuclidean_wrap, METH_VARARGS},
+ {"pdist_sokalmichener_bool_wrap", pdist_sokalmichener_bool_wrap, METH_VARARGS},
+ {"pdist_sokalsneath_bool_wrap", pdist_sokalsneath_bool_wrap, METH_VARARGS},
+ {"pdist_yule_bool_wrap", pdist_yule_bool_wrap, METH_VARARGS},
+ {NULL, NULL} /* Sentinel - marks the end of this structure */
+};
+
+void init_distance_wrap(void) {
+ (void) Py_InitModule("_distance_wrap", _distanceWrapMethods);
+ import_array(); // Must be present for NumPy. Called first after above line.
+}
Modified: trunk/scipy/cluster/src/hierarchy.c
===================================================================
--- trunk/scipy/cluster/src/hierarchy.c 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/src/hierarchy.c 2008-06-09 05:55:44 UTC (rev 4417)
@@ -34,12 +34,11 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#define NCHOOSE2(_n) ((_n)*(_n-1)/2)
+#include "common.h"
+
#define ISCLUSTER(_nd) ((_nd)->id >= n)
#define GETCLUSTER(_id) ((lists + _id - n))
-#define CPY_MAX(_x, _y) ((_x > _y) ? (_x) : (_y))
-#define CPY_MIN(_x, _y) ((_x < _y) ? (_x) : (_y))
/** The number of link stats (for the inconsistency computation) for each
cluster. */
@@ -61,39 +60,15 @@
#define CPY_LIN_DIST 2
#define CPY_LIN_CNT 3
-#define CPY_BITS_PER_CHAR (sizeof(unsigned char) * 8)
-#define CPY_FLAG_ARRAY_SIZE_BYTES(num_bits) (CPY_CEIL_DIV((num_bits), \
- CPY_BITS_PER_CHAR))
-#define CPY_GET_BIT(_xx, i) (((_xx)[(i) / CPY_BITS_PER_CHAR] >> \
- ((CPY_BITS_PER_CHAR-1) - \
- ((i) % CPY_BITS_PER_CHAR))) & 0x1)
-#define CPY_SET_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] |= \
- ((0x1) << ((CPY_BITS_PER_CHAR-1) \
- -((i) % CPY_BITS_PER_CHAR))))
-#define CPY_CLEAR_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] &= \
- ~((0x1) << ((CPY_BITS_PER_CHAR-1) \
- -((i) % CPY_BITS_PER_CHAR))))
-
-#ifndef CPY_CEIL_DIV
-#define CPY_CEIL_DIV(x, y) ((((double)x)/(double)y) == \
- ((double)((x)/(y))) ? ((x)/(y)) : ((x)/(y) + 1))
-#endif
-
-
-#ifdef CPY_DEBUG
-#define CPY_DEBUG_MSG(...) fprintf(stderr, __VA_ARGS__)
-#else
-#define CPY_DEBUG_MSG(...)
-#endif
-
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include "hierarchy.h"
+#include "distance.h"
-double euclidean_distance(const double *u, const double *v, int n) {
+static inline double euclidean_distance(const double *u, const double *v, int n) {
int i = 0;
double s = 0.0, d;
for (i = 0; i < n; i++) {
@@ -103,548 +78,6 @@
return sqrt(s);
}
-double ess_distance(const double *u, const double *v, int n) {
- int i = 0;
- double s = 0.0, d;
- for (i = 0; i < n; i++) {
- d = fabs(u[i] - v[i]);
- s = s + d * d;
- }
- return s;
-}
-
-double chebyshev_distance(const double *u, const double *v, int n) {
- int i = 0;
- double d, maxv = 0.0;
- for (i = 0; i < n; i++) {
- d = fabs(u[i] - v[i]);
- if (d > maxv) {
- maxv = d;
- }
- }
- return maxv;
-}
-
-double canberra_distance(const double *u, const double *v, int n) {
- int i;
- double s = 0.0;
- for (i = 0; i < n; i++) {
- s += (fabs(u[i] - v[i]) / (fabs(u[i]) + fabs(v[i])));
- }
- return s;
-}
-
-double bray_curtis_distance(const double *u, const double *v, int n) {
- int i;
- double s1 = 0.0, s2 = 0.0;
- for (i = 0; i < n; i++) {
- s1 += fabs(u[i] - v[i]);
- s2 += fabs(u[i] + v[i]);
- }
- return s1 / s2;
-}
-
-double mahalanobis_distance(const double *u, const double *v,
- const double *covinv, double *dimbuf1,
- double *dimbuf2, int n) {
- int i, j;
- double s;
- const double *covrow = covinv;
- for (i = 0; i < n; i++) {
- dimbuf1[i] = u[i] - v[i];
- }
- for (i = 0; i < n; i++) {
- covrow = covinv + (i * n);
- s = 0.0;
- for (j = 0; j < n; j++) {
- s += dimbuf1[j] * covrow[j];
- }
- dimbuf2[i] = s;
- }
- s = 0.0;
- for (i = 0; i < n; i++) {
- s += dimbuf1[i] * dimbuf2[i];
- }
- return sqrt(s);
-}
-
-double hamming_distance(const double *u, const double *v, int n) {
- int i = 0;
- double s = 0.0;
- for (i = 0; i < n; i++) {
- s = s + (u[i] != v[i]);
- }
- return s / (double)n;
-}
-
-double hamming_distance_bool(const char *u, const char *v, int n) {
- int i = 0;
- double s = 0.0;
- for (i = 0; i < n; i++) {
- s = s + (u[i] != v[i]);
- }
- return s / (double)n;
-}
-
-double yule_distance_bool(const char *u, const char *v, int n) {
- int i = 0;
- int ntt = 0, nff = 0, nft = 0, ntf = 0;
- for (i = 0; i < n; i++) {
- ntt += (u[i] && v[i]);
- ntf += (u[i] && !v[i]);
- nft += (!u[i] && v[i]);
- nff += (!u[i] && !v[i]);
- }
- return (2.0 * ntf * nft) / (double)(ntt * nff + ntf * nft);
-}
-
-double matching_distance_bool(const char *u, const char *v, int n) {
- int i = 0;
- int nft = 0, ntf = 0;
- for (i = 0; i < n; i++) {
- ntf += (u[i] && !v[i]);
- nft += (!u[i] && v[i]);
- }
- return (double)(ntf + nft) / (double)(n);
-}
-
-double dice_distance_bool(const char *u, const char *v, int n) {
- int i = 0;
- int ntt = 0, nft = 0, ntf = 0;
- for (i = 0; i < n; i++) {
- ntt += (u[i] && v[i]);
- ntf += (u[i] && !v[i]);
- nft += (!u[i] && v[i]);
- }
- return (double)(nft + ntf) / (double)(2.0 * ntt + ntf + nft);
-}
-
-
-double rogerstanimoto_distance_bool(const char *u, const char *v, int n) {
- int i = 0;
- int ntt = 0, nff = 0, nft = 0, ntf = 0;
- for (i = 0; i < n; i++) {
- ntt += (u[i] && v[i]);
- ntf += (u[i] && !v[i]);
- nft += (!u[i] && v[i]);
- nff += (!u[i] && !v[i]);
- }
- return (2.0 * (ntf + nft)) / ((double)ntt + nff + (2.0 * (ntf + nft)));
-}
-
-double russellrao_distance_bool(const char *u, const char *v, int n) {
- int i = 0;
- /** int nff = 0, nft = 0, ntf = 0;**/
- int ntt = 0;
- for (i = 0; i < n; i++) {
- /** nff += (!u[i] && !v[i]);
- ntf += (u[i] && !v[i]);
- nft += (!u[i] && v[i]);**/
- ntt += (u[i] && v[i]);
- }
- /** return (double)(ntf + nft + nff) / (double)n;**/
- return (double) (n - ntt) / (double) n;
-}
-
-static inline double kulsinski_distance_bool(const char *u, const char *v, int n) {
- int _i = 0;
- int ntt = 0, nft = 0, ntf = 0, nff = 0;
- for (_i = 0; _i < n; _i++) {
- ntt += (u[_i] && v[_i]);
- ntf += (u[_i] && !v[_i]);
- nft += (!u[_i] && v[_i]);
- nff += (!u[_i] && !v[_i]);
- }
- return ((double)(ntf + nft - ntt + n)) / ((double)(ntf + nft + n));
-}
-
-static inline double sokalsneath_distance_bool(const char *u, const char *v, int n) {
- int _i = 0;
- int ntt = 0, nft = 0, ntf = 0;
- for (_i = 0; _i < n; _i++) {
- ntt += (u[_i] && v[_i]);
- ntf += (u[_i] && !v[_i]);
- nft += (!u[_i] && v[_i]);
- }
- return (2.0 * (ntf + nft))/(2.0 * (ntf + nft) + ntt);
-}
-
-static inline double sokalmichener_distance_bool(const char *u, const char *v, int n) {
- int _i = 0;
- int ntt = 0, nft = 0, ntf = 0, nff = 0;
- for (_i = 0; _i < n; _i++) {
- ntt += (u[_i] && v[_i]);
- nff += (!u[_i] && !v[_i]);
- ntf += (u[_i] && !v[_i]);
- nft += (!u[_i] && v[_i]);
- }
- return (2.0 * (ntf + nft))/(2.0 * (ntf + nft) + ntt + nff);
-}
-
-double jaccard_distance(const double *u, const double *v, int n) {
- int i = 0;
- double denom = 0.0, num = 0.0;
- for (i = 0; i < n; i++) {
- num += (u[i] != v[i]) && ((u[i] != 0.0) || (v[i] != 0.0));
- denom += (u[i] != 0.0) || (v[i] != 0.0);
- }
- return num / denom;
-}
-
-double jaccard_distance_bool(const char *u, const char *v, int n) {
- int i = 0;
- double num = 0.0, denom = 0.0;
- for (i = 0; i < n; i++) {
- num += (u[i] != v[i]) && ((u[i] != 0) || (v[i] != 0));
- denom += (u[i] != 0) || (v[i] != 0);
- }
- return num / denom;
-}
-
-double dot_product(const double *u, const double *v, int n) {
- int i;
- double s = 0.0;
- for (i = 0; i < n; i++) {
- s += u[i] * v[i];
- }
- return s;
-}
-
-double cosine_distance(const double *u, const double *v, int n,
- const double nu, const double nv) {
- return 1.0 - (dot_product(u, v, n) / (nu * nv));
-}
-
-double seuclidean_distance(const double *var,
- const double *u, const double *v, int n) {
- int i = 0;
- double s = 0.0, d;
- for (i = 0; i < n; i++) {
- d = u[i] - v[i];
- s = s + (d * d) / var[i];
- }
- return sqrt(s);
-}
-
-double city_block_distance(const double *u, const double *v, int n) {
- int i = 0;
- double s = 0.0, d;
- for (i = 0; i < n; i++) {
- d = fabs(u[i] - v[i]);
- s = s + d;
- }
- return s;
-}
-
-double minkowski_distance(const double *u, const double *v, int n, double p) {
- int i = 0;
- double s = 0.0, d;
- for (i = 0; i < n; i++) {
- d = fabs(u[i] - v[i]);
- s = s + pow(d, p);
- }
- return pow(s, 1.0 / p);
-}
-
-void compute_mean_vector(double *res, const double *X, int m, int n) {
- int i, j;
- const double *v;
- for (i = 0; i < n; i++) {
- res[i] = 0.0;
- }
- for (j = 0; j < m; j++) {
-
- v = X + (j * n);
- for (i = 0; i < n; i++) {
- res[i] += v[i];
- }
- }
- for (i = 0; i < n; i++) {
- res[i] /= (double)m;
- }
-}
-
-void pdist_euclidean(const double *X, double *dm, int m, int n) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = euclidean_distance(u, v, n);
- }
- }
-}
-
-void pdist_mahalanobis(const double *X, const double *covinv,
- double *dm, int m, int n) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- double *dimbuf1, *dimbuf2;
- dimbuf1 = (double*)malloc(sizeof(double) * 2 * n);
- dimbuf2 = dimbuf1 + n;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = mahalanobis_distance(u, v, covinv, dimbuf1, dimbuf2, n);
- }
- }
- dimbuf2 = 0;
- free(dimbuf1);
-}
-
-void pdist_bray_curtis(const double *X, double *dm, int m, int n) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = bray_curtis_distance(u, v, n);
- }
- }
-}
-
-void pdist_canberra(const double *X, double *dm, int m, int n) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = canberra_distance(u, v, n);
- }
- }
-}
-
-void pdist_hamming(const double *X, double *dm, int m, int n) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = hamming_distance(u, v, n);
- }
- }
-}
-
-void pdist_hamming_bool(const char *X, double *dm, int m, int n) {
- int i, j;
- const char *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = hamming_distance_bool(u, v, n);
- }
- }
-}
-
-void pdist_jaccard(const double *X, double *dm, int m, int n) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = jaccard_distance(u, v, n);
- }
- }
-}
-
-void pdist_jaccard_bool(const char *X, double *dm, int m, int n) {
- int i, j;
- const char *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = jaccard_distance_bool(u, v, n);
- }
- }
-}
-
-
-void pdist_chebyshev(const double *X, double *dm, int m, int n) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = chebyshev_distance(u, v, n);
- }
- }
-}
-
-void pdist_cosine(const double *X, double *dm, int m, int n, const double *norms) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = cosine_distance(u, v, n, norms[i], norms[j]);
- }
- }
-}
-
-void pdist_seuclidean(const double *X, const double *var,
- double *dm, int m, int n) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = seuclidean_distance(var, u, v, n);
- }
- }
-}
-
-void pdist_city_block(const double *X, double *dm, int m, int n) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = city_block_distance(u, v, n);
- }
- }
-}
-
-void pdist_minkowski(const double *X, double *dm, int m, int n, double p) {
- int i, j;
- const double *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = minkowski_distance(u, v, n, p);
- }
- }
-}
-
-void pdist_yule_bool(const char *X, double *dm, int m, int n) {
- int i, j;
- const char *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = yule_distance_bool(u, v, n);
- }
- }
-}
-
-void pdist_matching_bool(const char *X, double *dm, int m, int n) {
- int i, j;
- const char *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = matching_distance_bool(u, v, n);
- }
- }
-}
-
-void pdist_dice_bool(const char *X, double *dm, int m, int n) {
- int i, j;
- const char *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = dice_distance_bool(u, v, n);
- }
- }
-}
-
-void pdist_rogerstanimoto_bool(const char *X, double *dm, int m, int n) {
- int i, j;
- const char *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = rogerstanimoto_distance_bool(u, v, n);
- }
- }
-}
-
-void pdist_russellrao_bool(const char *X, double *dm, int m, int n) {
- int i, j;
- const char *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = russellrao_distance_bool(u, v, n);
- }
- }
-}
-
-void pdist_kulsinski_bool(const char *X, double *dm, int m, int n) {
- int i, j;
- const char *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = kulsinski_distance_bool(u, v, n);
- }
- }
-}
-
-void pdist_sokalsneath_bool(const char *X, double *dm, int m, int n) {
- int i, j;
- const char *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = sokalsneath_distance_bool(u, v, n);
- }
- }
-}
-
-void pdist_sokalmichener_bool(const char *X, double *dm, int m, int n) {
- int i, j;
- const char *u, *v;
- double *it = dm;
- for (i = 0; i < m; i++) {
- for (j = i + 1; j < m; j++, it++) {
- u = X + (n * i);
- v = X + (n * j);
- *it = sokalmichener_distance_bool(u, v, n);
- }
- }
-}
-
void chopmins(int *ind, int mini, int minj, int np) {
int i;
for (i = mini; i < minj - 1; i++) {
Modified: trunk/scipy/cluster/src/hierarchy.h
===================================================================
--- trunk/scipy/cluster/src/hierarchy.h 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/src/hierarchy.h 2008-06-09 05:55:44 UTC (rev 4417)
@@ -34,8 +34,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef _CPY_CLUSTER_H
-#define _CPY_CLUSTER_H
+#ifndef _CPY_HIERARCHY_H
+#define _CPY_HIERARCHY_H
#define CPY_LINKAGE_SINGLE 0
#define CPY_LINKAGE_COMPLETE 1
@@ -89,35 +89,9 @@
void dist_to_squareform_from_vector(double *M, const double *v, int n);
void dist_to_vector_from_squareform(const double *M, double *v, int n);
-void pdist_euclidean(const double *X, double *dm, int m, int n);
-void pdist_seuclidean(const double *X,
- const double *var, double *dm, int m, int n);
-void pdist_mahalanobis(const double *X, const double *covinv,
- double *dm, int m, int n);
-void pdist_bray_curtis(const double *X, double *dm, int m, int n);
-void pdist_canberra(const double *X, double *dm, int m, int n);
-void pdist_hamming(const double *X, double *dm, int m, int n);
-void pdist_hamming_bool(const char *X, double *dm, int m, int n);
-void pdist_city_block(const double *X, double *dm, int m, int n);
-void pdist_cosine(const double *X, double *dm, int m, int n, const double *norms);
-void pdist_chebyshev(const double *X, double *dm, int m, int n);
-void pdist_jaccard(const double *X, double *dm, int m, int n);
-void pdist_jaccard_bool(const char *X, double *dm, int m, int n);
-void pdist_kulsinski_bool(const char *X, double *dm, int m, int n);
-void pdist_minkowski(const double *X, double *dm, int m, int n, double p);
-void pdist_yule_bool(const char *X, double *dm, int m, int n);
-void pdist_matching_bool(const char *X, double *dm, int m, int n);
-void pdist_dice_bool(const char *X, double *dm, int m, int n);
-void pdist_rogerstanimoto_bool(const char *X, double *dm, int m, int n);
-void pdist_russellrao_bool(const char *X, double *dm, int m, int n);
-void pdist_sokalmichener_bool(const char *X, double *dm, int m, int n);
-void pdist_sokalsneath_bool(const char *X, double *dm, int m, int n);
-
void inconsistency_calculation(const double *Z, double *R, int n, int d);
void inconsistency_calculation_alt(const double *Z, double *R, int n, int d);
-double dot_product(const double *u, const double *v, int n);
-
void chopmins(int *ind, int mini, int minj, int np);
void chopmins_ns_i(double *ind, int mini, int np);
void chopmins_ns_ij(double *ind, int mini, int minj, int np);
Modified: trunk/scipy/cluster/src/hierarchy_wrap.c
===================================================================
--- trunk/scipy/cluster/src/hierarchy_wrap.c 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/src/hierarchy_wrap.c 2008-06-09 05:55:44 UTC (rev 4417)
@@ -332,18 +332,6 @@
return Py_BuildValue("d", 0.0);
}
-extern PyObject *dot_product_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *d1_, *d2_;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &d1_,
- &PyArray_Type, &d2_)) {
- return 0;
- }
- return Py_BuildValue("d", dot_product((const double*)d1_->data,
- (const double*)d2_->data,
- d1_->dimensions[0]));
-}
-
extern PyObject *to_squareform_from_vector_wrap(PyObject *self, PyObject *args) {
PyArrayObject *M_, *v_;
int n;
@@ -382,459 +370,6 @@
return Py_BuildValue("d", 0.0);
}
-extern PyObject *pdist_euclidean_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const double *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const double*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_euclidean(X, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-extern PyObject *pdist_canberra_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const double *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const double*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_canberra(X, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-extern PyObject *pdist_bray_curtis_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const double *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const double*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_bray_curtis(X, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-
-extern PyObject *pdist_mahalanobis_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *covinv_, *dm_;
- int m, n;
- double *dm;
- const double *X;
- const double *covinv;
- if (!PyArg_ParseTuple(args, "O!O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &covinv_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const double*)X_->data;
- covinv = (const double*)covinv_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_mahalanobis(X, covinv, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-
-extern PyObject *pdist_chebyshev_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const double *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const double*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_chebyshev(X, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-
-extern PyObject *pdist_cosine_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_, *norms_;
- int m, n;
- double *dm;
- const double *X, *norms;
- if (!PyArg_ParseTuple(args, "O!O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_,
- &PyArray_Type, &norms_)) {
- return 0;
- }
- else {
- X = (const double*)X_->data;
- dm = (double*)dm_->data;
- norms = (const double*)norms_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_cosine(X, dm, m, n, norms);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-extern PyObject *pdist_seuclidean_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_, *var_;
- int m, n;
- double *dm;
- const double *X, *var;
- if (!PyArg_ParseTuple(args, "O!O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &var_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (double*)X_->data;
- dm = (double*)dm_->data;
- var = (double*)var_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_seuclidean(X, var, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-extern PyObject *pdist_city_block_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const double *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const double*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_city_block(X, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-extern PyObject *pdist_hamming_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const double *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const double*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_hamming(X, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-extern PyObject *pdist_hamming_bool_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const char *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const char*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_hamming_bool(X, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-extern PyObject *pdist_jaccard_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const double *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const double*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_jaccard(X, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-extern PyObject *pdist_jaccard_bool_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const char *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const char*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_jaccard_bool(X, dm, m, n);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-extern PyObject *pdist_minkowski_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm, *X;
- double p;
- if (!PyArg_ParseTuple(args, "O!O!d",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_,
- &p)) {
- return 0;
- }
- else {
- X = (double*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_minkowski(X, dm, m, n, p);
- }
- return Py_BuildValue("d", 0.0);
-}
-
-
-extern PyObject *pdist_yule_bool_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const char *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const char*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_yule_bool(X, dm, m, n);
- }
- return Py_BuildValue("");
-}
-
-extern PyObject *pdist_matching_bool_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const char *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const char*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_matching_bool(X, dm, m, n);
- }
- return Py_BuildValue("");
-}
-
-extern PyObject *pdist_dice_bool_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const char *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const char*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_dice_bool(X, dm, m, n);
- }
- return Py_BuildValue("");
-}
-
-extern PyObject *pdist_rogerstanimoto_bool_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const char *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const char*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_rogerstanimoto_bool(X, dm, m, n);
- }
- return Py_BuildValue("");
-}
-
-extern PyObject *pdist_russellrao_bool_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const char *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const char*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_russellrao_bool(X, dm, m, n);
- }
- return Py_BuildValue("");
-}
-
-extern PyObject *pdist_kulsinski_bool_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const char *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const char*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_kulsinski_bool(X, dm, m, n);
- }
- return Py_BuildValue("");
-}
-
-extern PyObject *pdist_sokalmichener_bool_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const char *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const char*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_sokalmichener_bool(X, dm, m, n);
- }
- return Py_BuildValue("");
-}
-
-extern PyObject *pdist_sokalsneath_bool_wrap(PyObject *self, PyObject *args) {
- PyArrayObject *X_, *dm_;
- int m, n;
- double *dm;
- const char *X;
- if (!PyArg_ParseTuple(args, "O!O!",
- &PyArray_Type, &X_,
- &PyArray_Type, &dm_)) {
- return 0;
- }
- else {
- X = (const char*)X_->data;
- dm = (double*)dm_->data;
- m = X_->dimensions[0];
- n = X_->dimensions[1];
-
- pdist_sokalsneath_bool(X, dm, m, n);
- }
- return Py_BuildValue("");
-}
-
extern PyObject *leaders_wrap(PyObject *self, PyObject *args) {
PyArrayObject *Z_, *T_, *L_, *M_;
int kk, n, res;
@@ -864,7 +399,6 @@
{"cluster_maxclust_monocrit_wrap", cluster_maxclust_monocrit_wrap, METH_VARARGS},
{"cluster_monocrit_wrap", cluster_monocrit_wrap, METH_VARARGS},
{"cophenetic_distances_wrap", cophenetic_distances_wrap, METH_VARARGS},
- {"dot_product_wrap", dot_product_wrap, METH_VARARGS},
{"get_max_dist_for_each_cluster_wrap",
get_max_dist_for_each_cluster_wrap, METH_VARARGS},
{"get_max_Rfield_for_each_cluster_wrap",
@@ -873,27 +407,6 @@
{"leaders_wrap", leaders_wrap, METH_VARARGS},
{"linkage_euclid_wrap", linkage_euclid_wrap, METH_VARARGS},
{"linkage_wrap", linkage_wrap, METH_VARARGS},
- {"pdist_bray_curtis_wrap", pdist_bray_curtis_wrap, METH_VARARGS},
- {"pdist_canberra_wrap", pdist_canberra_wrap, METH_VARARGS},
- {"pdist_chebyshev_wrap", pdist_chebyshev_wrap, METH_VARARGS},
- {"pdist_city_block_wrap", pdist_city_block_wrap, METH_VARARGS},
- {"pdist_cosine_wrap", pdist_cosine_wrap, METH_VARARGS},
- {"pdist_dice_bool_wrap", pdist_dice_bool_wrap, METH_VARARGS},
- {"pdist_euclidean_wrap", pdist_euclidean_wrap, METH_VARARGS},
- {"pdist_hamming_wrap", pdist_hamming_wrap, METH_VARARGS},
- {"pdist_hamming_bool_wrap", pdist_hamming_bool_wrap, METH_VARARGS},
- {"pdist_jaccard_wrap", pdist_jaccard_wrap, METH_VARARGS},
- {"pdist_jaccard_bool_wrap", pdist_jaccard_bool_wrap, METH_VARARGS},
- {"pdist_kulsinski_bool_wrap", pdist_kulsinski_bool_wrap, METH_VARARGS},
- {"pdist_mahalanobis_wrap", pdist_mahalanobis_wrap, METH_VARARGS},
- {"pdist_matching_bool_wrap", pdist_matching_bool_wrap, METH_VARARGS},
- {"pdist_minkowski_wrap", pdist_minkowski_wrap, METH_VARARGS},
- {"pdist_rogerstanimoto_bool_wrap", pdist_rogerstanimoto_bool_wrap, METH_VARARGS},
- {"pdist_russellrao_bool_wrap", pdist_russellrao_bool_wrap, METH_VARARGS},
- {"pdist_seuclidean_wrap", pdist_seuclidean_wrap, METH_VARARGS},
- {"pdist_sokalmichener_bool_wrap", pdist_sokalmichener_bool_wrap, METH_VARARGS},
- {"pdist_sokalsneath_bool_wrap", pdist_sokalsneath_bool_wrap, METH_VARARGS},
- {"pdist_yule_bool_wrap", pdist_yule_bool_wrap, METH_VARARGS},
{"prelist_wrap", prelist_wrap, METH_VARARGS},
{"to_squareform_from_vector_wrap",
to_squareform_from_vector_wrap, METH_VARARGS},
Modified: trunk/scipy/cluster/tests/test_hierarchy.py
===================================================================
--- trunk/scipy/cluster/tests/test_hierarchy.py 2008-06-07 08:13:02 UTC (rev 4416)
+++ trunk/scipy/cluster/tests/test_hierarchy.py 2008-06-09 05:55:44 UTC (rev 4417)
@@ -33,44 +33,14 @@
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
import sys
import os.path
-from scipy.testing import *
-from scipy.cluster.hierarchy import pdist, squareform, linkage, from_mlab_linkage, numobs_dm, numobs_y, numobs_linkage, matching, jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule
-
import numpy
-#import math
-#from scipy.cluster.hierarchy import pdist, euclidean
+from scipy.testing import *
+from scipy.cluster.hierarchy import squareform, linkage, from_mlab_linkage, numobs_dm, numobs_y, numobs_linkage
+from scipy.cluster.distance import pdist, matching, jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule
-_filenames = ["iris.txt",
- "pdist-hamming-ml.txt",
- "pdist-boolean-inp.txt",
- "pdist-jaccard-ml.txt",
- "pdist-cityblock-ml-iris.txt",
- "pdist-minkowski-3.2-ml-iris.txt",
- "pdist-cityblock-ml.txt",
- "pdist-correlation-ml-iris.txt",
- "pdist-minkowski-5.8-ml-iris.txt",
- "pdist-correlation-ml.txt",
- "pdist-minkowski-3.2-ml.txt",
- "pdist-cosine-ml-iris.txt",
- "pdist-seuclidean-ml-iris.txt",
- "pdist-cosine-ml.txt",
- "pdist-seuclidean-ml.txt",
- "pdist-double-inp.txt",
- "pdist-spearman-ml.txt",
- "pdist-euclidean-ml.txt",
- "pdist-euclidean-ml-iris.txt",
- "pdist-chebychev-ml.txt",
- "pdist-chebychev-ml-iris.txt",
- "linkage-single-tdist.txt",
- "linkage-complete-tdist.txt",
- "linkage-average-tdist.txt",
- "linkage-weighted-tdist.txt",
- "random-bool-data.txt"]
-
_tdist = numpy.array([[0, 662, 877, 255, 412, 996],
[662, 0, 295, 468, 268, 400],
[877, 295, 0, 754, 564, 138],
@@ -80,874 +50,27 @@
_ytdist = squareform(_tdist)
-# A hashmap of expected output arrays for the tests. These arrays
-# come from a list of text files, which are read prior to testing.
eo = {}
+_filenames = ["iris.txt",
+ "linkage-single-tdist.txt",
+ "linkage-complete-tdist.txt",
+ "linkage-average-tdist.txt",
+ "linkage-weighted-tdist.txt",
+ "random-bool-data.txt"]
+
+
def load_testing_files():
for fn in _filenames:
name = fn.replace(".txt", "").replace("-ml", "")
fqfn = os.path.join(os.path.dirname(__file__), fn)
eo[name] = numpy.loadtxt(open(fqfn))
#print "%s: %s %s" % (name, str(eo[name].shape), str(eo[name].dtype))
- eo['pdist-boolean-inp'] = numpy.bool_(eo['pdist-boolean-inp'])
+ #eo['pdist-boolean-inp'] = numpy.bool_(eo['pdist-boolean-inp'])
load_testing_files()
-#print eo.keys()
-
-
-#print numpy.abs(Y_test2 - Y_right).max()
-#print numpy.abs(Y_test1 - Y_right).max()
-
-class TestPdist(TestCase):
- """
- Test suite for the pdist function.
- """
-
- ################### pdist: euclidean
- def test_pdist_euclidean_random(self):
- "Tests pdist(X, 'euclidean') on random data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-euclidean']
-
- Y_test1 = pdist(X, 'euclidean')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_euclidean_random_float32(self):
- "Tests pdist(X, 'euclidean') on random data (float32)."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-euclidean']
-
- Y_test1 = pdist(X, 'euclidean')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_euclidean_random_nonC(self):
- "Tests pdist(X, 'test_euclidean') [the non-C implementation] on random data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-euclidean']
- Y_test2 = pdist(X, 'test_euclidean')
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- def test_pdist_euclidean_iris_double(self):
- "Tests pdist(X, 'euclidean') on the Iris data set."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-euclidean-iris']
-
- Y_test1 = pdist(X, 'euclidean')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_euclidean_iris_float32(self):
- "Tests pdist(X, 'euclidean') on the Iris data set. (float32)"
- eps = 1e-06
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['iris'])
- Y_right = eo['pdist-euclidean-iris']
-
- Y_test1 = pdist(X, 'euclidean')
- print numpy.abs(Y_right - Y_test1).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_euclidean_iris_nonC(self):
- "Tests pdist(X, 'test_euclidean') [the non-C implementation] on the Iris data set."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-euclidean-iris']
- Y_test2 = pdist(X, 'test_euclidean')
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- ################### pdist: seuclidean
- def test_pdist_seuclidean_random(self):
- "Tests pdist(X, 'seuclidean') on random data."
- eps = 1e-05
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-seuclidean']
-
- Y_test1 = pdist(X, 'seuclidean')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_seuclidean_random_float32(self):
- "Tests pdist(X, 'seuclidean') on random data (float32)."
- eps = 1e-05
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-seuclidean']
-
- Y_test1 = pdist(X, 'seuclidean')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_seuclidean_random_nonC(self):
- "Tests pdist(X, 'test_sqeuclidean') [the non-C implementation] on random data."
- eps = 1e-05
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-seuclidean']
- Y_test2 = pdist(X, 'test_sqeuclidean')
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- def test_pdist_seuclidean_iris(self):
- "Tests pdist(X, 'seuclidean') on the Iris data set."
- eps = 1e-05
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-seuclidean-iris']
-
- Y_test1 = pdist(X, 'seuclidean')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_seuclidean_iris_float32(self):
- "Tests pdist(X, 'seuclidean') on the Iris data set (float32)."
- eps = 1e-05
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['iris'])
- Y_right = eo['pdist-seuclidean-iris']
-
- Y_test1 = pdist(X, 'seuclidean')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_seuclidean_iris_nonC(self):
- "Tests pdist(X, 'test_seuclidean') [the non-C implementation] on the Iris data set."
- eps = 1e-05
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-seuclidean-iris']
- Y_test2 = pdist(X, 'test_sqeuclidean')
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- ################### pdist: cosine
- def test_pdist_cosine_random(self):
- "Tests pdist(X, 'cosine') on random data."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-cosine']
- Y_test1 = pdist(X, 'cosine')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_cosine_random_float32(self):
- "Tests pdist(X, 'cosine') on random data. (float32)"
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-cosine']
-
- Y_test1 = pdist(X, 'cosine')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_cosine_random_nonC(self):
- "Tests pdist(X, 'test_cosine') [the non-C implementation] on random data."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-cosine']
- Y_test2 = pdist(X, 'test_cosine')
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- def test_pdist_cosine_iris(self):
- "Tests pdist(X, 'cosine') on the Iris data set."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-cosine-iris']
-
- Y_test1 = pdist(X, 'cosine')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
- #print "cosine-iris", numpy.abs(Y_test1 - Y_right).max()
-
- def test_pdist_cosine_iris_float32(self):
- "Tests pdist(X, 'cosine') on the Iris data set."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['iris'])
- Y_right = eo['pdist-cosine-iris']
-
- Y_test1 = pdist(X, 'cosine')
- print numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
- #print "cosine-iris", numpy.abs(Y_test1 - Y_right).max()
-
- def test_pdist_cosine_iris_nonC(self):
- "Tests pdist(X, 'test_cosine') [the non-C implementation] on the Iris data set."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-cosine-iris']
- Y_test2 = pdist(X, 'test_cosine')
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- ################### pdist: cityblock
- def test_pdist_cityblock_random(self):
- "Tests pdist(X, 'cityblock') on random data."
- eps = 1e-06
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-cityblock']
- Y_test1 = pdist(X, 'cityblock')
- #print "cityblock", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_cityblock_random_float32(self):
- "Tests pdist(X, 'cityblock') on random data. (float32)"
- eps = 1e-06
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-cityblock']
- Y_test1 = pdist(X, 'cityblock')
- #print "cityblock", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_cityblock_random_nonC(self):
- "Tests pdist(X, 'test_cityblock') [the non-C implementation] on random data."
- eps = 1e-06
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-cityblock']
- Y_test2 = pdist(X, 'test_cityblock')
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- def test_pdist_cityblock_iris(self):
- "Tests pdist(X, 'cityblock') on the Iris data set."
- eps = 1e-14
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-cityblock-iris']
-
- Y_test1 = pdist(X, 'cityblock')
- self.failUnless(within_tol(Y_test1, Y_right, eps))
- #print "cityblock-iris", numpy.abs(Y_test1 - Y_right).max()
-
- def test_pdist_cityblock_iris_float32(self):
- "Tests pdist(X, 'cityblock') on the Iris data set. (float32)"
- eps = 1e-06
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['iris'])
- Y_right = eo['pdist-cityblock-iris']
-
- Y_test1 = pdist(X, 'cityblock')
- print "cityblock-iris-float32", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_cityblock_iris_nonC(self):
- "Tests pdist(X, 'test_cityblock') [the non-C implementation] on the Iris data set."
- eps = 1e-14
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-cityblock-iris']
- Y_test2 = pdist(X, 'test_cityblock')
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- ################### pdist: correlation
- def test_pdist_correlation_random(self):
- "Tests pdist(X, 'correlation') on random data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-correlation']
-
- Y_test1 = pdist(X, 'correlation')
- #print "correlation", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_correlation_random_float32(self):
- "Tests pdist(X, 'correlation') on random data. (float32)"
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-correlation']
-
- Y_test1 = pdist(X, 'correlation')
- #print "correlation", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_correlation_random_nonC(self):
- "Tests pdist(X, 'test_correlation') [the non-C implementation] on random data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-correlation']
- Y_test2 = pdist(X, 'test_correlation')
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- def test_pdist_correlation_iris(self):
- "Tests pdist(X, 'correlation') on the Iris data set."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-correlation-iris']
-
- Y_test1 = pdist(X, 'correlation')
- #print "correlation-iris", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_correlation_iris_float32(self):
- "Tests pdist(X, 'correlation') on the Iris data set. (float32)"
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = numpy.float32(eo['pdist-correlation-iris'])
-
- Y_test1 = pdist(X, 'correlation')
- print "correlation-iris", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_correlation_iris_nonC(self):
- "Tests pdist(X, 'test_correlation') [the non-C implementation] on the Iris data set."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-correlation-iris']
- Y_test2 = pdist(X, 'test_correlation')
- #print "test-correlation-iris", numpy.abs(Y_test2 - Y_right).max()
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- ################# minkowski
-
- def test_pdist_minkowski_random(self):
- "Tests pdist(X, 'minkowski') on random data."
- eps = 1e-05
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-minkowski-3.2']
-
- Y_test1 = pdist(X, 'minkowski', 3.2)
- #print "minkowski", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_minkowski_random_float32(self):
- "Tests pdist(X, 'minkowski') on random data. (float32)"
- eps = 1e-05
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-minkowski-3.2']
-
- Y_test1 = pdist(X, 'minkowski', 3.2)
- #print "minkowski", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_minkowski_random_nonC(self):
- "Tests pdist(X, 'test_minkowski') [the non-C implementation] on random data."
- eps = 1e-05
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-minkowski-3.2']
- Y_test2 = pdist(X, 'test_minkowski', 3.2)
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- def test_pdist_minkowski_iris(self):
- "Tests pdist(X, 'minkowski') on iris data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-minkowski-3.2-iris']
- Y_test1 = pdist(X, 'minkowski', 3.2)
- #print "minkowski-iris-3.2", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_minkowski_iris_float32(self):
- "Tests pdist(X, 'minkowski') on iris data. (float32)"
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['iris'])
- Y_right = eo['pdist-minkowski-3.2-iris']
- Y_test1 = pdist(X, 'minkowski', 3.2)
- #print "minkowski-iris-3.2", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_minkowski_iris_nonC(self):
- "Tests pdist(X, 'test_minkowski') [the non-C implementation] on iris data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-minkowski-3.2-iris']
- Y_test2 = pdist(X, 'test_minkowski', 3.2)
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- def test_pdist_minkowski_iris(self):
- "Tests pdist(X, 'minkowski') on iris data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-minkowski-5.8-iris']
- Y_test1 = pdist(X, 'minkowski', 5.8)
- #print "minkowski-iris-5.8", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_minkowski_iris_float32(self):
- "Tests pdist(X, 'minkowski') on iris data. (float32)"
- eps = 1e-06
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['iris'])
- Y_right = eo['pdist-minkowski-5.8-iris']
-
- Y_test1 = pdist(X, 'minkowski', 5.8)
- print "minkowski-iris-5.8", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_minkowski_iris_nonC(self):
- "Tests pdist(X, 'test_minkowski') [the non-C implementation] on iris data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-minkowski-5.8-iris']
- Y_test2 = pdist(X, 'test_minkowski', 5.8)
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- ################### pdist: hamming
- def test_pdist_hamming_random(self):
- "Tests pdist(X, 'hamming') on random data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['pdist-boolean-inp']
- Y_right = eo['pdist-hamming']
-
- Y_test1 = pdist(X, 'hamming')
- #print "hamming", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_hamming_random_float32(self):
- "Tests pdist(X, 'hamming') on random data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-hamming']
-
- Y_test1 = pdist(X, 'hamming')
- #print "hamming", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_hamming_random_nonC(self):
- "Tests pdist(X, 'test_hamming') [the non-C implementation] on random data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = eo['pdist-boolean-inp']
- Y_right = eo['pdist-hamming']
- Y_test2 = pdist(X, 'test_hamming')
- #print "test-hamming", numpy.abs(Y_test2 - Y_right).max()
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- ################### pdist: hamming (double)
- def test_pdist_dhamming_random(self):
- "Tests pdist(X, 'hamming') on random data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = numpy.float64(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-hamming']
- Y_test1 = pdist(X, 'hamming')
- #print "hamming", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_dhamming_random_float32(self):
- "Tests pdist(X, 'hamming') on random data. (float32)"
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-hamming']
- Y_test1 = pdist(X, 'hamming')
- #print "hamming", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_dhamming_random_nonC(self):
- "Tests pdist(X, 'test_hamming') [the non-C implementation] on random data."
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = numpy.float64(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-hamming']
- Y_test2 = pdist(X, 'test_hamming')
- #print "test-hamming", numpy.abs(Y_test2 - Y_right).max()
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- ################### pdist: jaccard
- def test_pdist_jaccard_random(self):
- "Tests pdist(X, 'jaccard') on random data."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = eo['pdist-boolean-inp']
- Y_right = eo['pdist-jaccard']
-
- Y_test1 = pdist(X, 'jaccard')
- #print "jaccard", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_jaccard_random_float32(self):
- "Tests pdist(X, 'jaccard') on random data. (float32)"
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-jaccard']
-
- Y_test1 = pdist(X, 'jaccard')
- #print "jaccard", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_jaccard_random_nonC(self):
- "Tests pdist(X, 'test_jaccard') [the non-C implementation] on random data."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = eo['pdist-boolean-inp']
- Y_right = eo['pdist-jaccard']
- Y_test2 = pdist(X, 'test_jaccard')
- #print "test-jaccard", numpy.abs(Y_test2 - Y_right).max()
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- ################### pdist: jaccard (double)
- def test_pdist_djaccard_random(self):
- "Tests pdist(X, 'jaccard') on random data."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = numpy.float64(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-jaccard']
-
- Y_test1 = pdist(X, 'jaccard')
- #print "jaccard", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_djaccard_random_float32(self):
- "Tests pdist(X, 'jaccard') on random data. (float32)"
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-jaccard']
-
- Y_test1 = pdist(X, 'jaccard')
- #print "jaccard", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_djaccard_random_nonC(self):
- "Tests pdist(X, 'test_jaccard') [the non-C implementation] on random data."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = numpy.float64(eo['pdist-boolean-inp'])
- Y_right = eo['pdist-jaccard']
- Y_test2 = pdist(X, 'test_jaccard')
- #print "test-jaccard", numpy.abs(Y_test2 - Y_right).max()
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- ################### pdist: chebychev
- def test_pdist_chebychev_random(self):
- "Tests pdist(X, 'chebychev') on random data."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-chebychev']
-
- Y_test1 = pdist(X, 'chebychev')
- #print "chebychev", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_chebychev_random_float32(self):
- "Tests pdist(X, 'chebychev') on random data. (float32)"
- eps = 1e-07
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['pdist-double-inp'])
- Y_right = eo['pdist-chebychev']
-
- Y_test1 = pdist(X, 'chebychev')
- print "chebychev", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_chebychev_random_nonC(self):
- "Tests pdist(X, 'test_chebychev') [the non-C implementation] on random data."
- eps = 1e-08
- # Get the data: the input matrix and the right output.
- X = eo['pdist-double-inp']
- Y_right = eo['pdist-chebychev']
- Y_test2 = pdist(X, 'test_chebychev')
- #print "test-chebychev", numpy.abs(Y_test2 - Y_right).max()
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- def test_pdist_chebychev_iris(self):
- "Tests pdist(X, 'chebychev') on the Iris data set."
- eps = 1e-15
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-chebychev-iris']
- Y_test1 = pdist(X, 'chebychev')
- #print "chebychev-iris", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_chebychev_iris_float32(self):
- "Tests pdist(X, 'chebychev') on the Iris data set. (float32)"
- eps = 1e-06
- # Get the data: the input matrix and the right output.
- X = numpy.float32(eo['iris'])
- Y_right = eo['pdist-chebychev-iris']
- Y_test1 = pdist(X, 'chebychev')
- print "chebychev-iris", numpy.abs(Y_test1 - Y_right).max()
- self.failUnless(within_tol(Y_test1, Y_right, eps))
-
- def test_pdist_chebychev_iris_nonC(self):
- "Tests pdist(X, 'test_chebychev') [the non-C implementation] on the Iris data set."
- eps = 1e-15
- # Get the data: the input matrix and the right output.
- X = eo['iris']
- Y_right = eo['pdist-chebychev-iris']
- Y_test2 = pdist(X, 'test_chebychev')
- #print "test-chebychev-iris", numpy.abs(Y_test2 - Y_right).max()
- self.failUnless(within_tol(Y_test2, Y_right, eps))
-
- def test_pdist_matching_mtica1(self):
- "Tests matching(*,*) with mtica example #1 (nums)."
- m = matching(numpy.array([1, 0, 1, 1, 0]),
- numpy.array([1, 1, 0, 1, 1]))
- m2 = matching(numpy.array([1, 0, 1, 1, 0], dtype=numpy.bool),
- numpy.array([1, 1, 0, 1, 1], dtype=numpy.bool))
- self.failUnless(numpy.abs(m - 0.6) <= 1e-10)
- self.failUnless(numpy.abs(m2 - 0.6) <= 1e-10)
-
- def test_pdist_matching_mtica2(self):
- "Tests matching(*,*) with mtica example #2."
- m = matching(numpy.array([1, 0, 1]),
- numpy.array([1, 1, 0]))
- m2 = matching(numpy.array([1, 0, 1], dtype=numpy.bool),
- numpy.array([1, 1, 0], dtype=numpy.bool))
- self.failUnless(numpy.abs(m - (2.0/3.0)) <= 1e-10)
- self.failUnless(numpy.abs(m2 - (2.0/3.0)) <= 1e-10)
-
- def test_pdist_matching_match(self):
- "Tests pdist('matching') to see if the two implementations match on random boolean input data."
- D = eo['random-bool-data']
- B = numpy.bool_(D)
- print B.shape, B.dtype
- eps = 1e-10
- y1 = pdist(B, "matching")
- y2 = pdist(B, "test_matching")
- y3 = pdist(D, "test_matching")
- print numpy.abs(y1-y2).max()
- print numpy.abs(y1-y3).max()
- self.failUnless(within_tol(y1, y2, eps))
- self.failUnless(within_tol(y2, y3, eps))
-
- def test_pdist_jaccard_mtica1(self):
- "Tests jaccard(*,*) with mtica example #1."
- m = jaccard(numpy.array([1, 0, 1, 1, 0]),
- numpy.array([1, 1, 0, 1, 1]))
- m2 = jaccard(numpy.array([1, 0, 1, 1, 0], dtype=numpy.bool),
- numpy.array([1, 1, 0, 1, 1], dtype=numpy.bool))
- self.failUnless(numpy.abs(m - 0.6) <= 1e-10)
- self.failUnless(numpy.abs(m2 - 0.6) <= 1e-10)
-
- def test_pdist_jaccard_mtica2(self):
- "Tests jaccard(*,*) with mtica example #2."
- m = jaccard(numpy.array([1, 0, 1]),
- numpy.array([1, 1, 0]))
- m2 = jaccard(numpy.array([1, 0, 1], dtype=numpy.bool),
- numpy.array([1, 1, 0], dtype=numpy.bool))
- self.failUnless(numpy.abs(m - (2.0/3.0)) <= 1e-10)
- self.failUnless(numpy.abs(m2 - (2.0/3.0)) <= 1e-10)
-
- def test_pdist_jaccard_match(self):
- "Tests pdist('jaccard') to see if the two implementations match on random double input data."
- D = eo['random-bool-data']
- print D.shape, D.dtype
- eps = 1e-10
- y1 = pdist(D, "jaccard")
- y2 = pdist(D, "test_jaccard")
- y3 = pdist(numpy.bool_(D), "test_jaccard")
- print numpy.abs(y1-y2).max()
- print numpy.abs(y2-y3).max()
- self.failUnless(within_tol(y1, y2, eps))
- self.failUnless(within_tol(y2, y3, eps))
-
- def test_pdist_yule_mtica1(self):
- "Tests yule(*,*) with mtica example #1."
- m = yule(numpy.array([1, 0, 1, 1, 0]),
- numpy.array([1, 1, 0, 1, 1]))
- m2 = yule(numpy.array([1, 0, 1, 1, 0], dtype=numpy.bool),
- numpy.array([1, 1, 0, 1, 1], dtype=numpy.bool))
- print m
- self.failUnless(numpy.abs(m - 2.0) <= 1e-10)
- self.failUnless(numpy.abs(m2 - 2.0) <= 1e-10)
-
- def test_pdist_yule_mtica2(self):
- "Tests yule(*,*) with mtica example #2."
- m = yule(numpy.array([1, 0, 1]),
- numpy.array([1, 1, 0]))
- m2 = yule(numpy.array([1, 0, 1], dtype=numpy.bool),
- numpy.array([1, 1, 0], dtype=numpy.bool))
- print m
- self.failUnless(numpy.abs(m - 2.0) <= 1e-10)
- self.failUnless(numpy.abs(m2 - 2.0) <= 1e-10)
-
- def test_pdist_yule_match(self):
- "Tests pdist('yule') to see if the two implementations match on random double input data."
- D = eo['random-bool-data']
- print D.shape, D.dtype
- eps = 1e-10
- y1 = pdist(D, "yule")
- y2 = pdist(D, "test_yule")
- y3 = pdist(numpy.bool_(D), "test_yule")
- print numpy.abs(y1-y2).max()
- print numpy.abs(y2-y3).max()
- self.failUnless(within_tol(y1, y2, eps))
- self.failUnless(within_tol(y2, y3, eps))
-
- def test_pdist_dice_mtica1(self):
- "Tests dice(*,*) with mtica example #1."
- m = dice(numpy.array([1, 0, 1, 1, 0]),
- numpy.array([1, 1, 0, 1, 1]))
- m2 = dice(numpy.array([1, 0, 1, 1, 0], dtype=numpy.bool),
- numpy.array([1, 1, 0, 1, 1], dtype=numpy.bool))
- print m
- self.failUnless(numpy.abs(m - (3.0/7.0)) <= 1e-10)
- self.failUnless(numpy.abs(m2 - (3.0/7.0)) <= 1e-10)
-
- def test_pdist_dice_mtica2(self):
- "Tests dice(*,*) with mtica example #2."
- m = dice(numpy.array([1, 0, 1]),
- numpy.array([1, 1, 0]))
- m2 = dice(numpy.array([1, 0, 1], dtype=numpy.bool),
- numpy.array([1, 1, 0], dtype=numpy.bool))
- print m
- self.failUnless(numpy.abs(m - 0.5) <= 1e-10)
- self.failUnless(numpy.abs(m2 - 0.5) <= 1e-10)
-
- def test_pdist_dice_match(self):
- "Tests pdist('dice') to see if the two implementations match on random double input data."
- D = eo['random-bool-data']
- print D.shape, D.dtype
- eps = 1e-10
- y1 = pdist(D, "dice")
- y2 = pdist(D, "test_dice")
- y3 = pdist(D, "test_dice")
- print numpy.abs(y1-y2).max()
- print numpy.abs(y2-y3).max()
- self.failUnless(within_tol(y1, y2, eps))
- self.failUnless(within_tol(y2, y3, eps))
-
- def test_pdist_sokalsneath_mtica1(self):
- "Tests sokalsneath(*,*) with mtica example #1."
- m = sokalsneath(numpy.array([1, 0, 1, 1, 0]),
- numpy.array([1, 1, 0, 1, 1]))
- m2 = sokalsneath(numpy.array([1, 0, 1, 1, 0], dtype=numpy.bool),
- numpy.array([1, 1, 0, 1, 1], dtype=numpy.bool))
- print m
- self.failUnless(numpy.abs(m - (3.0/4.0)) <= 1e-10)
- self.failUnless(numpy.abs(m2 - (3.0/4.0)) <= 1e-10)
-
- def test_pdist_sokalsneath_mtica2(self):
- "Tests sokalsneath(*,*) with mtica example #2."
- m = sokalsneath(numpy.array([1, 0, 1]),
- numpy.array([1, 1, 0]))
- m2 = sokalsneath(numpy.array([1, 0, 1], dtype=numpy.bool),
- numpy.array([1, 1, 0], dtype=numpy.bool))
- print m
- self.failUnless(numpy.abs(m - (4.0/5.0)) <= 1e-10)
- self.failUnless(numpy.abs(m2 - (4.0/5.0)) <= 1e-10)
-
- def test_pdist_sokalsneath_match(self):
- "Tests pdist('sokalsneath') to see if the two implementations match on random double input data."
- D = eo['random-bool-data']
- print D.shape, D.dtype
- eps = 1e-10
- y1 = pdist(D, "sokalsneath")
- y2 = pdist(D, "test_sokalsneath")
- y3 = pdist(numpy.bool_(D), "test_sokalsneath")
- print numpy.abs(y1-y2).max()
- print numpy.abs(y2-y3).max()
- self.failUnless(within_tol(y1, y2, eps))
- self.failUnless(within_tol(y2, y3, eps))
-
- def test_pdist_rogerstanimoto_mtica1(self):
- "Tests rogerstanimoto(*,*) with mtica example #1."
- m = rogerstanimoto(numpy.array([1, 0, 1, 1, 0]),
- numpy.array([1, 1, 0, 1, 1]))
- m2 = rogerstanimoto(numpy.array([1, 0, 1, 1, 0], dtype=numpy.bool),
- numpy.array([1, 1, 0, 1, 1], dtype=numpy.bool))
- print m
- self.failUnless(numpy.abs(m - (3.0/4.0)) <= 1e-10)
- self.failUnless(numpy.abs(m2 - (3.0/4.0)) <= 1e-10)
-
- def test_pdist_rogerstanimoto_mtica2(self):
- "Tests rogerstanimoto(*,*) with mtica example #2."
- m = rogerstanimoto(numpy.array([1, 0, 1]),
- numpy.array([1, 1, 0]))
- m2 = rogerstanimoto(numpy.array([1, 0, 1], dtype=numpy.bool),
- numpy.array([1, 1, 0], dtype=numpy.bool))
- print m
- self.failUnless(numpy.abs(m - (4.0/5.0)) <= 1e-10)
- self.failUnless(numpy.abs(m2 - (4.0/5.0)) <= 1e-10)
-
- def test_pdist_rogerstanimoto_match(self):
- "Tests pdist('rogerstanimoto') to see if the two implementations match on random double input data."
- D = eo['random-bool-data']
- print D.shape, D.dtype
- eps = 1e-10
- y1 = pdist(D, "rogerstanimoto")
- y2 = pdist(D, "test_rogerstanimoto")
- y3 = pdist(numpy.bool_(D), "test_rogerstanimoto")
- print numpy.abs(y1-y2).max()
- print numpy.abs(y2-y3).max()
- self.failUnless(within_tol(y1, y2, eps))
- self.failUnless(within_tol(y2, y3, eps))
-
- def test_pdist_russellrao_mtica1(self):
- "Tests russellrao(*,*) with mtica example #1."
- m = russellrao(numpy.array([1, 0, 1, 1, 0]),
- numpy.array([1, 1, 0, 1, 1]))
- m2 = russellrao(numpy.array([1, 0, 1, 1, 0], dtype=numpy.bool),
- numpy.array([1, 1, 0, 1, 1], dtype=numpy.bool))
- print m
- self.failUnless(numpy.abs(m - (3.0/5.0)) <= 1e-10)
- self.failUnless(numpy.abs(m2 - (3.0/5.0)) <= 1e-10)
-
- def test_pdist_russellrao_mtica2(self):
- "Tests russellrao(*,*) with mtica example #2."
- m = russellrao(numpy.array([1, 0, 1]),
- numpy.array([1, 1, 0]))
- m2 = russellrao(numpy.array([1, 0, 1], dtype=numpy.bool),
- numpy.array([1, 1, 0], dtype=numpy.bool))
- print m
- self.failUnless(numpy.abs(m - (2.0/3.0)) <= 1e-10)
- self.failUnless(numpy.abs(m2 - (2.0/3.0)) <= 1e-10)
-
- def test_pdist_russellrao_match(self):
- "Tests pdist('russellrao') to see if the two implementations match on random double input data."
- D = eo['random-bool-data']
- print D.shape, D.dtype
- eps = 1e-10
- y1 = pdist(D, "russellrao")
- y2 = pdist(D, "test_russellrao")
- y3 = pdist(numpy.bool_(D), "test_russellrao")
- print numpy.abs(y1-y2).max()
- print numpy.abs(y2-y3).max()
- self.failUnless(within_tol(y1, y2, eps))
- self.failUnless(within_tol(y2, y3, eps))
-
- def test_pdist_sokalmichener_match(self):
- "Tests pdist('sokalmichener') to see if the two implementations match on random double input data."
- D = eo['random-bool-data']
- print D.shape, D.dtype
- eps = 1e-10
- y1 = pdist(D, "sokalmichener")
- y2 = pdist(D, "test_sokalmichener")
- y3 = pdist(numpy.bool_(D), "test_sokalmichener")
- print numpy.abs(y1-y2).max()
- print numpy.abs(y2-y3).max()
- self.failUnless(within_tol(y1, y2, eps))
- self.failUnless(within_tol(y2, y3, eps))
-
- def test_pdist_kulsinski_match(self):
- "Tests pdist('kulsinski') to see if the two implementations match on random double input data."
- D = eo['random-bool-data']
- print D.shape, D.dtype
- eps = 1e-10
- y1 = pdist(D, "kulsinski")
- y2 = pdist(D, "test_kulsinski")
- y3 = pdist(numpy.bool_(D), "test_kulsinski")
- print numpy.abs(y1-y2).max()
- self.failUnless(within_tol(y1, y2, eps))
-
class TestSquareForm(TestCase):
################### squareform
More information about the Scipy-svn
mailing list