[Scipy-svn] r4156 - trunk/scipy/cluster
scipy-svn at scipy.org
scipy-svn at scipy.org
Sun Apr 20 15:31:22 EDT 2008
Author: stefan
Date: 2008-04-20 14:31:15 -0500 (Sun, 20 Apr 2008)
New Revision: 4156
Modified:
trunk/scipy/cluster/hierarchy.py
Log:
Use NumPy type objects (e.g. np.double, np.float32) instead of dtype name strings (e.g. 'double', 'float32'). Import numpy as np.
Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py 2008-04-20 18:24:29 UTC (rev 4155)
+++ trunk/scipy/cluster/hierarchy.py 2008-04-20 19:31:15 UTC (rev 4156)
@@ -175,12 +175,15 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
-import _hierarchy_wrap, scipy, numpy, types, math, sys, scipy.stats
+import numpy as np
+import _hierarchy_wrap, scipy, types, math, sys, scipy.stats
-_cpy_non_euclid_methods = {'single': 0, 'complete': 1, 'average': 2, 'weighted': 6}
+_cpy_non_euclid_methods = {'single': 0, 'complete': 1, 'average': 2,
+ 'weighted': 6}
_cpy_euclid_methods = {'centroid': 3, 'median': 4, 'ward': 5}
-_cpy_linkage_methods = set(_cpy_non_euclid_methods.keys()).union(set(_cpy_euclid_methods.keys()))
-_array_type = type(numpy.array([]))
+_cpy_linkage_methods = set(_cpy_non_euclid_methods.keys()).union(
+ set(_cpy_euclid_methods.keys()))
+_array_type = np.ndarray
try:
import warnings
@@ -196,7 +199,7 @@
observation vectors, represented by a matrix where the rows are the
observations.
"""
- #n = numpy.double(X.shape[1])
+ #n = np.double(X.shape[1])
return scipy.stats.var(X, axis=0) # * n / (n - 1.0)
def _copy_array_if_base_present(a):
@@ -205,8 +208,8 @@
"""
if a.base is not None:
return a.copy()
- elif (a.dtype == 'float32'):
- return numpy.float64(a)
+ elif (a.dtype == np.float32):
+ return np.float64(a)
else:
return a
@@ -231,7 +234,7 @@
pnts * (pnts - 1) / 2 sized vector is returned.
"""
if pnts >= 2:
- D = numpy.random.rand(pnts * (pnts - 1) / 2)
+ D = np.random.rand(pnts * (pnts - 1) / 2)
else:
raise ValueError("The number of points in the distance matrix must be at least 2.")
return D
@@ -456,13 +459,13 @@
s = y.shape
if len(s) == 1:
is_valid_y(y, throw=True, name='y')
- d = numpy.ceil(numpy.sqrt(s[0] * 2))
+ d = np.ceil(np.sqrt(s[0] * 2))
if method not in _cpy_non_euclid_methods.keys():
raise ValueError("Valid methods when the raw observations are omitted are 'single', 'complete', 'weighted', and 'average'.")
# Since the C code does not support striding using strides.
[y] = _copy_arrays_if_base_present([y])
- Z = numpy.zeros((d - 1, 4))
+ Z = np.zeros((d - 1, 4))
_hierarchy_wrap.linkage_wrap(y, Z, int(d), \
int(_cpy_non_euclid_methods[method]))
elif len(s) == 2:
@@ -473,14 +476,14 @@
raise ValueError('Invalid method: %s' % method)
if method in _cpy_non_euclid_methods.keys():
dm = pdist(X, metric)
- Z = numpy.zeros((n - 1, 4))
+ Z = np.zeros((n - 1, 4))
_hierarchy_wrap.linkage_wrap(dm, Z, n, \
int(_cpy_non_euclid_methods[method]))
elif method in _cpy_euclid_methods.keys():
if metric != 'euclidean':
raise ValueError('Method %s requires the distance metric to be euclidean' % s)
dm = pdist(X, metric)
- Z = numpy.zeros((n - 1, 4))
+ Z = np.zeros((n - 1, 4))
_hierarchy_wrap.linkage_euclid_wrap(dm, Z, X, m, n,
int(_cpy_euclid_methods[method]))
return Z
@@ -579,8 +582,8 @@
n = self.count
curNode = [None] * (2 * n)
- lvisited = numpy.zeros((2 * n,), dtype='bool')
- rvisited = numpy.zeros((2 * n,), dtype='bool')
+ lvisited = np.zeros((2 * n,), dtype=bool)
+ rvisited = np.zeros((2 * n,), dtype=bool)
curNode[0] = self
k = 0
preorder = []
@@ -648,7 +651,7 @@
# If we encounter a cluster being combined more than once, the matrix
# must be corrupt.
- if len(numpy.unique(Z[:, 0:2].reshape((2 * (n - 1),)))) != 2 * (n - 1):
+ if len(np.unique(Z[:, 0:2].reshape((2 * (n - 1),)))) != 2 * (n - 1):
raise ValueError('Corrupt matrix Z. Some clusters are more than once.')
# If a cluster index is out of bounds, report an error.
if (Z[:, 0:2] >= 2 * n - 1).any():
@@ -723,7 +726,7 @@
if type(X) is not _array_type:
raise TypeError('The parameter passed must be an array.')
- if X.dtype != 'double':
+ if X.dtype != np.double:
raise TypeError('A double array must be passed.')
s = X.shape
@@ -733,7 +736,7 @@
# Grab the closest value to the square root of the number
# of elements times 2 to see if the number of elements
# is indeed a binomial coefficient.
- d = int(numpy.ceil(numpy.sqrt(X.shape[0] * 2)))
+ d = int(np.ceil(np.sqrt(X.shape[0] * 2)))
print d, s[0]
# Check that v is of valid dimensions.
@@ -741,7 +744,7 @@
raise ValueError('Incompatible vector size. It must be a binomial coefficient n choose 2 for some integer n >= 2.')
# Allocate memory for the distance matrix.
- M = numpy.zeros((d, d), 'double')
+ M = np.zeros((d, d), 'double')
# Since the C code does not support striding using strides.
# The dimensions are used instead.
@@ -759,7 +762,7 @@
if s[0] != s[1]:
raise ValueError('The matrix argument must be square.')
if checks:
- if numpy.sum(numpy.sum(X == X.transpose())) != numpy.product(X.shape):
+ if np.sum(np.sum(X == X.transpose())) != np.product(X.shape):
raise ValueError('The distance matrix must be symmetrical.')
if (X.diagonal() != 0).any():
raise ValueError('The distance matrix must have zeros along the diagonal.')
@@ -768,7 +771,7 @@
d = s[0]
# Create a vector.
- v = numpy.zeros(((d * (d - 1) / 2),), 'double')
+ v = np.zeros(((d * (d - 1) / 2),), 'double')
# Since the C code does not support striding using strides.
# The dimensions are used instead.
@@ -800,8 +803,8 @@
Computes the Euclidean distance between two n-vectors u and v, ||u-v||_2
"""
- q=numpy.matrix(u-v)
- return numpy.sqrt((q*q.T).sum())
+ q=np.matrix(u-v)
+ return np.sqrt((q*q.T).sum())
def sqeuclidean(u, v):
"""
@@ -820,7 +823,7 @@
(1-uv^T)/(||u||_2 * ||v||_2).
"""
return (1.0 - (scipy.dot(u, v.T) / \
- (numpy.sqrt(scipy.dot(u, u.T)) * numpy.sqrt(scipy.dot(v, v.T)))))
+ (np.sqrt(scipy.dot(u, u.T)) * np.sqrt(scipy.dot(v, v.T)))))
def correlation(u, v):
"""
@@ -840,8 +843,8 @@
um = u - umu
vm = v - vmu
return 1.0 - (scipy.dot(um, vm) /
- (numpy.sqrt(scipy.dot(um, um)) \
- * numpy.sqrt(scipy.dot(vm, vm))))
+ (np.sqrt(scipy.dot(um, um)) \
+ * np.sqrt(scipy.dot(vm, vm))))
def hamming(u, v):
"""
@@ -878,7 +881,7 @@
for k < n.
"""
- return numpy.double(scipy.bitwise_and((u != v), scipy.bitwise_or(u != 0, v != 0)).sum()) / numpy.double(scipy.bitwise_or(u != 0, v != 0).sum())
+ return np.double(scipy.bitwise_and((u != v), scipy.bitwise_or(u != 0, v != 0)).sum()) / np.double(scipy.bitwise_or(u != 0, v != 0).sum())
def kulsinski(u, v):
"""
@@ -911,7 +914,7 @@
"""
if type(V) is not _array_type or len(V.shape) != 1 or V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
raise TypeError('V must be a 1-D numpy array of doubles of the same dimension as u and v.')
- return numpy.sqrt(((u-v)**2 / V).sum())
+ return np.sqrt(((u-v)**2 / V).sum())
def cityblock(u, v):
"""
@@ -932,7 +935,7 @@
"""
if type(V) is not _array_type:
raise TypeError('V must be a 1-D numpy array of doubles of the same dimension as u and v.')
- return numpy.sqrt(scipy.dot(scipy.dot((u-v),VI),(u-v).T).sum())
+ return np.sqrt(scipy.dot(scipy.dot((u-v),VI),(u-v).T).sum())
def chebyshev(u, v):
"""
@@ -1276,7 +1279,7 @@
Euclidean distance between the vectors could be computed
as follows,
- dm = pdist(X, (lambda u, v: numpy.sqrt(((u-v)*(u-v).T).sum())))
+ dm = pdist(X, (lambda u, v: np.sqrt(((u-v)*(u-v).T).sum())))
Note that you should avoid passing a reference to one of
the distance functions defined in this library. For example,
@@ -1301,7 +1304,7 @@
if type(X) is not _array_type:
raise TypeError('The parameter passed must be an array.')
- if X.dtype == 'float32' or X.dtype == 'float96':
+ if X.dtype == np.float32 or X.dtype == np.float96:
raise TypeError('Floating point arrays must be 64-bit.')
# The C code doesn't do striding.
@@ -1314,7 +1317,7 @@
m = s[0]
n = s[1]
- dm = numpy.zeros((m * (m - 1) / 2,), dtype='double')
+ dm = np.zeros((m * (m - 1) / 2,), dtype=np.double)
mtype = type(metric)
if mtype is types.FunctionType:
@@ -1343,7 +1346,8 @@
elif mtype is types.StringType:
mstr = metric.lower()
- if X.dtype != 'double' and (mstr != 'hamming' and mstr != 'jaccard'):
+ if X.dtype != np.double and \
+ (mstr != 'hamming' and mstr != 'jaccard'):
TypeError('A double array must be passed.')
if mstr in set(['euclidean', 'euclid', 'eu', 'e']):
_hierarchy_wrap.pdist_euclidean_wrap(X, dm)
@@ -1353,19 +1357,21 @@
elif mstr in set(['cityblock', 'cblock', 'cb', 'c']):
_hierarchy_wrap.pdist_city_block_wrap(X, dm)
elif mstr in set(['hamming', 'hamm', 'ha', 'h']):
- if X.dtype == 'double':
+ if X.dtype == np.double:
_hierarchy_wrap.pdist_hamming_wrap(X, dm)
- elif X.dtype == 'bool':
+ elif X.dtype == bool:
_hierarchy_wrap.pdist_hamming_bool_wrap(X, dm)
else:
- raise TypeError('Invalid input array value type %s for hamming.' % str(X.dtype))
+ raise TypeError('Invalid input array value type %s '
+ 'for hamming.' % str(X.dtype))
elif mstr in set(['jaccard', 'jacc', 'ja', 'j']):
- if X.dtype == 'double':
+ if X.dtype == np.double:
_hierarchy_wrap.pdist_jaccard_wrap(X, dm)
- elif X.dtype == 'bool':
+ elif X.dtype == np.bool:
_hierarchy_wrap.pdist_jaccard_bool_wrap(X, dm)
else:
- raise TypeError('Invalid input array value type %s for jaccard.' % str(X.dtype))
+ raise TypeError('Invalid input array value type %s for '
+ 'jaccard.' % str(X.dtype))
elif mstr in set(['chebychev', 'chebyshev', 'cheby', 'cheb', 'ch']):
_hierarchy_wrap.pdist_chebyshev_wrap(X, dm)
elif mstr in set(['minkowski', 'mi', 'm']):
@@ -1374,7 +1380,7 @@
if V is not None:
if type(V) is not _array_type:
raise TypeError('Variance vector V must be a numpy array')
- if V.dtype != 'float64':
+ if V.dtype != np.float64:
raise TypeError('Variance vector V must contain doubles.')
if len(V.shape) != 1:
raise ValueError('Variance vector V must be one-dimensional.')
@@ -1390,33 +1396,33 @@
# subtract matrices in a similar way to multiplying them?
# Need to get rid of as much unnecessary C code as possible.
elif mstr in set(['cosine_old', 'cos_old']):
- norms = numpy.sqrt(numpy.sum(X * X, axis=1))
+ norms = np.sqrt(np.sum(X * X, axis=1))
_hierarchy_wrap.pdist_cosine_wrap(X, dm, norms)
elif mstr in set(['cosine', 'cos']):
- norms = numpy.sqrt(numpy.sum(X * X, axis=1))
+ norms = np.sqrt(np.sum(X * X, axis=1))
nV = norms.reshape(m, 1)
# The numerator u * v
- nm = numpy.dot(X, X.T)
+ nm = np.dot(X, X.T)
# The denom. ||u||*||v||
- de = numpy.dot(nV, nV.T);
+ de = np.dot(nV, nV.T);
dm = 1 - (nm / de)
dm[xrange(0,m),xrange(0,m)] = 0
dm = squareform(dm)
elif mstr in set(['correlation', 'co']):
- X2 = X - X.mean(1)[:,numpy.newaxis]
- #X2 = X - numpy.matlib.repmat(numpy.mean(X, axis=1).reshape(m, 1), 1, n)
- norms = numpy.sqrt(numpy.sum(X2 * X2, axis=1))
+ X2 = X - X.mean(1)[:,np.newaxis]
+ #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n)
+ norms = np.sqrt(np.sum(X2 * X2, axis=1))
_hierarchy_wrap.pdist_cosine_wrap(X2, dm, norms)
elif mstr in set(['mahalanobis', 'mahal', 'mah']):
if VI is not None:
if type(VI) != _array_type:
raise TypeError('VI must be a numpy array.')
- if VI.dtype != 'float64':
+ if VI.dtype != np.float64:
raise TypeError('The array must contain 64-bit floats.')
[VI] = _copy_arrays_if_base_present([VI])
else:
- V = numpy.cov(X.T)
- VI = numpy.linalg.inv(V).T.copy()
+ V = np.cov(X.T)
+ VI = np.linalg.inv(V).T.copy()
# (u-v)V^(-1)(u-v)^T
_hierarchy_wrap.pdist_mahalanobis_wrap(X, VI, dm)
elif mstr == 'canberra':
@@ -1449,8 +1455,8 @@
dm = pdist(X, braycurtis)
elif metric == 'test_mahalanobis':
if VI is None:
- V = numpy.cov(X.T)
- VI = numpy.linalg.inv(V)
+ V = np.cov(X.T)
+ VI = np.linalg.inv(V)
[VI] = _copy_arrays_if_base_present([VI])
# (u-v)V^(-1)(u-v)^T
dm = pdist(X, (lambda u, v: mahalanobis(u, v, VI)))
@@ -1523,7 +1529,7 @@
Zs = Z.shape
n = Zs[0] + 1
- zz = numpy.zeros((n*(n-1)/2,), dtype='double')
+ zz = np.zeros((n*(n-1)/2,), dtype=np.double)
# Since the C code does not support striding using strides.
# The dimensions are used instead.
[Z] = _copy_arrays_if_base_present([Z])
@@ -1544,7 +1550,7 @@
numerator = (Yy * Zz)
denomA = Yy ** 2
denomB = Zz ** 2
- c = numerator.sum() / numpy.sqrt((denomA.sum() * denomB.sum()))
+ c = numerator.sum() / np.sqrt((denomA.sum() * denomB.sum()))
#print c, numerator.sum()
if nargs == 2:
return c
@@ -1573,7 +1579,7 @@
Zs = Z.shape
is_valid_linkage(Z, throw=True, name='Z')
- if (not d == numpy.floor(d)) or d < 0:
+ if (not d == np.floor(d)) or d < 0:
raise ValueError('The second argument d must be a nonnegative integer value.')
# if d == 0:
# d = 1
@@ -1583,7 +1589,7 @@
[Z] = _copy_arrays_if_base_present([Z])
n = Zs[0] + 1
- R = numpy.zeros((n - 1, 4), dtype='double')
+ R = np.zeros((n - 1, 4), dtype=np.double)
_hierarchy_wrap.inconsistent_wrap(Z, R, int(n), int(d));
return R
@@ -1608,12 +1614,12 @@
Zd = Z[:,2].reshape(Zs[0], 1)
if Zpart.min() != 1.0 and Zpart.max() != 2 * Zs[0]:
raise ValueError('The format of the indices is not 1..N');
- CS = numpy.zeros((Zs[0], 1), dtype='double')
+ CS = np.zeros((Zs[0], 1), dtype=np.double)
Zpart = Zpart - 1
- _hierarchy_wrap.calculate_cluster_sizes_wrap(numpy.hstack([Zpart, \
+ _hierarchy_wrap.calculate_cluster_sizes_wrap(np.hstack([Zpart, \
Zd]).copy(), \
CS, int(Zs[0]) + 1)
- return numpy.hstack([Zpart, Zd, CS]).copy()
+ return np.hstack([Zpart, Zd, CS]).copy()
def to_mlab_linkage(Z):
"""
@@ -1626,7 +1632,7 @@
"""
is_valid_linkage(Z, throw=True, name='Z')
- return numpy.hstack([Z[:,0:2] + 1, Z[:,2]])
+ return np.hstack([Z[:,0:2] + 1, Z[:,2]])
def is_monotonic(Z):
"""
@@ -1657,7 +1663,7 @@
raise TypeError('Variable \'%s\' passed as inconsistency matrix is not a numpy array.' % name)
else:
raise TypeError('Variable passed as inconsistency matrix is not a numpy array.')
- if R.dtype != 'double':
+ if R.dtype != np.double:
if name:
raise TypeError('Inconsistency matrix \'%s\' must contain doubles (float64).' % name)
else:
@@ -1716,7 +1722,7 @@
raise TypeError('\'%s\' passed as a linkage is not a valid array.' % name)
else:
raise TypeError('Variable is not a valid array.')
- if Z.dtype != 'double':
+ if Z.dtype != np.double:
if name:
raise TypeError('Linkage matrix \'%s\' must contain doubles (float64).' % name)
else:
@@ -1776,7 +1782,7 @@
raise TypeError('\'%s\' passed as a condensed distance matrix is not a numpy array.' % name)
else:
raise TypeError('Variable is not a numpy array.')
- if y.dtype != 'double':
+ if y.dtype != np.double:
if name:
raise TypeError('Condensed distance matrix \'%s\' must contain doubles (float64).' % name)
else:
@@ -1787,7 +1793,7 @@
else:
raise ValueError('Condensed distance matrix must have shape=1 (i.e. be one-dimensional).')
n = y.shape[0]
- d = int(numpy.ceil(numpy.sqrt(n * 2)))
+ d = int(np.ceil(np.sqrt(n * 2)))
if (d*(d-1)/2) != n:
if name:
raise ValueError('Length n of condensed distance matrix \'%s\' must be a binomial coefficient, i.e. there must be a k such that (k \choose 2)=n)!' % name)
@@ -1838,7 +1844,7 @@
raise TypeError('\'%s\' passed as a distance matrix is not a numpy array.' % name)
else:
raise TypeError('Variable is not a numpy array.')
- if D.dtype != 'double':
+ if D.dtype != np.double:
if name:
raise TypeError('Distance matrix \'%s\' must contain doubles (float64).' % name)
else:
@@ -1904,7 +1910,7 @@
condensed distance matrix Y.
"""
is_valid_y(y, throw=True, name='Y')
- d = int(numpy.ceil(numpy.sqrt(y.shape[0] * 2)))
+ d = int(np.ceil(np.sqrt(y.shape[0] * 2)))
return d
def Z_y_correspond(Z, Y):
@@ -1979,7 +1985,7 @@
is_valid_linkage(Z, throw=True, name='Z')
n = Z.shape[0] + 1
- T = numpy.zeros((n,), dtype='int32')
+ T = np.zeros((n,), dtype=np.int32)
# Since the C code does not support striding using strides.
# The dimensions are used instead.
@@ -2078,7 +2084,7 @@
"""
is_valid_linkage(Z, throw=True, name='Z')
n = Z.shape[0] + 1
- ML = numpy.zeros((n,), dtype='int32')
+ ML = np.zeros((n,), dtype=np.int32)
[Z] = _copy_arrays_if_base_present([Z])
_hierarchy_wrap.prelist_wrap(Z, ML, int(n))
return ML
@@ -2917,7 +2923,7 @@
is_valid_linkage(Z, throw=True, name='Z')
n = Z.shape[0] + 1
- MD = numpy.zeros((n-1,))
+ MD = np.zeros((n-1,))
[Z] = _copy_arrays_if_base_present([Z])
_hierarchy_wrap.get_max_dist_for_each_hierarchy_wrap(Z, MD, int(n))
return MD
@@ -2935,7 +2941,7 @@
is_valid_im(R, throw=True, name='R')
n = Z.shape[0] + 1
- MI = numpy.zeros((n-1,))
+ MI = np.zeros((n-1,))
[Z, R] = _copy_arrays_if_base_present([Z, R])
_hierarchy_wrap.get_max_Rfield_for_each_hierarchy_wrap(Z, R, MI, int(n), 3)
return MI
@@ -2957,7 +2963,7 @@
return ValueError('i must be an integer between 0 and 3 inclusive.')
n = Z.shape[0] + 1
- MR = numpy.zeros((n-1,))
+ MR = np.zeros((n-1,))
[Z, R] = _copy_arrays_if_base_present([Z, R])
_hierarchy_wrap.get_max_Rfield_for_each_hierarchy_wrap(Z, R, MR, int(n), i)
return MR
@@ -2984,16 +2990,16 @@
i < n, i corresponds to an original observation, otherwise it
corresponds to a non-singleton cluster.
"""
- if type(T) != _array_type or T.dtype != 'int':
+ if type(T) != _array_type or T.dtype != np.int:
raise TypeError('T must be a one-dimensional numpy array of integers.')
is_valid_linkage(Z, throw=True, name='Z')
if len(T) != Z.shape[0] + 1:
raise ValueError('Mismatch: len(T)!=Z.shape[0] + 1.')
- Cl = numpy.unique(T)
+ Cl = np.unique(T)
kk = len(Cl)
- L = numpy.zeros((kk,), dtype='int32')
- M = numpy.zeros((kk,), dtype='int32')
+ L = np.zeros((kk,), dtype=np.int32)
+ M = np.zeros((kk,), dtype=np.int32)
n = Z.shape[0] + 1
[Z, T] = _copy_arrays_if_base_present([Z, T])
s = _hierarchy_wrap.leaders_wrap(Z, T, L, M, int(kk), int(n))
More information about the Scipy-svn
mailing list