[Scipy-svn] r4957 - in trunk/scipy/spatial: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Mon Nov 3 04:06:59 EST 2008
Author: peridot
Date: 2008-11-03 03:06:52 -0600 (Mon, 03 Nov 2008)
New Revision: 4957
Added:
trunk/scipy/spatial/ckdtree.c
trunk/scipy/spatial/ckdtree.pyx
trunk/scipy/spatial/info.py
trunk/scipy/spatial/kdtree.py
trunk/scipy/spatial/tests/test_kdtree.py
Removed:
trunk/scipy/spatial/info.py
Modified:
trunk/scipy/spatial/__init__.py
trunk/scipy/spatial/setup.py
Log:
Merged kdtree code. I think scons support is broken.
Modified: trunk/scipy/spatial/__init__.py
===================================================================
--- trunk/scipy/spatial/__init__.py 2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/__init__.py 2008-11-03 09:06:52 UTC (rev 4957)
@@ -3,8 +3,11 @@
#
from info import __doc__
+from kdtree import *
+from ckdtree import *
-__all__ = ['distance']
+__all__ = filter(lambda s:not s.startswith('_'),dir())
+__all__ += ['distance']
import distance
from numpy.testing import Tester
Copied: trunk/scipy/spatial/ckdtree.c (from rev 4956, branches/spatial/scipy/spatial/ckdtree.c)
Property changes on: trunk/scipy/spatial/ckdtree.c
___________________________________________________________________
Name: svn:mergeinfo
+
Copied: trunk/scipy/spatial/ckdtree.pyx (from rev 4956, branches/spatial/scipy/spatial/ckdtree.pyx)
Deleted: trunk/scipy/spatial/info.py
===================================================================
--- trunk/scipy/spatial/info.py 2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/info.py 2008-11-03 09:06:52 UTC (rev 4957)
@@ -1,8 +0,0 @@
-"""
-Distance Computation
-====================
-
- The distance module provides functions for computing distances between
- pairs of vectors from a set of observation vectors.
-
-"""
Copied: trunk/scipy/spatial/info.py (from rev 4956, branches/spatial/scipy/spatial/info.py)
===================================================================
--- branches/spatial/scipy/spatial/info.py 2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/info.py 2008-11-03 09:06:52 UTC (rev 4957)
@@ -0,0 +1,12 @@
+"""
+Spatial data structures and algorithms
+======================================
+
+Nearest-neighbor queries:
+
+ KDTree -- class for efficient nearest-neighbor queries
+ distance -- module containing many different distance measures
+
+"""
+
+postpone_import = 1
Property changes on: trunk/scipy/spatial/info.py
___________________________________________________________________
Name: svn:mergeinfo
+
Copied: trunk/scipy/spatial/kdtree.py (from rev 4956, branches/spatial/scipy/spatial/kdtree.py)
===================================================================
--- branches/spatial/scipy/spatial/kdtree.py 2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/kdtree.py 2008-11-03 09:06:52 UTC (rev 4957)
@@ -0,0 +1,675 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+import numpy as np
+from heapq import heappush, heappop
+import scipy.sparse
+
+def minkowski_distance_p(x,y,p=2):
+ """Compute the pth power of the L**p distance between x and y
+
+ For efficiency, this function computes the L**p distance but does
+ not extract the pth root. If p is 1 or infinity, this is equal to
+ the actual L**p distance.
+ """
+ x = np.asarray(x)
+ y = np.asarray(y)
+ if p==np.inf:
+ return np.amax(np.abs(y-x),axis=-1)
+ elif p==1:
+ return np.sum(np.abs(y-x),axis=-1)
+ else:
+ return np.sum(np.abs(y-x)**p,axis=-1)
+def minkowski_distance(x,y,p=2):
+ """Compute the L**p distance between x and y"""
+ x = np.asarray(x)
+ y = np.asarray(y)
+ if p==np.inf or p==1:
+ return minkowski_distance_p(x,y,p)
+ else:
+ return minkowski_distance_p(x,y,p)**(1./p)
+
+class Rectangle(object):
+ """Hyperrectangle class.
+
+ Represents a Cartesian product of intervals.
+ """
+ def __init__(self, maxes, mins):
+ """Construct a hyperrectangle."""
+ self.maxes = np.maximum(maxes,mins).astype(np.float)
+ self.mins = np.minimum(maxes,mins).astype(np.float)
+ self.m, = self.maxes.shape
+
+ def __repr__(self):
+ return "<Rectangle %s>" % zip(self.mins, self.maxes)
+
+ def volume(self):
+ """Total volume."""
+ return np.prod(self.maxes-self.mins)
+
+ def split(self, d, split):
+ """Produce two hyperrectangles by splitting along axis d.
+
+ In general, if you need to compute maximum and minimum
+ distances to the children, it can be done more efficiently
+ by updating the maximum and minimum distances to the parent.
+ """ # FIXME: do this
+ mid = np.copy(self.maxes)
+ mid[d] = split
+ less = Rectangle(self.mins, mid)
+ mid = np.copy(self.mins)
+ mid[d] = split
+ greater = Rectangle(mid, self.maxes)
+ return less, greater
+
+ def min_distance_point(self, x, p=2.):
+ """Compute the minimum distance between x and a point in the hyperrectangle."""
+ return minkowski_distance(0, np.maximum(0,np.maximum(self.mins-x,x-self.maxes)),p)
+
+ def max_distance_point(self, x, p=2.):
+ """Compute the maximum distance between x and a point in the hyperrectangle."""
+ return minkowski_distance(0, np.maximum(self.maxes-x,x-self.mins),p)
+
+ def min_distance_rectangle(self, other, p=2.):
+ """Compute the minimum distance between points in the two hyperrectangles."""
+ return minkowski_distance(0, np.maximum(0,np.maximum(self.mins-other.maxes,other.mins-self.maxes)),p)
+
+ def max_distance_rectangle(self, other, p=2.):
+ """Compute the maximum distance between points in the two hyperrectangles."""
+ return minkowski_distance(0, np.maximum(self.maxes-other.mins,other.maxes-self.mins),p)
+
+
+class KDTree(object):
+ """kd-tree for quick nearest-neighbor lookup
+
+ This class provides an index into a set of k-dimensional points
+ which can be used to rapidly look up the nearest neighbors of any
+ point.
+
+ The algorithm used is described in Maneewongvatana and Mount 1999.
+ The general idea is that the kd-tree is a binary trie, each of whose
+ nodes represents an axis-aligned hyperrectangle. Each node specifies
+ an axis and splits the set of points based on whether their coordinate
+ along that axis is greater than or less than a particular value.
+
+ During construction, the axis and splitting point are chosen by the
+ "sliding midpoint" rule, which ensures that the cells do not all
+ become long and thin.
+
+ The tree can be queried for the r closest neighbors of any given point
+ (optionally returning only those within some maximum distance of the
+ point). It can also be queried, with a substantial gain in efficiency,
+ for the r approximate closest neighbors.
+
+ For large dimensions (20 is already large) do not expect this to run
+ significantly faster than brute force. High-dimensional nearest-neighbor
+ queries are a substantial open problem in computer science.
+
+ The tree also supports all-neighbors queries, both with arrays of points
+ and with other kd-trees. These do use a reasonably efficient algorithm,
+ but the kd-tree is not necessarily the best data structure for this
+ sort of calculation.
+ """
+
+ def __init__(self, data, leafsize=10):
+ """Construct a kd-tree.
+
+ Parameters:
+ ===========
+
+ data : array-like, shape (n,k)
+ The data points to be indexed. This array is not copied, and
+ so modifying this data will result in bogus results.
+ leafsize : positive integer
+ The number of points at which the algorithm switches over to
+ brute-force.
+ """
+ self.data = np.asarray(data)
+ self.n, self.m = np.shape(self.data)
+ self.leafsize = int(leafsize)
+ if self.leafsize<1:
+ raise ValueError("leafsize must be at least 1")
+ self.maxes = np.amax(self.data,axis=0)
+ self.mins = np.amin(self.data,axis=0)
+
+ self.tree = self.__build(np.arange(self.n), self.maxes, self.mins)
+
+ class node(object):
+ pass
+ class leafnode(node):
+ def __init__(self, idx):
+ self.idx = idx
+ self.children = len(idx)
+ class innernode(node):
+ def __init__(self, split_dim, split, less, greater):
+ self.split_dim = split_dim
+ self.split = split
+ self.less = less
+ self.greater = greater
+ self.children = less.children+greater.children
+
+ def __build(self, idx, maxes, mins):
+ if len(idx)<=self.leafsize:
+ return KDTree.leafnode(idx)
+ else:
+ data = self.data[idx]
+ #maxes = np.amax(data,axis=0)
+ #mins = np.amin(data,axis=0)
+ d = np.argmax(maxes-mins)
+ maxval = maxes[d]
+ minval = mins[d]
+ if maxval==minval:
+ # all points are identical; warn user?
+ return KDTree.leafnode(idx)
+ data = data[:,d]
+
+ # sliding midpoint rule; see Maneewongvatana and Mount 1999
+ # for arguments that this is a good idea.
+ split = (maxval+minval)/2
+ less_idx = np.nonzero(data<=split)[0]
+ greater_idx = np.nonzero(data>split)[0]
+ if len(less_idx)==0:
+ split = np.amin(data)
+ less_idx = np.nonzero(data<=split)[0]
+ greater_idx = np.nonzero(data>split)[0]
+ if len(greater_idx)==0:
+ split = np.amax(data)
+ less_idx = np.nonzero(data<split)[0]
+ greater_idx = np.nonzero(data>=split)[0]
+ if len(less_idx)==0:
+ # _still_ zero? all must have the same value
+ assert np.all(data==data[0]), "Troublesome data array: %s" % data
+ split = data[0]
+ less_idx = np.arange(len(data)-1)
+ greater_idx = np.array([len(data)-1])
+
+ lessmaxes = np.copy(maxes)
+ lessmaxes[d] = split
+ greatermins = np.copy(mins)
+ greatermins[d] = split
+ return KDTree.innernode(d, split,
+ self.__build(idx[less_idx],lessmaxes,mins),
+ self.__build(idx[greater_idx],maxes,greatermins))
+
+ def __query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
+
+ side_distances = np.maximum(0,np.maximum(x-self.maxes,self.mins-x))
+ if p!=np.inf:
+ side_distances**=p
+ min_distance = np.sum(side_distances)
+ else:
+ min_distance = np.amax(side_distances)
+
+ # priority queue for chasing nodes
+ # entries are:
+ # minimum distance between the cell and the target
+ # distances between the nearest side of the cell and the target
+ # the head node of the cell
+ q = [(min_distance,
+ tuple(side_distances),
+ self.tree)]
+ # priority queue for the nearest neighbors
+ # furthest known neighbor first
+ # entries are (-distance**p, i)
+ neighbors = []
+
+ if eps==0:
+ epsfac=1
+ elif p==np.inf:
+ epsfac = 1/(1+eps)
+ else:
+ epsfac = 1/(1+eps)**p
+
+ if p!=np.inf and distance_upper_bound!=np.inf:
+ distance_upper_bound = distance_upper_bound**p
+
+ while q:
+ min_distance, side_distances, node = heappop(q)
+ if isinstance(node, KDTree.leafnode):
+ # brute-force
+ data = self.data[node.idx]
+ ds = minkowski_distance_p(data,x[np.newaxis,:],p)
+ for i in range(len(ds)):
+ if ds[i]<distance_upper_bound:
+ if len(neighbors)==k:
+ heappop(neighbors)
+ heappush(neighbors, (-ds[i], node.idx[i]))
+ if len(neighbors)==k:
+ distance_upper_bound = -neighbors[0][0]
+ else:
+ # we don't push cells that are too far onto the queue at all,
+ # but since the distance_upper_bound decreases, we might get
+ # here even if the cell's too far
+ if min_distance>distance_upper_bound*epsfac:
+ # since this is the nearest cell, we're done, bail out
+ break
+ # compute minimum distances to the children and push them on
+ if x[node.split_dim]<node.split:
+ near, far = node.less, node.greater
+ else:
+ near, far = node.greater, node.less
+
+ # near child is at the same distance as the current node
+ heappush(q,(min_distance, side_distances, near))
+
+ # far child is further by an amount depending only
+ # on the split value
+ sd = list(side_distances)
+ if p == np.inf:
+ min_distance = max(min_distance, abs(node.split-x[node.split_dim]))
+ elif p == 1:
+ sd[node.split_dim] = np.abs(node.split-x[node.split_dim])
+ min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
+ else:
+ sd[node.split_dim] = np.abs(node.split-x[node.split_dim])**p
+ min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
+
+ # far child might be too far, if so, don't bother pushing it
+ if min_distance<=distance_upper_bound*epsfac:
+ heappush(q,(min_distance, tuple(sd), far))
+
+ if p==np.inf:
+ return sorted([(-d,i) for (d,i) in neighbors])
+ else:
+ return sorted([((-d)**(1./p),i) for (d,i) in neighbors])
+
+ def query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
+ """query the kd-tree for nearest neighbors
+
+ Parameters:
+ ===========
+
+ x : array-like, last dimension self.m
+ An array of points to query.
+ k : integer
+ The number of nearest neighbors to return.
+ eps : nonnegative float
+ Return approximate nearest neighbors; the kth returned value
+ is guaranteed to be no further than (1+eps) times the
+ distance to the real kth nearest neighbor.
+ p : float, 1<=p<=infinity
+ Which Minkowski p-norm to use.
+ 1 is the sum-of-absolute-values "Manhattan" distance
+ 2 is the usual Euclidean distance
+ infinity is the maximum-coordinate-difference distance
+ distance_upper_bound : nonnegative float
+ Return only neighbors within this distance. This is used to prune
+ tree searches, so if you are doing a series of nearest-neighbor
+ queries, it may help to supply the distance to the nearest neighbor
+ of the most recent point.
+
+ Returns:
+ ========
+
+ d : array of floats
+ The distances to the nearest neighbors.
+ If x has shape tuple+(self.m,), then d has shape tuple if
+ k is one, or tuple+(k,) if k is larger than one. Missing
+ neighbors are indicated with infinite distances. If k is None,
+ then d is an object array of shape tuple, containing lists
+ of distances. In either case the hits are sorted by distance
+ (nearest first).
+ i : array of integers
+ The locations of the neighbors in self.data. i is the same
+ shape as d.
+ """
+ x = np.asarray(x)
+ if np.shape(x)[-1] != self.m:
+ raise ValueError("x must consist of vectors of length %d but has shape %s" % (self.m, np.shape(x)))
+ if p<1:
+ raise ValueError("Only p-norms with 1<=p<=infinity permitted")
+ retshape = np.shape(x)[:-1]
+ if retshape!=():
+ if k>1:
+ dd = np.empty(retshape+(k,),dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(retshape+(k,),dtype=np.int)
+ ii.fill(self.n)
+ elif k==1:
+ dd = np.empty(retshape,dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(retshape,dtype=np.int)
+ ii.fill(self.n)
+ elif k is None:
+ dd = np.empty(retshape,dtype=np.object)
+ ii = np.empty(retshape,dtype=np.object)
+ else:
+ raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None")
+ for c in np.ndindex(retshape):
+ hits = self.__query(x[c], k=k, p=p, distance_upper_bound=distance_upper_bound)
+ if k>1:
+ for j in range(len(hits)):
+ dd[c+(j,)], ii[c+(j,)] = hits[j]
+ elif k==1:
+ if len(hits)>0:
+ dd[c], ii[c] = hits[0]
+ else:
+ dd[c] = np.inf
+ ii[c] = self.n
+ elif k is None:
+ dd[c] = [d for (d,i) in hits]
+ ii[c] = [i for (d,i) in hits]
+ return dd, ii
+ else:
+ hits = self.__query(x, k=k, p=p, distance_upper_bound=distance_upper_bound)
+ if k==1:
+ if len(hits)>0:
+ return hits[0]
+ else:
+ return np.inf, self.n
+ elif k>1:
+ dd = np.empty(k,dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(k,dtype=np.int)
+ ii.fill(self.n)
+ for j in range(len(hits)):
+ dd[j], ii[j] = hits[j]
+ return dd, ii
+ elif k is None:
+ return [d for (d,i) in hits], [i for (d,i) in hits]
+ else:
+ raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None")
+
+
+ def __query_ball_point(self, x, r, p=2., eps=0):
+ R = Rectangle(self.maxes, self.mins)
+
+ def traverse_checking(node, rect):
+ if rect.min_distance_point(x,p)>=r/(1.+eps):
+ return []
+ elif rect.max_distance_point(x,p)<r*(1.+eps):
+ return traverse_no_checking(node)
+ elif isinstance(node, KDTree.leafnode):
+ d = self.data[node.idx]
+ return node.idx[minkowski_distance(d,x,p)<=r].tolist()
+ else:
+ less, greater = rect.split(node.split_dim, node.split)
+ return traverse_checking(node.less, less)+traverse_checking(node.greater, greater)
+ def traverse_no_checking(node):
+ if isinstance(node, KDTree.leafnode):
+
+ return node.idx.tolist()
+ else:
+ return traverse_no_checking(node.less)+traverse_no_checking(node.greater)
+
+ return traverse_checking(self.tree, R)
+
+ def query_ball_point(self, x, r, p=2., eps=0):
+ """Find all points within r of x
+
+ Parameters
+ ==========
+
+ x : array_like, shape tuple + (self.m,)
+ The point or points to search for neighbors of
+ r : positive float
+ The radius of points to return
+ p : float 1<=p<=infinity
+ Which Minkowski p-norm to use
+ eps : nonnegative float
+ Approximate search. Branches of the tree are not explored
+ if their nearest points are further than r/(1+eps), and branches
+ are added in bulk if their furthest points are nearer than r*(1+eps).
+
+ Returns
+ =======
+
+ results : list or array of lists
+ If x is a single point, returns a list of the indices of the neighbors
+ of x. If x is an array of points, returns an object array of shape tuple
+ containing lists of neighbors.
+
+
+ Note: if you have many points whose neighbors you want to find, you may save
+ substantial amounts of time by putting them in a KDTree and using query_ball_tree.
+ """
+ x = np.asarray(x)
+ if x.shape[-1]!=self.m:
+ raise ValueError("Searching for a %d-dimensional point in a %d-dimensional KDTree" % (x.shape[-1],self.m))
+ if len(x.shape)==1:
+ return self.__query_ball_point(x,r,p,eps)
+ else:
+ retshape = x.shape[:-1]
+ result = np.empty(retshape,dtype=np.object)
+ for c in np.ndindex(retshape):
+ result[c] = self.__query_ball_point(x[c], r, p=p, eps=eps)
+ return result
+
+ def query_ball_tree(self, other, r, p=2., eps=0):
+ """Find all pairs of points whose distance is at most r
+
+ Parameters
+ ==========
+
+ other : KDTree
+ The tree containing points to search against
+ r : positive float
+ The maximum distance
+ p : float 1<=p<=infinity
+ Which Minkowski norm to use
+ eps : nonnegative float
+ Approximate search. Branches of the tree are not explored
+ if their nearest points are further than r/(1+eps), and branches
+ are added in bulk if their furthest points are nearer than r*(1+eps).
+
+ Returns
+ =======
+
+ results : list of lists
+ For each element self.data[i] of this tree, results[i] is a list of the
+ indices of its neighbors in other.data.
+ """
+ results = [[] for i in range(self.n)]
+ def traverse_checking(node1, rect1, node2, rect2):
+ if rect1.min_distance_rectangle(rect2, p)>r/(1.+eps):
+ return
+ elif rect1.max_distance_rectangle(rect2, p)<r*(1.+eps):
+ traverse_no_checking(node1, node2)
+ elif isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ d = other.data[node2.idx]
+ for i in node1.idx:
+ results[i] += node2.idx[minkowski_distance(d,self.data[i],p)<=r].tolist()
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse_checking(node1,rect1,node2.less,less)
+ traverse_checking(node1,rect1,node2.greater,greater)
+ elif isinstance(node2, KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse_checking(node1.less,less,node2,rect2)
+ traverse_checking(node1.greater,greater,node2,rect2)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse_checking(node1.less,less1,node2.less,less2)
+ traverse_checking(node1.less,less1,node2.greater,greater2)
+ traverse_checking(node1.greater,greater1,node2.less,less2)
+ traverse_checking(node1.greater,greater1,node2.greater,greater2)
+
+ def traverse_no_checking(node1, node2):
+ if isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ for i in node1.idx:
+ results[i] += node2.idx.tolist()
+ else:
+ traverse_no_checking(node1, node2.less)
+ traverse_no_checking(node1, node2.greater)
+ else:
+ traverse_no_checking(node1.less, node2)
+ traverse_no_checking(node1.greater, node2)
+
+ traverse_checking(self.tree, Rectangle(self.maxes, self.mins),
+ other.tree, Rectangle(other.maxes, other.mins))
+ return results
+
+
+ def count_neighbors(self, other, r, p=2.):
+ """Count how many nearby pairs can be formed.
+
+ Count the number of pairs (x1,x2) can be formed, with x1 drawn
+ from self and x2 drawn from other, and where distance(x1,x2,p)<=r.
+ This is the "two-point correlation" described in Gray and Moore 2000,
+ "N-body problems in statistical learning", and the code here is based
+ on their algorithm.
+
+ Parameters
+ ==========
+
+ other : KDTree
+
+ r : float or one-dimensional array of floats
+ The radius to produce a count for. Multiple radii are searched with a single
+ tree traversal.
+ p : float, 1<=p<=infinity
+ Which Minkowski p-norm to use
+
+ Returns
+ =======
+
+ result : integer or one-dimensional array of integers
+ The number of pairs. Note that this is internally stored in a numpy int,
+ and so may overflow if very large (two billion).
+ """
+
+ def traverse(node1, rect1, node2, rect2, idx):
+ min_r = rect1.min_distance_rectangle(rect2,p)
+ max_r = rect1.max_distance_rectangle(rect2,p)
+ c_greater = r[idx]>max_r
+ result[idx[c_greater]] += node1.children*node2.children
+ idx = idx[(min_r<=r[idx]) & (r[idx]<=max_r)]
+ if len(idx)==0:
+ return
+
+ if isinstance(node1,KDTree.leafnode):
+ if isinstance(node2,KDTree.leafnode):
+ ds = minkowski_distance(self.data[node1.idx][:,np.newaxis,:],
+ other.data[node2.idx][np.newaxis,:,:],
+ p).ravel()
+ ds.sort()
+ result[idx] += np.searchsorted(ds,r[idx],side='right')
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse(node1, rect1, node2.less, less, idx)
+ traverse(node1, rect1, node2.greater, greater, idx)
+ else:
+ if isinstance(node2,KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse(node1.less, less, node2, rect2, idx)
+ traverse(node1.greater, greater, node2, rect2, idx)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse(node1.less,less1,node2.less,less2,idx)
+ traverse(node1.less,less1,node2.greater,greater2,idx)
+ traverse(node1.greater,greater1,node2.less,less2,idx)
+ traverse(node1.greater,greater1,node2.greater,greater2,idx)
+ R1 = Rectangle(self.maxes, self.mins)
+ R2 = Rectangle(other.maxes, other.mins)
+ if np.shape(r) == ():
+ r = np.array([r])
+ result = np.zeros(1,dtype=int)
+ traverse(self.tree, R1, other.tree, R2, np.arange(1))
+ return result[0]
+ elif len(np.shape(r))==1:
+ r = np.asarray(r)
+ n, = r.shape
+ result = np.zeros(n,dtype=int)
+ traverse(self.tree, R1, other.tree, R2, np.arange(n))
+ return result
+ else:
+ raise ValueError("r must be either a single value or a one-dimensional array of values")
+
+ def sparse_distance_matrix(self, other, max_distance, p=2.):
+ """Compute a sparse distance matrix
+
+ Computes a distance matrix between two KDTrees, leaving as zero
+ any distance greater than max_distance.
+
+ Parameters
+ ==========
+
+ other : KDTree
+
+ max_distance : positive float
+
+ Returns
+ =======
+
+ result : dok_matrix
+ Sparse matrix representing the results in "dictionary of keys" format.
+ """
+ result = scipy.sparse.dok_matrix((self.n,other.n))
+
+ def traverse(node1, rect1, node2, rect2):
+ if rect1.min_distance_rectangle(rect2, p)>max_distance:
+ return
+ elif isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ for i in node1.idx:
+ for j in node2.idx:
+ d = minkowski_distance(self.data[i],other.data[j],p)
+ if d<=max_distance:
+ result[i,j] = d
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse(node1,rect1,node2.less,less)
+ traverse(node1,rect1,node2.greater,greater)
+ elif isinstance(node2, KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse(node1.less,less,node2,rect2)
+ traverse(node1.greater,greater,node2,rect2)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse(node1.less,less1,node2.less,less2)
+ traverse(node1.less,less1,node2.greater,greater2)
+ traverse(node1.greater,greater1,node2.less,less2)
+ traverse(node1.greater,greater1,node2.greater,greater2)
+ traverse(self.tree, Rectangle(self.maxes, self.mins),
+ other.tree, Rectangle(other.maxes, other.mins))
+
+ return result
+
+
+def distance_matrix(x,y,p=2,threshold=1000000):
+ """Compute the distance matrix.
+
+ Computes the matrix of all pairwise distances.
+
+ Parameters
+ ==========
+
+ x : array-like, m by k
+ y : array-like, n by k
+ p : float 1<=p<=infinity
+ Which Minkowski p-norm to use.
+ threshold : positive integer
+ If m*n*k>threshold use a python loop instead of creating
+ a very large temporary.
+
+ Returns
+ =======
+
+ result : array-like, m by n
+
+
+ """
+
+ x = np.asarray(x)
+ m, k = x.shape
+ y = np.asarray(y)
+ n, kk = y.shape
+
+ if k != kk:
+ raise ValueError("x contains %d-dimensional vectors but y contains %d-dimensional vectors" % (k, kk))
+
+ if m*n*k <= threshold:
+ return minkowski_distance(x[:,np.newaxis,:],y[np.newaxis,:,:],p)
+ else:
+ result = np.empty((m,n),dtype=np.float) #FIXME: figure out the best dtype
+ if m<n:
+ for i in range(m):
+ result[i,:] = minkowski_distance(x[i],y,p)
+ else:
+ for j in range(n):
+ result[:,j] = minkowski_distance(x,y[j],p)
+ return result
Property changes on: trunk/scipy/spatial/kdtree.py
___________________________________________________________________
Name: svn:mergeinfo
+
Modified: trunk/scipy/spatial/setup.py
===================================================================
--- trunk/scipy/spatial/setup.py 2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/setup.py 2008-11-03 09:06:52 UTC (rev 4957)
@@ -8,6 +8,8 @@
config.add_data_dir('tests')
+ config.add_extension('ckdtree', sources=['ckdtree.c']) # FIXME: cython
+
config.add_extension('_distance_wrap',
sources=[join('src', 'distance_wrap.c'), join('src', 'distance.c')],
include_dirs = [get_numpy_include_dirs()])
@@ -19,7 +21,7 @@
setup(maintainer = "SciPy Developers",
author = "Eric Jones",
maintainer_email = "scipy-dev at scipy.org",
- description = "Distance Computation",
+ description = "Spatial algorithms and data structures",
url = "http://www.scipy.org",
license = "SciPy License (BSD Style)",
**configuration(top_path='').todict()
Copied: trunk/scipy/spatial/tests/test_kdtree.py (from rev 4956, branches/spatial/scipy/spatial/tests/test_kdtree.py)
===================================================================
--- branches/spatial/scipy/spatial/tests/test_kdtree.py 2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/tests/test_kdtree.py 2008-11-03 09:06:52 UTC (rev 4957)
@@ -0,0 +1,428 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+from numpy.testing import *
+
+import numpy as np
+from scipy.spatial import KDTree, Rectangle, distance_matrix, cKDTree
+from scipy.spatial import minkowski_distance as distance
+
+class ConsistencyTests:
+ def test_nearest(self):
+ x = self.x
+ d, i = self.kdtree.query(x, 1)
+ assert_almost_equal(d**2,np.sum((x-self.data[i])**2))
+ eps = 1e-8
+ assert np.all(np.sum((self.data-x[np.newaxis,:])**2,axis=1)>d**2-eps)
+
+ def test_m_nearest(self):
+ x = self.x
+ m = self.m
+ dd, ii = self.kdtree.query(x, m)
+ d = np.amax(dd)
+ i = ii[np.argmax(dd)]
+ assert_almost_equal(d**2,np.sum((x-self.data[i])**2))
+ eps = 1e-8
+ assert_equal(np.sum(np.sum((self.data-x[np.newaxis,:])**2,axis=1)<d**2+eps),m)
+
+ def test_points_near(self):
+ x = self.x
+ d = self.d
+ dd, ii = self.kdtree.query(x, k=self.kdtree.n, distance_upper_bound=d)
+ eps = 1e-8
+ hits = 0
+ for near_d, near_i in zip(dd,ii):
+ if near_d==np.inf:
+ continue
+ hits += 1
+ assert_almost_equal(near_d**2,np.sum((x-self.data[near_i])**2))
+ assert near_d<d+eps, "near_d=%g should be less than %g" % (near_d,d)
+ assert_equal(np.sum(np.sum((self.data-x[np.newaxis,:])**2,axis=1)<d**2+eps),hits)
+
+ def test_points_near_l1(self):
+ x = self.x
+ d = self.d
+ dd, ii = self.kdtree.query(x, k=self.kdtree.n, p=1, distance_upper_bound=d)
+ eps = 1e-8
+ hits = 0
+ for near_d, near_i in zip(dd,ii):
+ if near_d==np.inf:
+ continue
+ hits += 1
+ assert_almost_equal(near_d,distance(x,self.data[near_i],1))
+ assert near_d<d+eps, "near_d=%g should be less than %g" % (near_d,d)
+ assert_equal(np.sum(distance(self.data,x,1)<d+eps),hits)
+ def test_points_near_linf(self):
+ x = self.x
+ d = self.d
+ dd, ii = self.kdtree.query(x, k=self.kdtree.n, p=np.inf, distance_upper_bound=d)
+ eps = 1e-8
+ hits = 0
+ for near_d, near_i in zip(dd,ii):
+ if near_d==np.inf:
+ continue
+ hits += 1
+ assert_almost_equal(near_d,distance(x,self.data[near_i],np.inf))
+ assert near_d<d+eps, "near_d=%g should be less than %g" % (near_d,d)
+ assert_equal(np.sum(distance(self.data,x,np.inf)<d+eps),hits)
+
+ def test_approx(self):
+ x = self.x
+ k = self.k
+ eps = 0.1
+ d_real, i_real = self.kdtree.query(x, k)
+ d, i = self.kdtree.query(x, k, eps=eps)
+ assert np.all(d<=d_real*(1+eps))
+
+
+class test_random(ConsistencyTests):
+ def setUp(self):
+ self.n = 100
+ self.m = 4
+ self.data = np.random.randn(self.n, self.m)
+ self.kdtree = KDTree(self.data,leafsize=2)
+ self.x = np.random.randn(self.m)
+ self.d = 0.2
+ self.k = 10
+
+class test_random_far(test_random):
+ def setUp(self):
+ test_random.setUp(self)
+ self.x = np.random.randn(self.m)+10
+
+class test_small(ConsistencyTests):
+ def setUp(self):
+ self.data = np.array([[0,0,0],
+ [0,0,1],
+ [0,1,0],
+ [0,1,1],
+ [1,0,0],
+ [1,0,1],
+ [1,1,0],
+ [1,1,1]])
+ self.kdtree = KDTree(self.data)
+ self.n = self.kdtree.n
+ self.m = self.kdtree.m
+ self.x = np.random.randn(3)
+ self.d = 0.5
+ self.k = 4
+
+ def test_nearest(self):
+ assert_array_equal(
+ self.kdtree.query((0,0,0.1), 1),
+ (0.1,0))
+ def test_nearest_two(self):
+ assert_array_equal(
+ self.kdtree.query((0,0,0.1), 2),
+ ([0.1,0.9],[0,1]))
+class test_small_nonleaf(test_small):
+ def setUp(self):
+ test_small.setUp(self)
+ self.kdtree = KDTree(self.data,leafsize=1)
+
+class test_small_compiled(test_small):
+ def setUp(self):
+ test_small.setUp(self)
+ self.kdtree = cKDTree(self.data)
+class test_small_nonleaf_compiled(test_small):
+ def setUp(self):
+ test_small.setUp(self)
+ self.kdtree = cKDTree(self.data,leafsize=1)
+class test_random_compiled(test_random):
+ def setUp(self):
+ test_random.setUp(self)
+ self.kdtree = cKDTree(self.data)
+class test_random_far_compiled(test_random_far):
+ def setUp(self):
+ test_random_far.setUp(self)
+ self.kdtree = cKDTree(self.data)
+
+class test_vectorization:
+ def setUp(self):
+ self.data = np.array([[0,0,0],
+ [0,0,1],
+ [0,1,0],
+ [0,1,1],
+ [1,0,0],
+ [1,0,1],
+ [1,1,0],
+ [1,1,1]])
+ self.kdtree = KDTree(self.data)
+
+ def test_single_query(self):
+ d, i = self.kdtree.query([0,0,0])
+ assert isinstance(d,float)
+ assert isinstance(i,int)
+
+ def test_vectorized_query(self):
+ d, i = self.kdtree.query(np.zeros((2,4,3)))
+ assert_equal(np.shape(d),(2,4))
+ assert_equal(np.shape(i),(2,4))
+
+ def test_single_query_multiple_neighbors(self):
+ s = 23
+ kk = self.kdtree.n+s
+ d, i = self.kdtree.query([0,0,0],k=kk)
+ assert_equal(np.shape(d),(kk,))
+ assert_equal(np.shape(i),(kk,))
+ assert np.all(~np.isfinite(d[-s:]))
+ assert np.all(i[-s:]==self.kdtree.n)
+ def test_vectorized_query_multiple_neighbors(self):
+ s = 23
+ kk = self.kdtree.n+s
+ d, i = self.kdtree.query(np.zeros((2,4,3)),k=kk)
+ assert_equal(np.shape(d),(2,4,kk))
+ assert_equal(np.shape(i),(2,4,kk))
+ assert np.all(~np.isfinite(d[:,:,-s:]))
+ assert np.all(i[:,:,-s:]==self.kdtree.n)
+ def test_single_query_all_neighbors(self):
+ d, i = self.kdtree.query([0,0,0],k=None,distance_upper_bound=1.1)
+ assert isinstance(d,list)
+ assert isinstance(i,list)
+ def test_vectorized_query_all_neighbors(self):
+ d, i = self.kdtree.query(np.zeros((2,4,3)),k=None,distance_upper_bound=1.1)
+ assert_equal(np.shape(d),(2,4))
+ assert_equal(np.shape(i),(2,4))
+
+ assert isinstance(d[0,0],list)
+ assert isinstance(i[0,0],list)
+
+class test_vectorization_compiled:
+ def setUp(self):
+ self.data = np.array([[0,0,0],
+ [0,0,1],
+ [0,1,0],
+ [0,1,1],
+ [1,0,0],
+ [1,0,1],
+ [1,1,0],
+ [1,1,1]])
+ self.kdtree = KDTree(self.data)
+
+ def test_single_query(self):
+ d, i = self.kdtree.query([0,0,0])
+ assert isinstance(d,float)
+ assert isinstance(i,int)
+
+ def test_vectorized_query(self):
+ d, i = self.kdtree.query(np.zeros((2,4,3)))
+ assert_equal(np.shape(d),(2,4))
+ assert_equal(np.shape(i),(2,4))
+
+ def test_single_query_multiple_neighbors(self):
+ s = 23
+ kk = self.kdtree.n+s
+ d, i = self.kdtree.query([0,0,0],k=kk)
+ assert_equal(np.shape(d),(kk,))
+ assert_equal(np.shape(i),(kk,))
+ assert np.all(~np.isfinite(d[-s:]))
+ assert np.all(i[-s:]==self.kdtree.n)
+ def test_vectorized_query_multiple_neighbors(self):
+ s = 23
+ kk = self.kdtree.n+s
+ d, i = self.kdtree.query(np.zeros((2,4,3)),k=kk)
+ assert_equal(np.shape(d),(2,4,kk))
+ assert_equal(np.shape(i),(2,4,kk))
+ assert np.all(~np.isfinite(d[:,:,-s:]))
+ assert np.all(i[:,:,-s:]==self.kdtree.n)
+
+class ball_consistency:
+
+ def test_in_ball(self):
+ l = self.T.query_ball_point(self.x, self.d, p=self.p, eps=self.eps)
+ for i in l:
+ assert distance(self.data[i],self.x,self.p)<=self.d*(1.+self.eps)
+
+ def test_found_all(self):
+ c = np.ones(self.T.n,dtype=np.bool)
+ l = self.T.query_ball_point(self.x, self.d, p=self.p, eps=self.eps)
+ c[l] = False
+ assert np.all(distance(self.data[c],self.x,self.p)>=self.d/(1.+self.eps))
+
+class test_random_ball(ball_consistency):
+
+ def setUp(self):
+ n = 100
+ m = 4
+ self.data = np.random.randn(n,m)
+ self.T = KDTree(self.data,leafsize=2)
+ self.x = np.random.randn(m)
+ self.p = 2.
+ self.eps = 0
+ self.d = 0.2
+
+class test_random_ball_approx(test_random_ball):
+
+ def setUp(self):
+ test_random_ball.setUp(self)
+ self.eps = 0.1
+
+class test_random_ball_far(test_random_ball):
+
+ def setUp(self):
+ test_random_ball.setUp(self)
+ self.d = 2.
+
+class test_random_ball_l1(test_random_ball):
+
+ def setUp(self):
+ test_random_ball.setUp(self)
+ self.p = 1
+
+class test_random_ball_linf(test_random_ball):
+
+ def setUp(self):
+ test_random_ball.setUp(self)
+ self.p = np.inf
+
+def test_random_ball_vectorized():
+
+ n = 20
+ m = 5
+ T = KDTree(np.random.randn(n,m))
+
+ r = T.query_ball_point(np.random.randn(2,3,m),1)
+ assert_equal(r.shape,(2,3))
+ assert isinstance(r[0,0],list)
+
+class two_trees_consistency:
+
+ def test_all_in_ball(self):
+ r = self.T1.query_ball_tree(self.T2, self.d, p=self.p, eps=self.eps)
+ for i, l in enumerate(r):
+ for j in l:
+ assert distance(self.data1[i],self.data2[j],self.p)<=self.d*(1.+self.eps)
+ def test_found_all(self):
+ r = self.T1.query_ball_tree(self.T2, self.d, p=self.p, eps=self.eps)
+ for i, l in enumerate(r):
+ c = np.ones(self.T2.n,dtype=np.bool)
+ c[l] = False
+ assert np.all(distance(self.data2[c],self.data1[i],self.p)>=self.d/(1.+self.eps))
+
+class test_two_random_trees(two_trees_consistency):
+
+ def setUp(self):
+ n = 50
+ m = 4
+ self.data1 = np.random.randn(n,m)
+ self.T1 = KDTree(self.data1,leafsize=2)
+ self.data2 = np.random.randn(n,m)
+ self.T2 = KDTree(self.data2,leafsize=2)
+ self.p = 2.
+ self.eps = 0
+ self.d = 0.2
+
+class test_two_random_trees_far(test_two_random_trees):
+
+ def setUp(self):
+ test_two_random_trees.setUp(self)
+ self.d = 2
+
+class test_two_random_trees_linf(test_two_random_trees):
+
+ def setUp(self):
+ test_two_random_trees.setUp(self)
+ self.p = np.inf
+
+
+class test_rectangle:
+
+ def setUp(self):
+ self.rect = Rectangle([0,0],[1,1])
+
+ def test_min_inside(self):
+ assert_almost_equal(self.rect.min_distance_point([0.5,0.5]),0)
+ def test_min_one_side(self):
+ assert_almost_equal(self.rect.min_distance_point([0.5,1.5]),0.5)
+ def test_min_two_sides(self):
+ assert_almost_equal(self.rect.min_distance_point([2,2]),np.sqrt(2))
+ def test_max_inside(self):
+ assert_almost_equal(self.rect.max_distance_point([0.5,0.5]),1/np.sqrt(2))
+ def test_max_one_side(self):
+ assert_almost_equal(self.rect.max_distance_point([0.5,1.5]),np.hypot(0.5,1.5))
+ def test_max_two_sides(self):
+ assert_almost_equal(self.rect.max_distance_point([2,2]),2*np.sqrt(2))
+
+ def test_split(self):
+ less, greater = self.rect.split(0,0.1)
+ assert_array_equal(less.maxes,[0.1,1])
+ assert_array_equal(less.mins,[0,0])
+ assert_array_equal(greater.maxes,[1,1])
+ assert_array_equal(greater.mins,[0.1,0])
+
+
+def test_distance_l2():
+ assert_almost_equal(distance([0,0],[1,1],2),np.sqrt(2))
+def test_distance_l1():
+ assert_almost_equal(distance([0,0],[1,1],1),2)
+def test_distance_linf():
+ assert_almost_equal(distance([0,0],[1,1],np.inf),1)
+def test_distance_vectorization():
+ x = np.random.randn(10,1,3)
+ y = np.random.randn(1,7,3)
+ assert_equal(distance(x,y).shape,(10,7))
+
+class test_count_neighbors:
+
+ def setUp(self):
+ n = 50
+ m = 2
+ self.T1 = KDTree(np.random.randn(n,m),leafsize=2)
+ self.T2 = KDTree(np.random.randn(n,m),leafsize=2)
+
+ def test_one_radius(self):
+ r = 0.2
+ assert_equal(self.T1.count_neighbors(self.T2, r),
+ np.sum([len(l) for l in self.T1.query_ball_tree(self.T2,r)]))
+
+ def test_large_radius(self):
+ r = 1000
+ assert_equal(self.T1.count_neighbors(self.T2, r),
+ np.sum([len(l) for l in self.T1.query_ball_tree(self.T2,r)]))
+
+ def test_multiple_radius(self):
+ rs = np.exp(np.linspace(np.log(0.01),np.log(10),3))
+ results = self.T1.count_neighbors(self.T2, rs)
+ assert np.all(np.diff(results)>=0)
+ for r,result in zip(rs, results):
+ assert_equal(self.T1.count_neighbors(self.T2, r), result)
+
+class test_sparse_distance_matrix:
+ def setUp(self):
+ n = 50
+ m = 4
+ self.T1 = KDTree(np.random.randn(n,m),leafsize=2)
+ self.T2 = KDTree(np.random.randn(n,m),leafsize=2)
+ self.r = 0.3
+
+ def test_consistency_with_neighbors(self):
+ M = self.T1.sparse_distance_matrix(self.T2, self.r)
+ r = self.T1.query_ball_tree(self.T2, self.r)
+ for i,l in enumerate(r):
+ for j in l:
+ assert_equal(M[i,j],distance(self.T1.data[i],self.T2.data[j]))
+ for ((i,j),d) in M.items():
+ assert j in r[i]
+
+def test_distance_matrix():
+ m = 10
+ n = 11
+ k = 4
+ xs = np.random.randn(m,k)
+ ys = np.random.randn(n,k)
+ ds = distance_matrix(xs,ys)
+ assert_equal(ds.shape, (m,n))
+ for i in range(m):
+ for j in range(n):
+ assert_equal(distance(xs[i],ys[j]),ds[i,j])
+def test_distance_matrix_looping():
+ m = 10
+ n = 11
+ k = 4
+ xs = np.random.randn(m,k)
+ ys = np.random.randn(n,k)
+ ds = distance_matrix(xs,ys)
+ dsl = distance_matrix(xs,ys,threshold=1)
+ assert_equal(ds,dsl)
+
+if __name__=="__main__":
+ run_module_suite()
Property changes on: trunk/scipy/spatial/tests/test_kdtree.py
___________________________________________________________________
Name: svn:mergeinfo
+
More information about the Scipy-svn
mailing list