[Scipy-svn] r4957 - in trunk/scipy/spatial: . tests

scipy-svn at scipy.org scipy-svn at scipy.org
Mon Nov 3 04:06:59 EST 2008


Author: peridot
Date: 2008-11-03 03:06:52 -0600 (Mon, 03 Nov 2008)
New Revision: 4957

Added:
   trunk/scipy/spatial/ckdtree.c
   trunk/scipy/spatial/ckdtree.pyx
   trunk/scipy/spatial/info.py
   trunk/scipy/spatial/kdtree.py
   trunk/scipy/spatial/tests/test_kdtree.py
Removed:
   trunk/scipy/spatial/info.py
Modified:
   trunk/scipy/spatial/__init__.py
   trunk/scipy/spatial/setup.py
Log:
Merged kdtree code. I think scons support is broken.


Modified: trunk/scipy/spatial/__init__.py
===================================================================
--- trunk/scipy/spatial/__init__.py	2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/__init__.py	2008-11-03 09:06:52 UTC (rev 4957)
@@ -3,8 +3,11 @@
 #
 
 from info import __doc__
+from kdtree import *
+from ckdtree import *
 
-__all__ = ['distance']
+__all__ = filter(lambda s:not s.startswith('_'),dir())
+__all__ += ['distance']
 
 import distance
 from numpy.testing import Tester

Copied: trunk/scipy/spatial/ckdtree.c (from rev 4956, branches/spatial/scipy/spatial/ckdtree.c)


Property changes on: trunk/scipy/spatial/ckdtree.c
___________________________________________________________________
Name: svn:mergeinfo
   + 

Copied: trunk/scipy/spatial/ckdtree.pyx (from rev 4956, branches/spatial/scipy/spatial/ckdtree.pyx)

Deleted: trunk/scipy/spatial/info.py
===================================================================
--- trunk/scipy/spatial/info.py	2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/info.py	2008-11-03 09:06:52 UTC (rev 4957)
@@ -1,8 +0,0 @@
-"""
-Distance Computation
-====================
-
-    The distance module provides functions for computing distances between
-    pairs of vectors from a set of observation vectors.
-
-"""

Copied: trunk/scipy/spatial/info.py (from rev 4956, branches/spatial/scipy/spatial/info.py)
===================================================================
--- branches/spatial/scipy/spatial/info.py	2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/info.py	2008-11-03 09:06:52 UTC (rev 4957)
@@ -0,0 +1,12 @@
+"""
+Spatial data structures and algorithms
+======================================
+
+Nearest-neighbor queries:
+
+    KDTree      -- class for efficient nearest-neighbor queries
+    distance    -- module containing many different distance measures
+
+"""
+
+postpone_import = 1


Property changes on: trunk/scipy/spatial/info.py
___________________________________________________________________
Name: svn:mergeinfo
   + 

Copied: trunk/scipy/spatial/kdtree.py (from rev 4956, branches/spatial/scipy/spatial/kdtree.py)
===================================================================
--- branches/spatial/scipy/spatial/kdtree.py	2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/kdtree.py	2008-11-03 09:06:52 UTC (rev 4957)
@@ -0,0 +1,675 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+import numpy as np
+from heapq import heappush, heappop
+import scipy.sparse
+
+def minkowski_distance_p(x,y,p=2):
+    """Compute the pth power of the L**p distance between x and y
+    
+    For efficiency, this function computes the L**p distance but does
+    not extract the pth root. If p is 1 or infinity, this is equal to
+    the actual L**p distance.
+    """
+    x = np.asarray(x)
+    y = np.asarray(y)
+    if p==np.inf:
+        return np.amax(np.abs(y-x),axis=-1)
+    elif p==1:
+        return np.sum(np.abs(y-x),axis=-1)
+    else:
+        return np.sum(np.abs(y-x)**p,axis=-1)
+def minkowski_distance(x,y,p=2):
+    """Compute the L**p distance between x and y"""
+    x = np.asarray(x)
+    y = np.asarray(y)
+    if p==np.inf or p==1:
+        return minkowski_distance_p(x,y,p)
+    else:
+        return minkowski_distance_p(x,y,p)**(1./p)
+
+class Rectangle(object):
+    """Hyperrectangle class.
+
+    Represents a Cartesian product of intervals.
+    """
+    def __init__(self, maxes, mins):
+        """Construct a hyperrectangle."""
+        self.maxes = np.maximum(maxes,mins).astype(np.float)
+        self.mins = np.minimum(maxes,mins).astype(np.float)
+        self.m, = self.maxes.shape
+
+    def __repr__(self):
+        return "<Rectangle %s>" % zip(self.mins, self.maxes)
+
+    def volume(self):
+        """Total volume."""
+        return np.prod(self.maxes-self.mins)
+    
+    def split(self, d, split):
+        """Produce two hyperrectangles by splitting along axis d.
+        
+        In general, if you need to compute maximum and minimum
+        distances to the children, it can be done more efficiently
+        by updating the maximum and minimum distances to the parent.
+        """ # FIXME: do this
+        mid = np.copy(self.maxes)
+        mid[d] = split
+        less = Rectangle(self.mins, mid)
+        mid = np.copy(self.mins)
+        mid[d] = split
+        greater = Rectangle(mid, self.maxes)
+        return less, greater
+
+    def min_distance_point(self, x, p=2.):
+        """Compute the minimum distance between x and a point in the hyperrectangle."""
+        return minkowski_distance(0, np.maximum(0,np.maximum(self.mins-x,x-self.maxes)),p)
+
+    def max_distance_point(self, x, p=2.):
+        """Compute the maximum distance between x and a point in the hyperrectangle."""
+        return minkowski_distance(0, np.maximum(self.maxes-x,x-self.mins),p)
+
+    def min_distance_rectangle(self, other, p=2.):
+        """Compute the minimum distance between points in the two hyperrectangles."""
+        return minkowski_distance(0, np.maximum(0,np.maximum(self.mins-other.maxes,other.mins-self.maxes)),p)
+
+    def max_distance_rectangle(self, other, p=2.):
+        """Compute the maximum distance between points in the two hyperrectangles."""
+        return minkowski_distance(0, np.maximum(self.maxes-other.mins,other.maxes-self.mins),p)
+
+
+class KDTree(object):
+    """kd-tree for quick nearest-neighbor lookup
+
+    This class provides an index into a set of k-dimensional points
+    which can be used to rapidly look up the nearest neighbors of any
+    point. 
+
+    The algorithm used is described in Maneewongvatana and Mount 1999. 
+    The general idea is that the kd-tree is a binary trie, each of whose
+    nodes represents an axis-aligned hyperrectangle. Each node specifies
+    an axis and splits the set of points based on whether their coordinate
+    along that axis is greater than or less than a particular value. 
+
+    During construction, the axis and splitting point are chosen by the 
+    "sliding midpoint" rule, which ensures that the cells do not all
+    become long and thin. 
+
+    The tree can be queried for the r closest neighbors of any given point 
+    (optionally returning only those within some maximum distance of the 
+    point). It can also be queried, with a substantial gain in efficiency, 
+    for the r approximate closest neighbors.
+
+    For large dimensions (20 is already large) do not expect this to run 
+    significantly faster than brute force. High-dimensional nearest-neighbor
+    queries are a substantial open problem in computer science.
+
+    The tree also supports all-neighbors queries, both with arrays of points
+    and with other kd-trees. These do use a reasonably efficient algorithm,
+    but the kd-tree is not necessarily the best data structure for this
+    sort of calculation.
+    """
+
+    def __init__(self, data, leafsize=10):
+        """Construct a kd-tree.
+
+        Parameters:
+        ===========
+
+        data : array-like, shape (n,k)
+            The data points to be indexed. This array is not copied, and
+            so modifying this data will result in bogus results.
+        leafsize : positive integer
+            The number of points at which the algorithm switches over to
+            brute-force.
+        """
+        self.data = np.asarray(data)
+        self.n, self.m = np.shape(self.data)
+        self.leafsize = int(leafsize)
+        if self.leafsize<1:
+            raise ValueError("leafsize must be at least 1")
+        self.maxes = np.amax(self.data,axis=0)
+        self.mins = np.amin(self.data,axis=0)
+
+        self.tree = self.__build(np.arange(self.n), self.maxes, self.mins)
+
+    class node(object):
+        pass
+    class leafnode(node):
+        def __init__(self, idx):
+            self.idx = idx
+            self.children = len(idx)
+    class innernode(node):
+        def __init__(self, split_dim, split, less, greater):
+            self.split_dim = split_dim
+            self.split = split
+            self.less = less
+            self.greater = greater
+            self.children = less.children+greater.children
+    
+    def __build(self, idx, maxes, mins):
+        if len(idx)<=self.leafsize:
+            return KDTree.leafnode(idx)
+        else:
+            data = self.data[idx]
+            #maxes = np.amax(data,axis=0)
+            #mins = np.amin(data,axis=0)
+            d = np.argmax(maxes-mins)
+            maxval = maxes[d]
+            minval = mins[d]
+            if maxval==minval:
+                # all points are identical; warn user?
+                return KDTree.leafnode(idx)
+            data = data[:,d]
+
+            # sliding midpoint rule; see Maneewongvatana and Mount 1999
+            # for arguments that this is a good idea.
+            split = (maxval+minval)/2
+            less_idx = np.nonzero(data<=split)[0]
+            greater_idx = np.nonzero(data>split)[0]
+            if len(less_idx)==0:
+                split = np.amin(data)
+                less_idx = np.nonzero(data<=split)[0]
+                greater_idx = np.nonzero(data>split)[0]
+            if len(greater_idx)==0:
+                split = np.amax(data)
+                less_idx = np.nonzero(data<split)[0]
+                greater_idx = np.nonzero(data>=split)[0]
+            if len(less_idx)==0:
+                # _still_ zero? all must have the same value
+                assert np.all(data==data[0]), "Troublesome data array: %s" % data
+                split = data[0]
+                less_idx = np.arange(len(data)-1)
+                greater_idx = np.array([len(data)-1])
+
+            lessmaxes = np.copy(maxes)
+            lessmaxes[d] = split
+            greatermins = np.copy(mins)
+            greatermins[d] = split
+            return KDTree.innernode(d, split, 
+                    self.__build(idx[less_idx],lessmaxes,mins),
+                    self.__build(idx[greater_idx],maxes,greatermins))
+
+    def __query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
+        
+        side_distances = np.maximum(0,np.maximum(x-self.maxes,self.mins-x))
+        if p!=np.inf:
+            side_distances**=p
+            min_distance = np.sum(side_distances)
+        else:
+            min_distance = np.amax(side_distances)
+
+        # priority queue for chasing nodes
+        # entries are:
+        #  minimum distance between the cell and the target
+        #  distances between the nearest side of the cell and the target
+        #  the head node of the cell
+        q = [(min_distance,
+              tuple(side_distances),                   
+              self.tree)]
+        # priority queue for the nearest neighbors
+        # furthest known neighbor first
+        # entries are (-distance**p, i)
+        neighbors = []
+
+        if eps==0:
+            epsfac=1
+        elif p==np.inf:
+            epsfac = 1/(1+eps)
+        else:
+            epsfac = 1/(1+eps)**p
+
+        if p!=np.inf and distance_upper_bound!=np.inf:
+            distance_upper_bound = distance_upper_bound**p
+
+        while q:
+            min_distance, side_distances, node = heappop(q)
+            if isinstance(node, KDTree.leafnode):
+                # brute-force
+                data = self.data[node.idx]
+                ds = minkowski_distance_p(data,x[np.newaxis,:],p)
+                for i in range(len(ds)):
+                    if ds[i]<distance_upper_bound:
+                        if len(neighbors)==k:
+                            heappop(neighbors)
+                        heappush(neighbors, (-ds[i], node.idx[i]))
+                        if len(neighbors)==k:
+                            distance_upper_bound = -neighbors[0][0]
+            else:
+                # we don't push cells that are too far onto the queue at all,
+                # but since the distance_upper_bound decreases, we might get 
+                # here even if the cell's too far
+                if min_distance>distance_upper_bound*epsfac:
+                    # since this is the nearest cell, we're done, bail out
+                    break
+                # compute minimum distances to the children and push them on
+                if x[node.split_dim]<node.split:
+                    near, far = node.less, node.greater
+                else:
+                    near, far = node.greater, node.less
+
+                # near child is at the same distance as the current node
+                heappush(q,(min_distance, side_distances, near))
+
+                # far child is further by an amount depending only
+                # on the split value
+                sd = list(side_distances)
+                if p == np.inf:
+                    min_distance = max(min_distance, abs(node.split-x[node.split_dim]))
+                elif p == 1:
+                    sd[node.split_dim] = np.abs(node.split-x[node.split_dim])
+                    min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
+                else:
+                    sd[node.split_dim] = np.abs(node.split-x[node.split_dim])**p
+                    min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
+
+                # far child might be too far, if so, don't bother pushing it
+                if min_distance<=distance_upper_bound*epsfac:
+                    heappush(q,(min_distance, tuple(sd), far))
+
+        if p==np.inf:
+            return sorted([(-d,i) for (d,i) in neighbors])
+        else:
+            return sorted([((-d)**(1./p),i) for (d,i) in neighbors])
+
+    def query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
+        """query the kd-tree for nearest neighbors
+
+        Parameters:
+        ===========
+
+        x : array-like, last dimension self.m
+            An array of points to query.
+        k : integer
+            The number of nearest neighbors to return.
+        eps : nonnegative float
+            Return approximate nearest neighbors; the kth returned value 
+            is guaranteed to be no further than (1+eps) times the 
+            distance to the real kth nearest neighbor.
+        p : float, 1<=p<=infinity
+            Which Minkowski p-norm to use. 
+            1 is the sum-of-absolute-values "Manhattan" distance
+            2 is the usual Euclidean distance
+            infinity is the maximum-coordinate-difference distance
+        distance_upper_bound : nonnegative float
+            Return only neighbors within this distance. This is used to prune
+            tree searches, so if you are doing a series of nearest-neighbor
+            queries, it may help to supply the distance to the nearest neighbor
+            of the most recent point.
+
+        Returns:
+        ========
+        
+        d : array of floats
+            The distances to the nearest neighbors. 
+            If x has shape tuple+(self.m,), then d has shape tuple if 
+            k is one, or tuple+(k,) if k is larger than one.  Missing 
+            neighbors are indicated with infinite distances.  If k is None, 
+            then d is an object array of shape tuple, containing lists 
+            of distances. In either case the hits are sorted by distance 
+            (nearest first).
+        i : array of integers
+            The locations of the neighbors in self.data. i is the same
+            shape as d.
+        """
+        x = np.asarray(x)
+        if np.shape(x)[-1] != self.m:
+            raise ValueError("x must consist of vectors of length %d but has shape %s" % (self.m, np.shape(x)))
+        if p<1:
+            raise ValueError("Only p-norms with 1<=p<=infinity permitted")
+        retshape = np.shape(x)[:-1]
+        if retshape!=():
+            if k>1:
+                dd = np.empty(retshape+(k,),dtype=np.float)
+                dd.fill(np.inf)
+                ii = np.empty(retshape+(k,),dtype=np.int)
+                ii.fill(self.n)
+            elif k==1:
+                dd = np.empty(retshape,dtype=np.float)
+                dd.fill(np.inf)
+                ii = np.empty(retshape,dtype=np.int)
+                ii.fill(self.n)
+            elif k is None:
+                dd = np.empty(retshape,dtype=np.object)
+                ii = np.empty(retshape,dtype=np.object)
+            else:
+                raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None")
+            for c in np.ndindex(retshape):
+                hits = self.__query(x[c], k=k, p=p, distance_upper_bound=distance_upper_bound)
+                if k>1:
+                    for j in range(len(hits)):
+                        dd[c+(j,)], ii[c+(j,)] = hits[j]
+                elif k==1:
+                    if len(hits)>0:
+                        dd[c], ii[c] = hits[0]
+                    else:
+                        dd[c] = np.inf
+                        ii[c] = self.n
+                elif k is None:
+                    dd[c] = [d for (d,i) in hits]
+                    ii[c] = [i for (d,i) in hits]
+            return dd, ii
+        else:
+            hits = self.__query(x, k=k, p=p, distance_upper_bound=distance_upper_bound)
+            if k==1:
+                if len(hits)>0:
+                    return hits[0]
+                else:
+                    return np.inf, self.n
+            elif k>1:
+                dd = np.empty(k,dtype=np.float)
+                dd.fill(np.inf)
+                ii = np.empty(k,dtype=np.int)
+                ii.fill(self.n)
+                for j in range(len(hits)):
+                    dd[j], ii[j] = hits[j]
+                return dd, ii
+            elif k is None:
+                return [d for (d,i) in hits], [i for (d,i) in hits]
+            else:
+                raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None")
+
+
+    def __query_ball_point(self, x, r, p=2., eps=0):
+        R = Rectangle(self.maxes, self.mins)
+
+        def traverse_checking(node, rect):
+            if rect.min_distance_point(x,p)>=r/(1.+eps):
+                return []
+            elif rect.max_distance_point(x,p)<r*(1.+eps):
+                return traverse_no_checking(node)
+            elif isinstance(node, KDTree.leafnode):
+                d = self.data[node.idx]
+                return node.idx[minkowski_distance(d,x,p)<=r].tolist()
+            else:
+                less, greater = rect.split(node.split_dim, node.split)
+                return traverse_checking(node.less, less)+traverse_checking(node.greater, greater)
+        def traverse_no_checking(node):
+            if isinstance(node, KDTree.leafnode):
+                
+                return node.idx.tolist()
+            else:
+                return traverse_no_checking(node.less)+traverse_no_checking(node.greater)
+
+        return traverse_checking(self.tree, R)
+
+    def query_ball_point(self, x, r, p=2., eps=0):
+        """Find all points within r of x
+
+        Parameters
+        ==========
+
+        x : array_like, shape tuple + (self.m,)
+            The point or points to search for neighbors of
+        r : positive float
+            The radius of points to return
+        p : float 1<=p<=infinity
+            Which Minkowski p-norm to use
+        eps : nonnegative float
+            Approximate search. Branches of the tree are not explored
+            if their nearest points are further than r/(1+eps), and branches
+            are added in bulk if their furthest points are nearer than r*(1+eps).
+
+        Returns
+        =======
+
+        results : list or array of lists
+            If x is a single point, returns a list of the indices of the neighbors
+            of x. If x is an array of points, returns an object array of shape tuple
+            containing lists of neighbors.
+
+
+        Note: if you have many points whose neighbors you want to find, you may save
+        substantial amounts of time by putting them in a KDTree and using query_ball_tree.
+        """
+        x = np.asarray(x)
+        if x.shape[-1]!=self.m:
+            raise ValueError("Searching for a %d-dimensional point in a %d-dimensional KDTree" % (x.shape[-1],self.m))
+        if len(x.shape)==1:
+            return self.__query_ball_point(x,r,p,eps)
+        else:
+            retshape = x.shape[:-1]
+            result = np.empty(retshape,dtype=np.object)
+            for c in np.ndindex(retshape):
+                result[c] = self.__query_ball_point(x[c], r, p=p, eps=eps)
+            return result
+
+    def query_ball_tree(self, other, r, p=2., eps=0):
+        """Find all pairs of points whose distance is at most r
+
+        Parameters
+        ==========
+
+        other : KDTree
+            The tree containing points to search against
+        r : positive float
+            The maximum distance
+        p : float 1<=p<=infinity
+            Which Minkowski norm to use
+        eps : nonnegative float
+            Approximate search. Branches of the tree are not explored
+            if their nearest points are further than r/(1+eps), and branches
+            are added in bulk if their furthest points are nearer than r*(1+eps).
+        
+        Returns
+        =======
+
+        results : list of lists
+            For each element self.data[i] of this tree, results[i] is a list of the
+            indices of its neighbors in other.data.
+        """
+        results = [[] for i in range(self.n)]
+        def traverse_checking(node1, rect1, node2, rect2):
+            if rect1.min_distance_rectangle(rect2, p)>r/(1.+eps):
+                return
+            elif rect1.max_distance_rectangle(rect2, p)<r*(1.+eps):
+                traverse_no_checking(node1, node2)
+            elif isinstance(node1, KDTree.leafnode):
+                if isinstance(node2, KDTree.leafnode):
+                    d = other.data[node2.idx]
+                    for i in node1.idx:
+                        results[i] += node2.idx[minkowski_distance(d,self.data[i],p)<=r].tolist()
+                else:
+                    less, greater = rect2.split(node2.split_dim, node2.split)
+                    traverse_checking(node1,rect1,node2.less,less)
+                    traverse_checking(node1,rect1,node2.greater,greater)
+            elif isinstance(node2, KDTree.leafnode):
+                less, greater = rect1.split(node1.split_dim, node1.split)
+                traverse_checking(node1.less,less,node2,rect2)
+                traverse_checking(node1.greater,greater,node2,rect2)
+            else:
+                less1, greater1 = rect1.split(node1.split_dim, node1.split)
+                less2, greater2 = rect2.split(node2.split_dim, node2.split)
+                traverse_checking(node1.less,less1,node2.less,less2)
+                traverse_checking(node1.less,less1,node2.greater,greater2)
+                traverse_checking(node1.greater,greater1,node2.less,less2)
+                traverse_checking(node1.greater,greater1,node2.greater,greater2)
+
+        def traverse_no_checking(node1, node2):
+            if isinstance(node1, KDTree.leafnode):
+                if isinstance(node2, KDTree.leafnode):
+                    for i in node1.idx:
+                        results[i] += node2.idx.tolist()
+                else:
+                    traverse_no_checking(node1, node2.less)
+                    traverse_no_checking(node1, node2.greater)
+            else:
+                traverse_no_checking(node1.less, node2)
+                traverse_no_checking(node1.greater, node2)
+
+        traverse_checking(self.tree, Rectangle(self.maxes, self.mins),
+                          other.tree, Rectangle(other.maxes, other.mins))
+        return results
+
+        
+    def count_neighbors(self, other, r, p=2.):
+        """Count how many nearby pairs can be formed.
+
+        Count the number of pairs (x1,x2) can be formed, with x1 drawn
+        from self and x2 drawn from other, and where distance(x1,x2,p)<=r.
+        This is the "two-point correlation" described in Gray and Moore 2000,
+        "N-body problems in statistical learning", and the code here is based
+        on their algorithm.
+
+        Parameters
+        ==========
+
+        other : KDTree
+
+        r : float or one-dimensional array of floats
+            The radius to produce a count for. Multiple radii are searched with a single
+            tree traversal.
+        p : float, 1<=p<=infinity
+            Which Minkowski p-norm to use
+
+        Returns
+        =======
+
+        result : integer or one-dimensional array of integers
+            The number of pairs. Note that this is internally stored in a numpy int,
+            and so may overflow if very large (two billion). 
+        """
+
+        def traverse(node1, rect1, node2, rect2, idx):
+            min_r = rect1.min_distance_rectangle(rect2,p)
+            max_r = rect1.max_distance_rectangle(rect2,p)
+            c_greater = r[idx]>max_r
+            result[idx[c_greater]] += node1.children*node2.children
+            idx = idx[(min_r<=r[idx]) & (r[idx]<=max_r)]
+            if len(idx)==0:
+                return
+
+            if isinstance(node1,KDTree.leafnode):
+                if isinstance(node2,KDTree.leafnode):
+                    ds = minkowski_distance(self.data[node1.idx][:,np.newaxis,:],
+                                  other.data[node2.idx][np.newaxis,:,:],
+                                  p).ravel()
+                    ds.sort()
+                    result[idx] += np.searchsorted(ds,r[idx],side='right')
+                else:
+                    less, greater = rect2.split(node2.split_dim, node2.split)
+                    traverse(node1, rect1, node2.less, less, idx)
+                    traverse(node1, rect1, node2.greater, greater, idx)
+            else:
+                if isinstance(node2,KDTree.leafnode):
+                    less, greater = rect1.split(node1.split_dim, node1.split)
+                    traverse(node1.less, less, node2, rect2, idx)
+                    traverse(node1.greater, greater, node2, rect2, idx)
+                else:
+                    less1, greater1 = rect1.split(node1.split_dim, node1.split)
+                    less2, greater2 = rect2.split(node2.split_dim, node2.split)
+                    traverse(node1.less,less1,node2.less,less2,idx)
+                    traverse(node1.less,less1,node2.greater,greater2,idx)
+                    traverse(node1.greater,greater1,node2.less,less2,idx)
+                    traverse(node1.greater,greater1,node2.greater,greater2,idx)
+        R1 = Rectangle(self.maxes, self.mins)
+        R2 = Rectangle(other.maxes, other.mins)
+        if np.shape(r) == ():
+            r = np.array([r])
+            result = np.zeros(1,dtype=int)
+            traverse(self.tree, R1, other.tree, R2, np.arange(1))
+            return result[0]
+        elif len(np.shape(r))==1:
+            r = np.asarray(r)
+            n, = r.shape
+            result = np.zeros(n,dtype=int)
+            traverse(self.tree, R1, other.tree, R2, np.arange(n))
+            return result
+        else:
+            raise ValueError("r must be either a single value or a one-dimensional array of values")
+        
+    def sparse_distance_matrix(self, other, max_distance, p=2.):
+        """Compute a sparse distance matrix
+
+        Computes a distance matrix between two KDTrees, leaving as zero
+        any distance greater than max_distance.
+
+        Parameters
+        ==========
+
+        other : KDTree
+
+        max_distance : positive float
+
+        Returns
+        =======
+
+        result : dok_matrix
+            Sparse matrix representing the results in "dictionary of keys" format.
+        """
+        result = scipy.sparse.dok_matrix((self.n,other.n))
+
+        def traverse(node1, rect1, node2, rect2):
+            if rect1.min_distance_rectangle(rect2, p)>max_distance:
+                return
+            elif isinstance(node1, KDTree.leafnode):
+                if isinstance(node2, KDTree.leafnode):
+                    for i in node1.idx:
+                        for j in node2.idx:
+                            d = minkowski_distance(self.data[i],other.data[j],p)
+                            if d<=max_distance:
+                                result[i,j] = d
+                else:
+                    less, greater = rect2.split(node2.split_dim, node2.split)
+                    traverse(node1,rect1,node2.less,less)
+                    traverse(node1,rect1,node2.greater,greater)
+            elif isinstance(node2, KDTree.leafnode):
+                less, greater = rect1.split(node1.split_dim, node1.split)
+                traverse(node1.less,less,node2,rect2)
+                traverse(node1.greater,greater,node2,rect2)
+            else:
+                less1, greater1 = rect1.split(node1.split_dim, node1.split)
+                less2, greater2 = rect2.split(node2.split_dim, node2.split)
+                traverse(node1.less,less1,node2.less,less2)
+                traverse(node1.less,less1,node2.greater,greater2)
+                traverse(node1.greater,greater1,node2.less,less2)
+                traverse(node1.greater,greater1,node2.greater,greater2)
+        traverse(self.tree, Rectangle(self.maxes, self.mins),
+                 other.tree, Rectangle(other.maxes, other.mins))
+
+        return result
+
+
+def distance_matrix(x,y,p=2,threshold=1000000):
+    """Compute the distance matrix.
+
+    Computes the matrix of all pairwise distances.
+
+    Parameters
+    ==========
+
+    x : array-like, m by k
+    y : array-like, n by k
+    p : float 1<=p<=infinity
+        Which Minkowski p-norm to use.
+    threshold : positive integer
+        If m*n*k>threshold use a python loop instead of creating
+        a very large temporary.
+
+    Returns
+    =======
+
+    result : array-like, m by n
+
+
+    """
+
+    x = np.asarray(x)
+    m, k = x.shape
+    y = np.asarray(y)
+    n, kk = y.shape
+
+    if k != kk:
+        raise ValueError("x contains %d-dimensional vectors but y contains %d-dimensional vectors" % (k, kk))
+
+    if m*n*k <= threshold:
+        return minkowski_distance(x[:,np.newaxis,:],y[np.newaxis,:,:],p)
+    else:
+        result = np.empty((m,n),dtype=np.float) #FIXME: figure out the best dtype
+        if m<n:
+            for i in range(m):
+                result[i,:] = minkowski_distance(x[i],y,p)
+        else:
+            for j in range(n):
+                result[:,j] = minkowski_distance(x,y[j],p)
+        return result


Property changes on: trunk/scipy/spatial/kdtree.py
___________________________________________________________________
Name: svn:mergeinfo
   + 

Modified: trunk/scipy/spatial/setup.py
===================================================================
--- trunk/scipy/spatial/setup.py	2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/setup.py	2008-11-03 09:06:52 UTC (rev 4957)
@@ -8,6 +8,8 @@
 
     config.add_data_dir('tests')
 
+    config.add_extension('ckdtree', sources=['ckdtree.c']) # FIXME: cython
+
     config.add_extension('_distance_wrap',
         sources=[join('src', 'distance_wrap.c'), join('src', 'distance.c')],
         include_dirs = [get_numpy_include_dirs()])
@@ -19,7 +21,7 @@
     setup(maintainer = "SciPy Developers",
           author = "Eric Jones",
           maintainer_email = "scipy-dev at scipy.org",
-          description = "Distance Computation",
+          description = "Spatial algorithms and data structures",
           url = "http://www.scipy.org",
           license = "SciPy License (BSD Style)",
           **configuration(top_path='').todict()

Copied: trunk/scipy/spatial/tests/test_kdtree.py (from rev 4956, branches/spatial/scipy/spatial/tests/test_kdtree.py)
===================================================================
--- branches/spatial/scipy/spatial/tests/test_kdtree.py	2008-11-03 07:30:45 UTC (rev 4956)
+++ trunk/scipy/spatial/tests/test_kdtree.py	2008-11-03 09:06:52 UTC (rev 4957)
@@ -0,0 +1,428 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+from numpy.testing import *
+
+import numpy as np
+from scipy.spatial import KDTree, Rectangle, distance_matrix, cKDTree
+from scipy.spatial import minkowski_distance as distance
+
+class ConsistencyTests:
+    def test_nearest(self):
+        x = self.x
+        d, i = self.kdtree.query(x, 1)
+        assert_almost_equal(d**2,np.sum((x-self.data[i])**2))
+        eps = 1e-8
+        assert np.all(np.sum((self.data-x[np.newaxis,:])**2,axis=1)>d**2-eps)
+        
+    def test_m_nearest(self):
+        x = self.x
+        m = self.m
+        dd, ii = self.kdtree.query(x, m)
+        d = np.amax(dd)
+        i = ii[np.argmax(dd)]
+        assert_almost_equal(d**2,np.sum((x-self.data[i])**2))
+        eps = 1e-8
+        assert_equal(np.sum(np.sum((self.data-x[np.newaxis,:])**2,axis=1)<d**2+eps),m)
+
+    def test_points_near(self):
+        x = self.x
+        d = self.d
+        dd, ii = self.kdtree.query(x, k=self.kdtree.n, distance_upper_bound=d)
+        eps = 1e-8
+        hits = 0
+        for near_d, near_i in zip(dd,ii):
+            if near_d==np.inf:
+                continue
+            hits += 1
+            assert_almost_equal(near_d**2,np.sum((x-self.data[near_i])**2))
+            assert near_d<d+eps, "near_d=%g should be less than %g" % (near_d,d)
+        assert_equal(np.sum(np.sum((self.data-x[np.newaxis,:])**2,axis=1)<d**2+eps),hits)
+
+    def test_points_near_l1(self):
+        x = self.x
+        d = self.d
+        dd, ii = self.kdtree.query(x, k=self.kdtree.n, p=1, distance_upper_bound=d)
+        eps = 1e-8
+        hits = 0
+        for near_d, near_i in zip(dd,ii):
+            if near_d==np.inf:
+                continue
+            hits += 1
+            assert_almost_equal(near_d,distance(x,self.data[near_i],1))
+            assert near_d<d+eps, "near_d=%g should be less than %g" % (near_d,d)
+        assert_equal(np.sum(distance(self.data,x,1)<d+eps),hits)
+    def test_points_near_linf(self):
+        x = self.x
+        d = self.d
+        dd, ii = self.kdtree.query(x, k=self.kdtree.n, p=np.inf, distance_upper_bound=d)
+        eps = 1e-8
+        hits = 0
+        for near_d, near_i in zip(dd,ii):
+            if near_d==np.inf:
+                continue
+            hits += 1
+            assert_almost_equal(near_d,distance(x,self.data[near_i],np.inf))
+            assert near_d<d+eps, "near_d=%g should be less than %g" % (near_d,d)
+        assert_equal(np.sum(distance(self.data,x,np.inf)<d+eps),hits)
+
+    def test_approx(self):
+        x = self.x
+        k = self.k
+        eps = 0.1
+        d_real, i_real = self.kdtree.query(x, k)
+        d, i = self.kdtree.query(x, k, eps=eps)
+        assert np.all(d<=d_real*(1+eps))
+
+    
+class test_random(ConsistencyTests):
+    def setUp(self):
+        self.n = 100
+        self.m = 4
+        self.data = np.random.randn(self.n, self.m)
+        self.kdtree = KDTree(self.data,leafsize=2)
+        self.x = np.random.randn(self.m)
+        self.d = 0.2
+        self.k = 10
+
+class test_random_far(test_random):
+    def setUp(self):
+        test_random.setUp(self)
+        self.x = np.random.randn(self.m)+10
+
+class test_small(ConsistencyTests):
+    def setUp(self):
+        self.data = np.array([[0,0,0],
+                              [0,0,1],
+                              [0,1,0],
+                              [0,1,1],
+                              [1,0,0],
+                              [1,0,1],
+                              [1,1,0],
+                              [1,1,1]])
+        self.kdtree = KDTree(self.data)
+        self.n = self.kdtree.n
+        self.m = self.kdtree.m
+        self.x = np.random.randn(3)
+        self.d = 0.5
+        self.k = 4
+
+    def test_nearest(self):
+        assert_array_equal(
+                self.kdtree.query((0,0,0.1), 1),
+                (0.1,0))
+    def test_nearest_two(self):
+        assert_array_equal(
+                self.kdtree.query((0,0,0.1), 2),
+                ([0.1,0.9],[0,1]))
+class test_small_nonleaf(test_small):
+    def setUp(self):
+        test_small.setUp(self)
+        self.kdtree = KDTree(self.data,leafsize=1)
+
+class test_small_compiled(test_small):
+    def setUp(self):
+        test_small.setUp(self)
+        self.kdtree = cKDTree(self.data)
+class test_small_nonleaf_compiled(test_small):
+    def setUp(self):
+        test_small.setUp(self)
+        self.kdtree = cKDTree(self.data,leafsize=1)
+class test_random_compiled(test_random):
+    def setUp(self):
+        test_random.setUp(self)
+        self.kdtree = cKDTree(self.data)
+class test_random_far_compiled(test_random_far):
+    def setUp(self):
+        test_random_far.setUp(self)
+        self.kdtree = cKDTree(self.data)
+
+class test_vectorization:
+    def setUp(self):
+        self.data = np.array([[0,0,0],
+                              [0,0,1],
+                              [0,1,0],
+                              [0,1,1],
+                              [1,0,0],
+                              [1,0,1],
+                              [1,1,0],
+                              [1,1,1]])
+        self.kdtree = KDTree(self.data)
+
+    def test_single_query(self):
+        d, i = self.kdtree.query([0,0,0])
+        assert isinstance(d,float)
+        assert isinstance(i,int)
+
+    def test_vectorized_query(self):
+        d, i = self.kdtree.query(np.zeros((2,4,3)))
+        assert_equal(np.shape(d),(2,4))
+        assert_equal(np.shape(i),(2,4))
+
+    def test_single_query_multiple_neighbors(self):
+        s = 23
+        kk = self.kdtree.n+s
+        d, i = self.kdtree.query([0,0,0],k=kk)
+        assert_equal(np.shape(d),(kk,))
+        assert_equal(np.shape(i),(kk,))
+        assert np.all(~np.isfinite(d[-s:]))
+        assert np.all(i[-s:]==self.kdtree.n)
+    def test_vectorized_query_multiple_neighbors(self):
+        s = 23
+        kk = self.kdtree.n+s
+        d, i = self.kdtree.query(np.zeros((2,4,3)),k=kk)
+        assert_equal(np.shape(d),(2,4,kk))
+        assert_equal(np.shape(i),(2,4,kk))
+        assert np.all(~np.isfinite(d[:,:,-s:]))
+        assert np.all(i[:,:,-s:]==self.kdtree.n)
+    def test_single_query_all_neighbors(self):
+        d, i = self.kdtree.query([0,0,0],k=None,distance_upper_bound=1.1)
+        assert isinstance(d,list)
+        assert isinstance(i,list)
+    def test_vectorized_query_all_neighbors(self):
+        d, i = self.kdtree.query(np.zeros((2,4,3)),k=None,distance_upper_bound=1.1)
+        assert_equal(np.shape(d),(2,4))
+        assert_equal(np.shape(i),(2,4))
+
+        assert isinstance(d[0,0],list)
+        assert isinstance(i[0,0],list)
+
+class test_vectorization_compiled:
+    def setUp(self):
+        self.data = np.array([[0,0,0],
+                              [0,0,1],
+                              [0,1,0],
+                              [0,1,1],
+                              [1,0,0],
+                              [1,0,1],
+                              [1,1,0],
+                              [1,1,1]])
+        self.kdtree = KDTree(self.data)
+
+    def test_single_query(self):
+        d, i = self.kdtree.query([0,0,0])
+        assert isinstance(d,float)
+        assert isinstance(i,int)
+
+    def test_vectorized_query(self):
+        d, i = self.kdtree.query(np.zeros((2,4,3)))
+        assert_equal(np.shape(d),(2,4))
+        assert_equal(np.shape(i),(2,4))
+
+    def test_single_query_multiple_neighbors(self):
+        s = 23
+        kk = self.kdtree.n+s
+        d, i = self.kdtree.query([0,0,0],k=kk)
+        assert_equal(np.shape(d),(kk,))
+        assert_equal(np.shape(i),(kk,))
+        assert np.all(~np.isfinite(d[-s:]))
+        assert np.all(i[-s:]==self.kdtree.n)
+    def test_vectorized_query_multiple_neighbors(self):
+        s = 23
+        kk = self.kdtree.n+s
+        d, i = self.kdtree.query(np.zeros((2,4,3)),k=kk)
+        assert_equal(np.shape(d),(2,4,kk))
+        assert_equal(np.shape(i),(2,4,kk))
+        assert np.all(~np.isfinite(d[:,:,-s:]))
+        assert np.all(i[:,:,-s:]==self.kdtree.n)
+
+class ball_consistency:
+
+    def test_in_ball(self):
+        l = self.T.query_ball_point(self.x, self.d, p=self.p, eps=self.eps)
+        for i in l:
+            assert distance(self.data[i],self.x,self.p)<=self.d*(1.+self.eps)
+
+    def test_found_all(self):
+        c = np.ones(self.T.n,dtype=np.bool)
+        l = self.T.query_ball_point(self.x, self.d, p=self.p, eps=self.eps)
+        c[l] = False
+        assert np.all(distance(self.data[c],self.x,self.p)>=self.d/(1.+self.eps))
+
+class test_random_ball(ball_consistency):
+
+    def setUp(self):
+        n = 100
+        m = 4
+        self.data = np.random.randn(n,m)
+        self.T = KDTree(self.data,leafsize=2)
+        self.x = np.random.randn(m)
+        self.p = 2.
+        self.eps = 0
+        self.d = 0.2
+
+class test_random_ball_approx(test_random_ball):
+
+    def setUp(self):
+        test_random_ball.setUp(self)
+        self.eps = 0.1
+
+class test_random_ball_far(test_random_ball):
+
+    def setUp(self):
+        test_random_ball.setUp(self)
+        self.d = 2.
+
+class test_random_ball_l1(test_random_ball):
+
+    def setUp(self):
+        test_random_ball.setUp(self)
+        self.p = 1
+
+class test_random_ball_linf(test_random_ball):
+
+    def setUp(self):
+        test_random_ball.setUp(self)
+        self.p = np.inf
+
+def test_random_ball_vectorized():
+
+    n = 20
+    m = 5
+    T = KDTree(np.random.randn(n,m))
+    
+    r = T.query_ball_point(np.random.randn(2,3,m),1)
+    assert_equal(r.shape,(2,3))
+    assert isinstance(r[0,0],list)
+
+class two_trees_consistency:
+
+    def test_all_in_ball(self):
+        r = self.T1.query_ball_tree(self.T2, self.d, p=self.p, eps=self.eps)
+        for i, l in enumerate(r):
+            for j in l:
+                assert distance(self.data1[i],self.data2[j],self.p)<=self.d*(1.+self.eps)
+    def test_found_all(self):
+        r = self.T1.query_ball_tree(self.T2, self.d, p=self.p, eps=self.eps)
+        for i, l in enumerate(r):
+            c = np.ones(self.T2.n,dtype=np.bool)
+            c[l] = False
+            assert np.all(distance(self.data2[c],self.data1[i],self.p)>=self.d/(1.+self.eps))
+
+class test_two_random_trees(two_trees_consistency):
+
+    def setUp(self):
+        n = 50
+        m = 4
+        self.data1 = np.random.randn(n,m)
+        self.T1 = KDTree(self.data1,leafsize=2)
+        self.data2 = np.random.randn(n,m)
+        self.T2 = KDTree(self.data2,leafsize=2)
+        self.p = 2.
+        self.eps = 0
+        self.d = 0.2
+
+class test_two_random_trees_far(test_two_random_trees):
+
+    def setUp(self):
+        test_two_random_trees.setUp(self)
+        self.d = 2
+
+class test_two_random_trees_linf(test_two_random_trees):
+
+    def setUp(self):
+        test_two_random_trees.setUp(self)
+        self.p = np.inf
+
+
+class test_rectangle:
+
+    def setUp(self):
+        self.rect = Rectangle([0,0],[1,1])
+
+    def test_min_inside(self):
+        assert_almost_equal(self.rect.min_distance_point([0.5,0.5]),0)
+    def test_min_one_side(self):
+        assert_almost_equal(self.rect.min_distance_point([0.5,1.5]),0.5)
+    def test_min_two_sides(self):
+        assert_almost_equal(self.rect.min_distance_point([2,2]),np.sqrt(2))
+    def test_max_inside(self):
+        assert_almost_equal(self.rect.max_distance_point([0.5,0.5]),1/np.sqrt(2))
+    def test_max_one_side(self):
+        assert_almost_equal(self.rect.max_distance_point([0.5,1.5]),np.hypot(0.5,1.5))
+    def test_max_two_sides(self):
+        assert_almost_equal(self.rect.max_distance_point([2,2]),2*np.sqrt(2))
+
+    def test_split(self):
+        less, greater = self.rect.split(0,0.1)
+        assert_array_equal(less.maxes,[0.1,1])
+        assert_array_equal(less.mins,[0,0])
+        assert_array_equal(greater.maxes,[1,1])
+        assert_array_equal(greater.mins,[0.1,0])
+
+
+def test_distance_l2():
+    assert_almost_equal(distance([0,0],[1,1],2),np.sqrt(2))
+def test_distance_l1():
+    assert_almost_equal(distance([0,0],[1,1],1),2)
+def test_distance_linf():
+    assert_almost_equal(distance([0,0],[1,1],np.inf),1)
+def test_distance_vectorization():
+    x = np.random.randn(10,1,3)
+    y = np.random.randn(1,7,3)
+    assert_equal(distance(x,y).shape,(10,7))
+
+class test_count_neighbors:
+
+    def setUp(self):
+        n = 50
+        m = 2
+        self.T1 = KDTree(np.random.randn(n,m),leafsize=2)
+        self.T2 = KDTree(np.random.randn(n,m),leafsize=2)
+        
+    def test_one_radius(self):
+        r = 0.2
+        assert_equal(self.T1.count_neighbors(self.T2, r),
+                np.sum([len(l) for l in self.T1.query_ball_tree(self.T2,r)]))
+
+    def test_large_radius(self):
+        r = 1000
+        assert_equal(self.T1.count_neighbors(self.T2, r),
+                np.sum([len(l) for l in self.T1.query_ball_tree(self.T2,r)]))
+
+    def test_multiple_radius(self):
+        rs = np.exp(np.linspace(np.log(0.01),np.log(10),3))
+        results = self.T1.count_neighbors(self.T2, rs)
+        assert np.all(np.diff(results)>=0)
+        for r,result in zip(rs, results):
+            assert_equal(self.T1.count_neighbors(self.T2, r), result)
+
+class test_sparse_distance_matrix:
+    def setUp(self):
+        n = 50
+        m = 4
+        self.T1 = KDTree(np.random.randn(n,m),leafsize=2)
+        self.T2 = KDTree(np.random.randn(n,m),leafsize=2)
+        self.r = 0.3
+
+    def test_consistency_with_neighbors(self):
+        M = self.T1.sparse_distance_matrix(self.T2, self.r)
+        r = self.T1.query_ball_tree(self.T2, self.r)
+        for i,l in enumerate(r):
+            for j in l:
+                assert_equal(M[i,j],distance(self.T1.data[i],self.T2.data[j]))
+        for ((i,j),d) in M.items():
+            assert j in r[i]
+
+def test_distance_matrix():
+    m = 10
+    n = 11
+    k = 4
+    xs = np.random.randn(m,k)
+    ys = np.random.randn(n,k)
+    ds = distance_matrix(xs,ys)
+    assert_equal(ds.shape, (m,n))
+    for i in range(m):
+        for j in range(n):
+            assert_equal(distance(xs[i],ys[j]),ds[i,j])
+def test_distance_matrix_looping():
+    m = 10
+    n = 11
+    k = 4
+    xs = np.random.randn(m,k)
+    ys = np.random.randn(n,k)
+    ds = distance_matrix(xs,ys)
+    dsl = distance_matrix(xs,ys,threshold=1)
+    assert_equal(ds,dsl)
+
+if __name__=="__main__":
+    run_module_suite()


Property changes on: trunk/scipy/spatial/tests/test_kdtree.py
___________________________________________________________________
Name: svn:mergeinfo
   + 




More information about the Scipy-svn mailing list