[Scipy-svn] r4759 - in trunk/scipy: . spatial spatial/tests
scipy-svn at scipy.org
Tue Sep 30 19:55:26 EDT 2008
Author: peridot
Date: 2008-09-30 18:55:21 -0500 (Tue, 30 Sep 2008)
New Revision: 4759
Added:
trunk/scipy/spatial/
trunk/scipy/spatial/__init__.py
trunk/scipy/spatial/info.py
trunk/scipy/spatial/kdtree.py
trunk/scipy/spatial/setup.py
trunk/scipy/spatial/tests/
trunk/scipy/spatial/tests/test_kdtree.py
Modified:
trunk/scipy/__init__.py
trunk/scipy/setup.py
Log:
New module for spatial data structures and algorithms. Currently contains a single pure-Python implementation of a kd-tree.
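
For reference, a minimal usage sketch of the new API (illustrative only;
the full interface is defined in kdtree.py below):

    import numpy as np
    from scipy.spatial import KDTree

    data = np.random.randn(100, 3)      # 100 points in 3 dimensions
    tree = KDTree(data)                 # build the index once
    d, i = tree.query([0, 0, 0], k=2)   # distances to, and indices of,
                                        # the two nearest neighbors of the origin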
Modified: trunk/scipy/__init__.py
===================================================================
--- trunk/scipy/__init__.py 2008-09-30 19:14:59 UTC (rev 4758)
+++ trunk/scipy/__init__.py 2008-09-30 23:55:21 UTC (rev 4759)
@@ -66,7 +66,7 @@
# Remove subpackage names from __all__ such that they are not imported via
# "from scipy import *". This works around a numpy bug present in < 1.2.
subpackages = """cluster constants fftpack integrate interpolate io lib linalg
-linsolve maxentropy misc ndimage odr optimize signal sparse special
+linsolve maxentropy misc ndimage odr optimize signal sparse special spatial
splinalg stats stsci weave""".split()
for name in subpackages:
try:
Modified: trunk/scipy/setup.py
===================================================================
--- trunk/scipy/setup.py 2008-09-30 19:14:59 UTC (rev 4758)
+++ trunk/scipy/setup.py 2008-09-30 23:55:21 UTC (rev 4759)
@@ -18,6 +18,7 @@
config.add_subpackage('signal')
config.add_subpackage('sparse')
config.add_subpackage('special')
+ config.add_subpackage('spatial')
config.add_subpackage('stats')
config.add_subpackage('ndimage')
config.add_subpackage('stsci')
Added: trunk/scipy/spatial/__init__.py
===================================================================
--- trunk/scipy/spatial/__init__.py 2008-09-30 19:14:59 UTC (rev 4758)
+++ trunk/scipy/spatial/__init__.py 2008-09-30 23:55:21 UTC (rev 4759)
@@ -0,0 +1,13 @@
+#
+# spatial - spatial data structures and algorithms
+#
+
+from info import __doc__
+
+from kdtree import *
+
+
+__all__ = filter(lambda s:not s.startswith('_'),dir())
+from numpy.testing import Tester
+test = Tester().test
+
Added: trunk/scipy/spatial/info.py
===================================================================
--- trunk/scipy/spatial/info.py 2008-09-30 19:14:59 UTC (rev 4758)
+++ trunk/scipy/spatial/info.py 2008-09-30 23:55:21 UTC (rev 4759)
@@ -0,0 +1,12 @@
+"""
+Spatial data structures and algorithms
+======================================
+
+Nearest-neighbor queries:
+
+ KDTree -- class for efficient nearest-neighbor queries
+ distance -- function for computing Minkowski p-norms
+
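+A short illustrative example::
+
+    >>> from scipy.spatial import KDTree
+    >>> tree = KDTree([[0, 0], [1, 1], [2, 2]])
+    >>> d, i = tree.query([0.9, 1.1])   # nearest neighbor of (0.9, 1.1)
+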
+"""
+
+postpone_import = 1
Added: trunk/scipy/spatial/kdtree.py
===================================================================
--- trunk/scipy/spatial/kdtree.py 2008-09-30 19:14:59 UTC (rev 4758)
+++ trunk/scipy/spatial/kdtree.py 2008-09-30 23:55:21 UTC (rev 4759)
@@ -0,0 +1,305 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+import numpy as np
+from heapq import heappush, heappop
+
+__all__ = ['distance_p', 'distance', 'KDTree']
+
+def distance_p(x,y,p=2):
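+    """Compute the p-th power of the Minkowski p-norm distance between x and y.
+
+    (For p=1 and p=infinity this is the distance itself.) x and y are
+    array-like; the norm is taken along the last axis.
+    """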
+ if p==np.inf:
+ return np.amax(np.abs(y-x),axis=-1)
+ elif p==1:
+ return np.sum(np.abs(y-x),axis=-1)
+ else:
+ return np.sum(np.abs(y-x)**p,axis=-1)
+def distance(x,y,p=2):
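+    """Compute the Minkowski p-norm distance between x and y.
+
+    x and y are array-like; the norm is taken along the last axis.
+    For example, distance(a, b) gives the Euclidean distance, while
+    distance(a, b, p=1) gives the "Manhattan" distance.
+    """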
+ if p==np.inf or p==1:
+ return distance_p(x,y,p)
+ else:
+ return distance_p(x,y,p)**(1./p)
+
+
+class KDTree(object):
+ """kd-tree for quick nearest-neighbor lookup
+
+ This class provides an index into a set of k-dimensional points
+ which can be used to rapidly look up the nearest neighbors of any
+ point.
+
+ The algorithm used is described in Maneewongvatana and Mount 1999.
+ The general idea is that the kd-tree is a binary trie, each of whose
+ nodes represents an axis-aligned hyperrectangle. Each node specifies
+ an axis and splits the set of points based on whether their coordinate
+ along that axis is greater than or less than a particular value.
+
+ During construction, the axis and splitting point are chosen by the
+ "sliding midpoint" rule, which ensures that the cells do not all
+ become long and thin.
+
+    The tree can be queried for the k closest neighbors of any given point
+    (optionally returning only those within some maximum distance of the
+    point). It can also be queried, with a substantial gain in efficiency,
+    for the k approximate closest neighbors.
+
+ For large dimensions (20 is already large) do not expect this to run
+ significantly faster than brute force. High-dimensional nearest-neighbor
+ queries are a substantial open problem in computer science.
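+
+    An illustrative example of typical usage:
+
+        >>> data = np.random.randn(100, 3)     # 100 points in 3 dimensions
+        >>> tree = KDTree(data)
+        >>> d, i = tree.query([0, 0, 0], k=2)  # two nearest neighbors of the origin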
+ """
+
+ def __init__(self, data, leafsize=10):
+ """Construct a kd-tree.
+
+ Parameters:
+ ===========
+
+ data : array-like, shape (n,k)
+            The data points to be indexed. This array is not copied, so
+            modifying it after the tree is built will give incorrect
+            results.
+ leafsize : positive integer
+ The number of points at which the algorithm switches over to
+ brute-force.
+ """
+ self.data = np.asarray(data)
+ self.n, self.k = np.shape(self.data)
+ self.leafsize = int(leafsize)
+ if self.leafsize<1:
+ raise ValueError("leafsize must be at least 1")
+ self.maxes = np.amax(self.data,axis=0)
+ self.mins = np.amin(self.data,axis=0)
+
+ self.tree = self.__build(np.arange(self.n), self.maxes, self.mins)
+
+ class node(object):
+ pass
+ class leafnode(node):
+ def __init__(self, idx):
+ self.idx = idx
+ class innernode(node):
+ def __init__(self, split_dim, split, less, greater):
+ self.split_dim = split_dim
+ self.split = split
+ self.less = less
+ self.greater = greater
+
+ def __build(self, idx, maxes, mins):
+ if len(idx)<=self.leafsize:
+ return KDTree.leafnode(idx)
+ else:
+ data = self.data[idx]
+ #maxes = np.amax(data,axis=0)
+ #mins = np.amin(data,axis=0)
+ d = np.argmax(maxes-mins)
+ maxval = maxes[d]
+ minval = mins[d]
+ if maxval==minval:
+ # all points are identical; warn user?
+ return KDTree.leafnode(idx)
+ data = data[:,d]
+
+ # sliding midpoint rule; see Maneewongvatana and Mount 1999
+ # for arguments that this is a good idea.
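+            # (the rule splits at the midpoint of the widest dimension; if
+            # all points then land on one side, the split "slides" to the
+            # nearest data point so that neither child is empty)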
+ split = (maxval+minval)/2
+ less_idx = np.nonzero(data<=split)[0]
+ greater_idx = np.nonzero(data>split)[0]
+ if len(less_idx)==0:
+ split = np.amin(data)
+ less_idx = np.nonzero(data<=split)[0]
+ greater_idx = np.nonzero(data>split)[0]
+ if len(greater_idx)==0:
+ split = np.amax(data)
+ less_idx = np.nonzero(data<split)[0]
+ greater_idx = np.nonzero(data>=split)[0]
+ if len(less_idx)==0:
+ # _still_ zero? all must have the same value
+ assert np.all(data==data[0]), "Troublesome data array: %s" % data
+ split = data[0]
+ less_idx = np.arange(len(data)-1)
+ greater_idx = np.array([len(data)-1])
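+                # (one point is placed in the greater child so that both
+                # children are nonempty and the recursion terminates)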
+
+ lessmaxes = np.copy(maxes)
+ lessmaxes[d] = split
+ greatermins = np.copy(mins)
+ greatermins[d] = split
+ return KDTree.innernode(d, split,
+ self.__build(idx[less_idx],lessmaxes,mins),
+ self.__build(idx[greater_idx],maxes,greatermins))
+
+ def __query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
+
+        side_distances = [max(0,x[i]-self.maxes[i],self.mins[i]-x[i]) for i in range(self.k)]
+        if p==np.inf:
+            min_distance = max(side_distances)
+        else:
+            # the per-axis distances are stored already raised to the p-th
+            # power, so that the incremental updates below remain consistent
+            # with min_distance, which is a sum of p-th powers for finite p
+            side_distances = [sd**p for sd in side_distances]
+            min_distance = sum(side_distances)
+        # priority queue for chasing nodes
+        # entries are:
+        #  minimum distance between the cell and the target
+        #  distances between the nearest side of the cell and the target
+        #  the head node of the cell
+        q = [(min_distance,
+              tuple(side_distances),
+              self.tree)]
+ # priority queue for the nearest neighbors
+ # furthest known neighbor first
+ # entries are (-distance**p, i)
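+        # (distances are negated so that Python's min-heap keeps the
+        # furthest of the current neighbors on top, ready for eviction)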
+ neighbors = []
+
+ if eps==0:
+ epsfac=1
+ elif p==np.inf:
+ epsfac = 1/(1+eps)
+ else:
+ epsfac = 1/(1+eps)**p
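+        # (a cell is searched only if it might contain a neighbor closer
+        # than distance_upper_bound/(1+eps); a larger eps therefore prunes
+        # more of the tree, at the cost of approximate results)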
+
+ if p!=np.inf and distance_upper_bound!=np.inf:
+ distance_upper_bound = distance_upper_bound**p
+
+ while q:
+ min_distance, side_distances, node = heappop(q)
+ if isinstance(node, KDTree.leafnode):
+ # brute-force
+ data = self.data[node.idx]
+ a = np.abs(data-x[np.newaxis,:])
+ if p==np.inf:
+ ds = np.amax(a,axis=1)
+ elif p==1:
+ ds = np.sum(a,axis=1)
+ else:
+ ds = np.sum(a**p,axis=1)
+ for i in range(len(ds)):
+ if ds[i]<distance_upper_bound:
+ if len(neighbors)==k:
+ heappop(neighbors)
+ heappush(neighbors, (-ds[i], node.idx[i]))
+ if len(neighbors)==k:
+ distance_upper_bound = -neighbors[0][0]
+ else:
+ # we don't push cells that are too far onto the queue at all,
+ # but since the distance_upper_bound decreases, we might get
+ # here even if the cell's too far
+ if min_distance>distance_upper_bound*epsfac:
+ # since this is the nearest cell, we're done, bail out
+ break
+ # compute minimum distances to the children and push them on
+ if x[node.split_dim]<node.split:
+ near, far = node.less, node.greater
+ else:
+ near, far = node.greater, node.less
+
+ # near child is at the same distance as the current node
+ heappush(q,(min_distance, side_distances, near))
+
+ # far child is further by an amount depending only
+ # on the split value
+ sd = list(side_distances)
+ if p == np.inf:
+ min_distance = max(min_distance, abs(node.split-x[node.split_dim]))
+ elif p == 1:
+ sd[node.split_dim] = np.abs(node.split-x[node.split_dim])
+ min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
+ else:
+ sd[node.split_dim] = np.abs(node.split-x[node.split_dim])**p
+ min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
+
+ # far child might be too far, if so, don't bother pushing it
+ if min_distance<=distance_upper_bound*epsfac:
+ heappush(q,(min_distance, tuple(sd), far))
+
+ if p==np.inf:
+ return sorted([(-d,i) for (d,i) in neighbors])
+ else:
+ return sorted([((-d)**(1./p),i) for (d,i) in neighbors])
+
+ def query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
+ """query the kd-tree for nearest neighbors
+
+ Parameters:
+ ===========
+
+ x : array-like, last dimension self.k
+ An array of points to query.
+ k : integer
+ The number of nearest neighbors to return.
+ eps : nonnegative float
+ Return approximate nearest neighbors; the kth returned value
+ is guaranteed to be no further than (1+eps) times the
+ distance to the real kth nearest neighbor.
+ p : float, 1<=p<=infinity
+ Which Minkowski p-norm to use.
+ 1 is the sum-of-absolute-values "Manhattan" distance
+ 2 is the usual Euclidean distance
+ infinity is the maximum-coordinate-difference distance
+ distance_upper_bound : nonnegative float
+ Return only neighbors within this distance. This is used to prune
+ tree searches, so if you are doing a series of nearest-neighbor
+ queries, it may help to supply the distance to the nearest neighbor
+ of the most recent point.
+
+ Returns:
+ ========
+
+ d : array of floats
+ The distances to the nearest neighbors.
+ If x has shape tuple+(self.k,), then d has shape tuple if
+ k is one, or tuple+(k,) if k is larger than one. Missing
+ neighbors are indicated with infinite distances. If k is None,
+ then d is an object array of shape tuple, containing lists
+ of distances. In either case the hits are sorted by distance
+ (nearest first).
+ i : array of integers
+ The locations of the neighbors in self.data. i is the same
+ shape as d.
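+
+        Example (illustrative):
+
+        >>> tree = KDTree(np.random.randn(50, 2))
+        >>> d, i = tree.query(np.zeros((4, 2)), k=3)
+        >>> np.shape(d), np.shape(i)
+        ((4, 3), (4, 3))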
+ """
+ x = np.asarray(x)
+ if np.shape(x)[-1] != self.k:
+ raise ValueError("x must consist of vectors of length %d but has shape %s" % (self.k, np.shape(x)))
+ if p<1:
+ raise ValueError("Only p-norms with 1<=p<=infinity permitted")
+ retshape = np.shape(x)[:-1]
+ if retshape!=():
+ if k>1:
+ dd = np.empty(retshape+(k,),dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(retshape+(k,),dtype=np.int)
+ ii.fill(self.n)
+ elif k==1:
+ dd = np.empty(retshape,dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(retshape,dtype=np.int)
+ ii.fill(self.n)
+ elif k is None:
+ dd = np.empty(retshape,dtype=np.object)
+ ii = np.empty(retshape,dtype=np.object)
+ else:
+                raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None" % k)
+ for c in np.ndindex(retshape):
+ hits = self.__query(x[c], k=k, p=p, distance_upper_bound=distance_upper_bound)
+ if k>1:
+ for j in range(len(hits)):
+ dd[c+(j,)], ii[c+(j,)] = hits[j]
+ elif k==1:
+ if len(hits)>0:
+ dd[c], ii[c] = hits[0]
+ else:
+ dd[c] = np.inf
+ ii[c] = self.n
+ elif k is None:
+ dd[c] = [d for (d,i) in hits]
+ ii[c] = [i for (d,i) in hits]
+ return dd, ii
+ else:
+ hits = self.__query(x, k=k, p=p, distance_upper_bound=distance_upper_bound)
+ if k==1:
+ if len(hits)>0:
+ return hits[0]
+ else:
+ return np.inf, self.n
+ elif k>1:
+ dd = np.empty(k,dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(k,dtype=np.int)
+ ii.fill(self.n)
+ for j in range(len(hits)):
+ dd[j], ii[j] = hits[j]
+ return dd, ii
+ elif k is None:
+ return [d for (d,i) in hits], [i for (d,i) in hits]
+ else:
+                raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None" % k)
+
+
+
Copied: trunk/scipy/spatial/setup.py (from rev 4758, trunk/scipy/interpolate/setup.py)
===================================================================
--- trunk/scipy/interpolate/setup.py 2008-09-30 19:14:59 UTC (rev 4758)
+++ trunk/scipy/spatial/setup.py 2008-09-30 23:55:21 UTC (rev 4759)
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+from os.path import join
+
+def configuration(parent_package='',top_path=None):
+ from numpy.distutils.misc_util import Configuration
+
+ config = Configuration('spatial', parent_package, top_path)
+
+ config.add_data_dir('tests')
+
+ return config
+
+if __name__ == '__main__':
+ from numpy.distutils.core import setup
+ setup(**configuration(top_path='').todict())
Property changes on: trunk/scipy/spatial/setup.py
___________________________________________________________________
Name: svn:executable
+ *
Name: svn:keywords
+ Author Date Id Revision
Name: svn:mergeinfo
+
Name: svn:eol-style
+ native
Added: trunk/scipy/spatial/tests/test_kdtree.py
===================================================================
--- trunk/scipy/spatial/tests/test_kdtree.py 2008-09-30 19:14:59 UTC (rev 4758)
+++ trunk/scipy/spatial/tests/test_kdtree.py 2008-09-30 23:55:21 UTC (rev 4759)
@@ -0,0 +1,179 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+from numpy.testing import *
+
+import numpy as np
+from scipy.spatial import KDTree, distance
+
+class CheckSmall(TestCase):
+ def setUp(self):
+ self.data = np.array([[0,0,0],
+ [0,0,1],
+ [0,1,0],
+ [0,1,1],
+ [1,0,0],
+ [1,0,1],
+ [1,1,0],
+ [1,1,1]])
+ self.kdtree = KDTree(self.data)
+
+ def test_nearest(self):
+ assert_array_equal(
+ self.kdtree.query((0,0,0.1), 1),
+ (0.1,0))
+ def test_nearest_two(self):
+ assert_array_equal(
+ self.kdtree.query((0,0,0.1), 2),
+ ([0.1,0.9],[0,1]))
+class CheckSmallNonLeaf(TestCase):
+ def setUp(self):
+ self.data = np.array([[0,0,0],
+ [0,0,1],
+ [0,1,0],
+ [0,1,1],
+ [1,0,0],
+ [1,0,1],
+ [1,1,0],
+ [1,1,1]])
+ self.kdtree = KDTree(self.data,leafsize=1)
+
+ def test_nearest(self):
+ assert_array_equal(
+ self.kdtree.query((0,0,0.1), 1),
+ (0.1,0))
+ def test_nearest_two(self):
+ assert_array_equal(
+ self.kdtree.query((0,0,0.1), 2),
+ ([0.1,0.9],[0,1]))
+
+class CheckRandom(TestCase):
+ def setUp(self):
+ self.n = 1000
+ self.k = 4
+ self.data = np.random.randn(self.n, self.k)
+ self.kdtree = KDTree(self.data)
+
+ def test_nearest(self):
+ x = np.random.randn(self.k)
+ d, i = self.kdtree.query(x, 1)
+ assert_almost_equal(d**2,np.sum((x-self.data[i])**2))
+ eps = 1e-8
+ assert np.all(np.sum((self.data-x[np.newaxis,:])**2,axis=1)>d**2-eps)
+
+ def test_m_nearest(self):
+ x = np.random.randn(self.k)
+ m = 10
+ dd, ii = self.kdtree.query(x, m)
+ d = np.amax(dd)
+ i = ii[np.argmax(dd)]
+ assert_almost_equal(d**2,np.sum((x-self.data[i])**2))
+ eps = 1e-8
+ assert_equal(np.sum(np.sum((self.data-x[np.newaxis,:])**2,axis=1)<d**2+eps),m)
+
+ def test_points_near(self):
+ x = np.random.randn(self.k)
+ d = 0.2
+ dd, ii = self.kdtree.query(x, k=self.kdtree.n, distance_upper_bound=d)
+ eps = 1e-8
+ hits = 0
+ for near_d, near_i in zip(dd,ii):
+ if near_d==np.inf:
+ continue
+ hits += 1
+ assert_almost_equal(near_d**2,np.sum((x-self.data[near_i])**2))
+ assert near_d<d+eps, "near_d=%g should be less than %g" % (near_d,d)
+ assert_equal(np.sum(np.sum((self.data-x[np.newaxis,:])**2,axis=1)<d**2+eps),hits)
+
+ def test_points_near_l1(self):
+ x = np.random.randn(self.k)
+ d = 0.2
+ dd, ii = self.kdtree.query(x, k=self.kdtree.n, p=1, distance_upper_bound=d)
+ eps = 1e-8
+ hits = 0
+ for near_d, near_i in zip(dd,ii):
+ if near_d==np.inf:
+ continue
+ hits += 1
+ assert_almost_equal(near_d,distance(x,self.data[near_i],1))
+ assert near_d<d+eps, "near_d=%g should be less than %g" % (near_d,d)
+ assert_equal(np.sum(distance(self.data,x,1)<d+eps),hits)
+ def test_points_near_linf(self):
+ x = np.random.randn(self.k)
+ d = 0.2
+ dd, ii = self.kdtree.query(x, k=self.kdtree.n, p=np.inf, distance_upper_bound=d)
+ eps = 1e-8
+ hits = 0
+ for near_d, near_i in zip(dd,ii):
+ if near_d==np.inf:
+ continue
+ hits += 1
+ assert_almost_equal(near_d,distance(x,self.data[near_i],np.inf))
+ assert near_d<d+eps, "near_d=%g should be less than %g" % (near_d,d)
+ assert_equal(np.sum(distance(self.data,x,np.inf)<d+eps),hits)
+
+ def test_approx(self):
+ x = np.random.randn(self.k)
+ m = 10
+ eps = 0.1
+ d_real, i_real = self.kdtree.query(x, m)
+ d, i = self.kdtree.query(x, m, eps=eps)
+ assert np.all(d<=d_real*(1+eps))
+
+class CheckVectorization(TestCase):
+ def setUp(self):
+ self.data = np.array([[0,0,0],
+ [0,0,1],
+ [0,1,0],
+ [0,1,1],
+ [1,0,0],
+ [1,0,1],
+ [1,1,0],
+ [1,1,1]])
+ self.kdtree = KDTree(self.data)
+
+ def test_single_query(self):
+ d, i = self.kdtree.query([0,0,0])
+ assert isinstance(d,float)
+ assert isinstance(i,int)
+
+ def test_vectorized_query(self):
+ d, i = self.kdtree.query(np.zeros((2,4,3)))
+ assert_equal(np.shape(d),(2,4))
+ assert_equal(np.shape(i),(2,4))
+
+ def test_single_query_multiple_neighbors(self):
+ s = 23
+ kk = self.kdtree.n+s
+ d, i = self.kdtree.query([0,0,0],k=kk)
+ assert_equal(np.shape(d),(kk,))
+ assert_equal(np.shape(i),(kk,))
+ assert np.all(~np.isfinite(d[-s:]))
+ assert np.all(i[-s:]==self.kdtree.n)
+ def test_vectorized_query_multiple_neighbors(self):
+ s = 23
+ kk = self.kdtree.n+s
+ d, i = self.kdtree.query(np.zeros((2,4,3)),k=kk)
+ assert_equal(np.shape(d),(2,4,kk))
+ assert_equal(np.shape(i),(2,4,kk))
+ assert np.all(~np.isfinite(d[:,:,-s:]))
+ assert np.all(i[:,:,-s:]==self.kdtree.n)
+ def test_single_query_all_neighbors(self):
+ d, i = self.kdtree.query([0,0,0],k=None,distance_upper_bound=1.1)
+ assert isinstance(d,list)
+ assert isinstance(i,list)
+ def test_vectorized_query_all_neighbors(self):
+ d, i = self.kdtree.query(np.zeros((2,4,3)),k=None,distance_upper_bound=1.1)
+ assert_equal(np.shape(d),(2,4))
+ assert_equal(np.shape(i),(2,4))
+
+ assert isinstance(d[0,0],list)
+ assert isinstance(i[0,0],list)
+
+
+
+
+if __name__=='__main__':
+ import unittest
+ unittest.main()
+
+