[Numpy-svn] r3644 - in trunk/numpy/lib: . tests
numpy-svn at scipy.org
numpy-svn at scipy.org
Mon Apr 2 14:25:55 EDT 2007
Author: oliphant
Date: 2007-04-02 13:25:50 -0500 (Mon, 02 Apr 2007)
New Revision: 3644
Modified:
trunk/numpy/lib/function_base.py
trunk/numpy/lib/tests/test_function_base.py
trunk/numpy/lib/tests/test_twodim_base.py
trunk/numpy/lib/twodim_base.py
Log:
Add patch in Ticket #189 for histogramdd. Fixes bug reported by Ben Granett
Modified: trunk/numpy/lib/function_base.py
===================================================================
--- trunk/numpy/lib/function_base.py 2007-04-02 13:27:42 UTC (rev 3643)
+++ trunk/numpy/lib/function_base.py 2007-04-02 18:25:50 UTC (rev 3644)
@@ -1,3 +1,4 @@
+__docformat__ = "restructuredtext en"
__all__ = ['logspace', 'linspace',
'select', 'piecewise', 'trim_zeros',
'copy', 'iterable', #'base_repr', 'binary_repr',
@@ -117,46 +118,49 @@
else:
return n, bins
-def histogramdd(sample, bins=10, range=None, normed=False):
- """histogramdd(sample, bins = 10, range = None, normed = False) -> H, edges
+def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
+ """histogramdd(sample, bins=10, range=None, normed=False, weights=None)
- Return the D-dimensional histogram computed from sample.
+ Return the D-dimensional histogram of the sample.
- Parameters
- ----------
- sample: A sequence of D arrays, or an NxD array.
- bins: A sequence of edge arrays, or a sequence of the number of bins.
- If a scalar is given, it is assumed to be the number of bins
- for all dimensions.
- range: A sequence of lower and upper bin edges (default: [min, max]).
- normed: If False, returns the number of samples in each bin.
- If True, returns the frequency distribution.
+ :Parameters:
+ - `sample` : A sequence of D arrays, or an NxD array.
+ - `bins` : A sequence of edge arrays, a sequence of bin number,
+ or a scalar (the number of bins for all dimensions.)
+ - `range` : A sequence of lower and upper bin edges (default: [min, max]).
+ - `normed` : Boolean, if False, return the number of samples in each bin,
+ if True, returns the density.
+ - `weights` : An array of weights. The weights are normed only if normed is True.
+ Should weights.sum() not equal N, the total bin count will
+ not be equal to the number of samples.
+ :Return:
+ - `hist` : Histogram array.
+ - `edges` : List of arrays defining the bin edges.
+
- Output
- ------
- H: Histogram array.
- edges: List of arrays defining the bin edges.
-
Example:
- x = random.randn(100,3)
- H, edges = histogramdd(x, bins = (5, 6, 7))
+ >>> x = random.randn(100,3)
+ >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7))
- See also: histogram
+ :SeeAlso: histogram
"""
- try:
+ try:
+ # Sample is an ND-array.
N, D = sample.shape
- except (AttributeError, ValueError):
- ss = atleast_2d(sample)
- sample = ss.transpose()
+ except (AttributeError, ValueError):
+ # Sample is a sequence of 1D arrays.
+ sample = atleast_2d(sample).T
N, D = sample.shape
nbin = empty(D, int)
edges = D*[None]
dedges = D*[None]
-
+ if weights is not None:
+ weights = asarray(weights)
+
try:
M = len(bins)
if M != D:
@@ -164,6 +168,8 @@
except TypeError:
bins = D*[bins]
+ # Select range for each dimension
+ # Used only if number of bins is given.
if range is None:
smin = atleast_1d(sample.min(0))
smax = atleast_1d(sample.max(0))
@@ -173,39 +179,37 @@
for i in arange(D):
smin[i], smax[i] = range[i]
+ # Create edge arrays
for i in arange(D):
if isscalar(bins[i]):
- nbin[i] = bins[i]
- edges[i] = linspace(smin[i], smax[i], nbin[i]+1)
+ nbin[i] = bins[i] + 2 # +2 for outlier bins
+ edges[i] = linspace(smin[i], smax[i], nbin[i]-1)
else:
edges[i] = asarray(bins[i], float)
- nbin[i] = len(edges[i])-1
-
-
-
+ nbin[i] = len(edges[i])+1 # +1 for outlier bins
+ dedges[i] = diff(edges[i])
+
+ nbin = asarray(nbin)
+
+ # Compute the bin number each sample falls into.
Ncount = {}
- nbin = asarray(nbin)
-
for i in arange(D):
Ncount[i] = digitize(sample[:,i], edges[i])
- dedges[i] = diff(edges[i])
- # Remove values falling outside of bins
- # Values that fall on an edge are put in the right bin.
+
+ # Using digitize, values that fall on an edge are put in the right bin.
# For the rightmost bin, we want values equal to the right
# edge to be counted in the last bin, and not as an outlier.
outliers = zeros(N, int)
for i in arange(D):
+ # Rounding precision
decimal = int(-log10(dedges[i].min())) +6
+ # Find which points are on the rightmost edge.
on_edge = where(around(sample[:,i], decimal) == around(edges[i][-1], decimal))[0]
+ # Shift these points one bin to the left.
Ncount[i][on_edge] -= 1
- outliers += (Ncount[i] == 0) | (Ncount[i] == nbin[i]+1)
- indices = where(outliers == 0)[0]
- for i in arange(D):
- Ncount[i] = Ncount[i][indices] - 1
- N = len(indices)
# Flattened histogram matrix (1D)
- hist = zeros(nbin.prod(), int)
+ hist = zeros(nbin.prod(), float)
# Compute the sample indices in the flattened histogram matrix.
ni = nbin.argsort()
@@ -213,29 +217,33 @@
xy = zeros(N, int)
for i in arange(0, D-1):
xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod()
-
xy += Ncount[ni[-1]]
# Compute the number of repetitions in xy and assign it to the flattened histmat.
if len(xy) == 0:
- return zeros(nbin, int)
+ return zeros(nbin-2, int), edges
- flatcount = bincount(xy)
+ flatcount = bincount(xy, weights)
a = arange(len(flatcount))
hist[a] = flatcount
# Shape into a proper matrix
hist = hist.reshape(sort(nbin))
- for i,j in enumerate(ni):
+ for i in arange(nbin.size):
+ j = ni[i]
hist = hist.swapaxes(i,j)
- if (hist.shape == nbin).all():
- break
+ ni[i],ni[j] = ni[j],ni[i]
+ # Remove outliers (indices 0 and -1 for each dimension).
+ core = D*[slice(1,-1)]
+ hist = hist[core]
+
+ # Normalize if normed is True
if normed:
s = hist.sum()
for i in arange(D):
shape = ones(D, int)
- shape[i] = nbin[i]
+ shape[i] = nbin[i]-2
hist = hist / dedges[i].reshape(shape)
hist /= s
Modified: trunk/numpy/lib/tests/test_function_base.py
===================================================================
--- trunk/numpy/lib/tests/test_function_base.py 2007-04-02 13:27:42 UTC (rev 3643)
+++ trunk/numpy/lib/tests/test_function_base.py 2007-04-02 18:25:50 UTC (rev 3644)
@@ -365,7 +365,7 @@
[.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]])
H, edges = histogramdd(x, (2,3,3), range = [[-1,1], [0,3], [0,3]])
answer = asarray([[[0,1,0], [0,0,1], [1,0,0]], [[0,1,0], [0,0,1], [0,0,1]]])
- assert(all(H == answer))
+ assert_array_equal(H,answer)
# Check normalization
ed = [[-2,0,2], [0,1,2,3], [0,1,2,3]]
H, edges = histogramdd(x, bins = ed, normed = True)
@@ -383,6 +383,27 @@
[[0,0],[0,0],[0,0]]])
assert_array_equal(H, answer)
+ Z = zeros((5,5,5))
+ Z[range(5), range(5), range(5)] = 1.
+ H,edges = histogramdd([arange(5), arange(5), arange(5)], 5)
+ assert_array_equal(H, Z)
+
+ def check_shape(self):
+ x = rand(100,3)
+ hist3d, edges = histogramdd(x, bins = (5, 7, 6))
+ assert_array_equal(hist3d.shape, (5,7,6))
+
+ def check_weights(self):
+ v = rand(100,2)
+ hist, edges = histogramdd(v)
+ n_hist, edges = histogramdd(v, normed=True)
+ w_hist, edges = histogramdd(v, weights=ones(100))
+ assert_array_equal(w_hist, hist)
+ w_hist, edges = histogramdd(v, weights=ones(100)*2, normed=True)
+ assert_array_equal(w_hist, n_hist)
+ w_hist, edges = histogramdd(v, weights=ones(100, int)*2)
+ assert_array_equal(w_hist, 2*hist)
+
class test_unique(NumpyTestCase):
def check_simple(self):
Modified: trunk/numpy/lib/tests/test_twodim_base.py
===================================================================
--- trunk/numpy/lib/tests/test_twodim_base.py 2007-04-02 13:27:42 UTC (rev 3643)
+++ trunk/numpy/lib/tests/test_twodim_base.py 2007-04-02 18:25:50 UTC (rev 3644)
@@ -149,6 +149,13 @@
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0]])
assert_array_equal(H.T, answer)
+ H = histogram2d(x, y, xedges)[0]
+ assert_array_equal(H.T, answer)
+ H,xedges,yedges = histogram2d(range(10),range(10))
+ assert_array_equal(H, eye(10,10))
+ assert_array_equal(xedges, np.linspace(0,9,11))
+ assert_array_equal(yedges, np.linspace(0,9,11))
+
def check_asym(self):
x = array([1, 1, 2, 3, 4, 4, 4, 5])
y = array([1, 3, 2, 0, 1, 2, 3, 4])
@@ -160,6 +167,8 @@
[0,1,1,1,0],
[0,0,0,0,1]])
assert_array_almost_equal(H, answer/8., 3)
+ assert_array_equal(xed, np.linspace(0,6,7))
+ assert_array_equal(yed, np.linspace(0,5,6))
def check_norm(self):
x = array([1,2,3,1,2,3,1,2,3])
y = array([1,1,1,2,2,2,3,3,3])
@@ -169,5 +178,10 @@
[.5,.5,.25]])/9.
assert_array_almost_equal(H, answer, 3)
+ def check_all_outliers(self):
+ r = rand(100)+1.
+ H, xed, yed = histogram2d(r, r, (4, 5), range=([0,1], [0,1]))
+ assert_array_equal(H, 0)
+
if __name__ == "__main__":
NumpyTest().run()
Modified: trunk/numpy/lib/twodim_base.py
===================================================================
--- trunk/numpy/lib/twodim_base.py 2007-04-02 13:27:42 UTC (rev 3643)
+++ trunk/numpy/lib/twodim_base.py 2007-04-02 18:25:50 UTC (rev 3644)
@@ -143,105 +143,42 @@
X[:,i] = x**(N-i-1)
return X
-def histogram2d(x,y, bins=10, range=None, normed=False):
+
+def histogram2d(x,y, bins=10, range=None, normed=False, weights=None):
"""histogram2d(x,y, bins=10, range=None, normed=False) -> H, xedges, yedges
Compute the 2D histogram from samples x,y.
- Parameters
- ----------
- x,y: 1D data series. Both arrays must have the same length.
- bins: Number of bins -or- [nbin x, nbin y] -or-
- [bin edges] -or- [x bin edges, y bin edges].
- range: A sequence of lower and upper bin edges (default: [min, max]).
- normed: True or False.
-
- The histogram array is a count of the number of samples in each
- two dimensional bin.
- Setting normed to True returns a density rather than a bin count.
+ :Parameters:
+ - `x,y` : Sample arrays (1D).
+ - `bins` : Number of bins -or- [nbin x, nbin y] -or-
+ [bin edges] -or- [x bin edges, y bin edges].
+ - `range` : A sequence of lower and upper bin edges (default: [min, max]).
+ - `normed` : Boolean, if False, return the number of samples in each bin,
+ if True, returns the density.
+ - `weights` : An array of weights. The weights are normed only if normed
+ is True. Should weights.sum() not equal N, the total bin count \
+ will not be equal to the number of samples.
+
+ :Return:
+ - `hist` : Histogram array.
+ - `xedges, yedges` : Arrays defining the bin edges.
+
+ Example:
+ >>> x = random.randn(100,2)
+ >>> hist2d, xedges, yedges = histogram2d(x, bins = (6, 7))
+
+ :SeeAlso: histogramdd
"""
- import numpy as np
+ from numpy import histogramdd
+
try:
N = len(bins)
except TypeError:
N = 1
- bins = [bins]
- x = asarray(x)
- y = asarray(y)
- if range is None:
- xmin, xmax = x.min(), x.max()
- ymin, ymax = y.min(), y.max()
- else:
- xmin, xmax = range[0]
- ymin, ymax = range[1]
- if N == 2:
- if np.isscalar(bins[0]):
- xnbin = bins[0]
- xedges = np.linspace(xmin, xmax, xnbin+1)
- else:
- xedges = asarray(bins[0], float)
- xnbin = len(xedges)-1
- if np.isscalar(bins[1]):
- ynbin = bins[1]
- yedges = np.linspace(ymin, ymax, ynbin+1)
- else:
- yedges = asarray(bins[1], float)
- ynbin = len(yedges)-1
- elif N == 1:
- ynbin = xnbin = bins[0]
- xedges = np.linspace(xmin, xmax, xnbin+1)
- yedges = np.linspace(ymin, ymax, ynbin+1)
- else:
- yedges = asarray(bins, float)
- xedges = yedges.copy()
- ynbin = len(yedges)-1
- xnbin = len(xedges)-1
- dxedges = np.diff(xedges)
- dyedges = np.diff(yedges)
-
- # Flattened histogram matrix (1D)
- hist = np.zeros((xnbin)*(ynbin), int)
-
- # Count the number of sample in each bin (1D)
- xbin = np.digitize(x,xedges)
- ybin = np.digitize(y,yedges)
-
- # Values that fall on an edge are put in the right bin.
- # For the rightmost bin, we want values equal to the right
- # edge to be counted in the last bin, and not as an outlier.
- xdecimal = int(-np.log10(dxedges.min()))+6
- ydecimal = int(-np.log10(dyedges.min()))+6
- on_edge_x = np.where(np.around(x,xdecimal) == np.around(xedges[-1], xdecimal))[0]
- on_edge_y = np.where(np.around(y,ydecimal) == np.around(yedges[-1], ydecimal))[0]
- xbin[on_edge_x] -= 1
- ybin[on_edge_y] -= 1
- # Remove the true outliers
- outliers = (xbin==0) | (xbin==xnbin+1) | (ybin==0) | (ybin == ynbin+1)
- xbin = xbin[outliers==False] - 1
- ybin = ybin[outliers==False] - 1
-
- # Compute the sample indices in the flattened histogram matrix.
- if xnbin >= ynbin:
- xy = ybin*(xnbin) + xbin
-
- else:
- xy = xbin*(ynbin) + ybin
-
-
- # Compute the number of repetitions in xy and assign it to the flattened
- # histogram matrix.
-
- flatcount = np.bincount(xy)
- indices = np.arange(len(flatcount))
- hist[indices] = flatcount
-
- shape = np.sort([xnbin, ynbin])
- # Shape into a proper matrix
- histmat = hist.reshape(shape)
- if (shape == (ynbin, xnbin)).all():
- histmat = histmat.T
- if normed:
- diff2 = np.outer(dxedges, dyedges)
- histmat = histmat / diff2 / histmat.sum()
- return histmat, xedges, yedges
+ if N != 1 and N != 2:
+ xedges = yedges = asarray(bins, float)
+ bins = [xedges, yedges]
+ hist, edges = histogramdd([x,y], bins, range, normed, weights)
+ return hist, edges[0], edges[1]
More information about the Numpy-svn
mailing list