[Numpy-svn] r3757 - in trunk/numpy/lib: . tests
numpy-svn at scipy.org
numpy-svn at scipy.org
Sun May 13 19:22:20 EDT 2007
Author: charris
Date: 2007-05-13 18:22:17 -0500 (Sun, 13 May 2007)
New Revision: 3757
Modified:
trunk/numpy/lib/function_base.py
trunk/numpy/lib/tests/test_function_base.py
Log:
Add patch from dhuard to histogramdd. Fixes ticket #509.
Restructure the reStructuredText doc comments; avoid consolidated lists, which
are too ugly to contemplate and get moved around to where they aren't wanted.
They can be fixed later if epydoc fixes things up.
Modified: trunk/numpy/lib/function_base.py
===================================================================
--- trunk/numpy/lib/function_base.py 2007-05-13 20:15:09 UTC (rev 3756)
+++ trunk/numpy/lib/function_base.py 2007-05-13 23:22:17 UTC (rev 3757)
@@ -71,29 +71,41 @@
def histogram(a, bins=10, range=None, normed=False):
"""Compute the histogram from a set of data.
- :Parameters:
- - `a` : array
- The data to histogram. n-D arrays will be flattened.
- - `bins` : int or sequence of floats, optional
- If an int, then the number of equal-width bins in the given range.
- Otherwise, a sequence of the lower bound of each bin.
- - `range` : (float, float), optional
- The lower and upper range of the bins. If not provided, then (a.min(),
- a.max()) is used. Values outside of this range are allocated to the
- closest bin.
- - `normed` : bool, optional
- If False, the result array will contain the number of samples in each bin.
- If True, the result array is the value of the probability *density*
- function at the bin normalized such that the *integral* over the range
- is 1. Note that the sum of all of the histogram values will not usually
- be 1; it is not a probability *mass* function.
+ Parameters:
- :Returns:
- - `hist` : array (n,)
- The values of the histogram. See `normed` for a description of the
- possible semantics.
- - `lower_edges` : float array (n,)
- The lower edges of each bin.
+ a : array
+ The data to histogram. n-D arrays will be flattened.
+
+ bins : int or sequence of floats
+ If an int, then the number of equal-width bins in the given range.
+ Otherwise, a sequence of the lower bound of each bin.
+
+ range : (float, float)
+ The lower and upper range of the bins. If not provided, then
+ (a.min(), a.max()) is used. Values outside of this range are
+ allocated to the closest bin.
+
+ normed : bool
+ If False, the result array will contain the number of samples in
+ each bin. If True, the result array is the value of the
+ probability *density* function at the bin normalized such that the
+ *integral* over the range is 1. Note that the sum of all of the
+ histogram values will not usually be 1; it is not a probability
+ *mass* function.
+
+ Returns:
+
+ hist : array
+ The values of the histogram. See `normed` for a description of the
+ possible semantics.
+
+ lower_edges : float array
+ The lower edges of each bin.
+
+ SeeAlso:
+
+ histogramdd
+
"""
a = asarray(a).ravel()
if not iterable(bins):
@@ -120,38 +132,54 @@
return n, bins
def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
- """histogramdd(sample, bins=10, range=None, normed=False, weights=None)
+ """histogramdd(sample, bins=10, range=None, normed=False, weights=None)
- Return the D-dimensional histogram of the sample.
+ Return the N-dimensional histogram of the sample.
- :Parameters:
- - `sample` : A sequence of D arrays, or an NxD array.
- - `bins` : A sequence of edge arrays, a sequence of bin number,
- or a scalar (the number of bins for all dimensions.)
- - `range` : A sequence of lower and upper bin edges (default: [min, max]).
- - `normed` : Boolean, if False, return the number of samples in each bin,
- if True, returns the density.
- - `weights` : An array of weights. The weights are normed only if normed is True.
- Should weights.sum() not equal N, the total bin count will
- not be equal to the number of samples.
+ Parameters:
- :Return:
- - `hist` : Histogram array.
- - `edges` : List of arrays defining the bin edges.
-
+ sample : sequence or array
+ A sequence containing N arrays or an NxM array. Input data.
- Example:
- >>> x = random.randn(100,3)
- >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7))
+ bins : sequence or scalar
+ A sequence of edge arrays, a sequence of bin counts, or a scalar
+ which is the bin count for all dimensions. Default is 10.
- :SeeAlso: histogram
+ range : sequence
+ A sequence of lower and upper bin edges. Default is [min, max].
+ normed : boolean
+ If False, return the number of samples in each bin, if True,
+ returns the density.
+
+ weights : array
+ Array of weights. The weights are normed only if normed is True.
+ Should the sum of the weights not equal N, the total bin count will
+ not be equal to the number of samples.
+
+ Returns:
+
+ hist : array
+ Histogram array.
+
+ edges : list
+ List of arrays defining the lower bin edges.
+
+ SeeAlso:
+
+ histogram
+
+ Example
+
+ >>> x = random.randn(100,3)
+ >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7))
+
"""
- try:
+ try:
# Sample is an ND-array.
N, D = sample.shape
- except (AttributeError, ValueError):
+ except (AttributeError, ValueError):
# Sample is a sequence of 1D arrays.
sample = atleast_2d(sample).T
N, D = sample.shape
@@ -161,7 +189,7 @@
dedges = D*[None]
if weights is not None:
weights = asarray(weights)
-
+
try:
M = len(bins)
if M != D:
@@ -172,14 +200,20 @@
# Select range for each dimension
# Used only if number of bins is given.
if range is None:
- smin = atleast_1d(sample.min(0))
- smax = atleast_1d(sample.max(0))
+ smin = atleast_1d(array(sample.min(0), float))
+ smax = atleast_1d(array(sample.max(0), float))
else:
smin = zeros(D)
smax = zeros(D)
for i in arange(D):
smin[i], smax[i] = range[i]
+ # Make sure the bins have a finite width.
+ for i in arange(len(smin)):
+ if smin[i] == smax[i]:
+ smin[i] = smin[i] - .5
+ smax[i] = smax[i] + .5
+
# Create edge arrays
for i in arange(D):
if isscalar(bins[i]):
@@ -189,14 +223,14 @@
edges[i] = asarray(bins[i], float)
nbin[i] = len(edges[i])+1 # +1 for outlier bins
dedges[i] = diff(edges[i])
-
+
nbin = asarray(nbin)
-
- # Compute the bin number each sample falls into.
+
+ # Compute the bin number each sample falls into.
Ncount = {}
for i in arange(D):
Ncount[i] = digitize(sample[:,i], edges[i])
-
+
# Using digitize, values that fall on an edge are put in the right bin.
# For the rightmost bin, we want values equal to the right
# edge to be counted in the last bin, and not as an outlier.
@@ -206,7 +240,7 @@
decimal = int(-log10(dedges[i].min())) +6
# Find which points are on the rightmost edge.
on_edge = where(around(sample[:,i], decimal) == around(edges[i][-1], decimal))[0]
- # Shift these points one bin to the left.
+ # Shift these points one bin to the left.
Ncount[i][on_edge] -= 1
# Flattened histogram matrix (1D)
@@ -238,7 +272,7 @@
# Remove outliers (indices 0 and -1 for each dimension).
core = D*[slice(1,-1)]
hist = hist[core]
-
+
# Normalize if normed is True
if normed:
s = hist.sum()
Modified: trunk/numpy/lib/tests/test_function_base.py
===================================================================
--- trunk/numpy/lib/tests/test_function_base.py 2007-05-13 20:15:09 UTC (rev 3756)
+++ trunk/numpy/lib/tests/test_function_base.py 2007-05-13 23:22:17 UTC (rev 3757)
@@ -60,7 +60,7 @@
def check_weighted(self):
y1 = array([[1,2,3],
[4,5,6]])
- actual = average(y1,weights=[1,2],axis=0)
+ actual = average(y1,weights=[1,2],axis=0)
desired = array([3.,4.,5.])
assert_array_equal(actual, desired)
@@ -394,12 +394,12 @@
Z[range(5), range(5), range(5)] = 1.
H,edges = histogramdd([arange(5), arange(5), arange(5)], 5)
assert_array_equal(H, Z)
-
+
def check_shape(self):
x = rand(100,3)
hist3d, edges = histogramdd(x, bins = (5, 7, 6))
assert_array_equal(hist3d.shape, (5,7,6))
-
+
def check_weights(self):
v = rand(100,2)
hist, edges = histogramdd(v)
@@ -410,8 +410,12 @@
assert_array_equal(w_hist, n_hist)
w_hist, edges = histogramdd(v, weights=ones(100, int)*2)
assert_array_equal(w_hist, 2*hist)
-
+ def check_identical_samples(self):
+ x = zeros((10,2),int)
+ hist, edges = histogramdd(x, bins=2)
+ assert_array_equal(edges[0],array([-0.5, 0. , 0.5]))
+
class test_unique(NumpyTestCase):
def check_simple(self):
x = array([4,3,2,1,1,2,3,4, 0])
More information about the Numpy-svn
mailing list