[Numpy-svn] r3757 - in trunk/numpy/lib: . tests

numpy-svn at scipy.org numpy-svn at scipy.org
Sun May 13 19:22:20 EDT 2007


Author: charris
Date: 2007-05-13 18:22:17 -0500 (Sun, 13 May 2007)
New Revision: 3757

Modified:
   trunk/numpy/lib/function_base.py
   trunk/numpy/lib/tests/test_function_base.py
Log:
Add patch from dhuard to histogramdd. Fixes ticket #509.

Restructure restructured comments; avoid consolidated lists, they are too ugly
to contemplate and move around where they aren't wanted. They can be fixed later
if epydoc fixes things up.


Modified: trunk/numpy/lib/function_base.py
===================================================================
--- trunk/numpy/lib/function_base.py	2007-05-13 20:15:09 UTC (rev 3756)
+++ trunk/numpy/lib/function_base.py	2007-05-13 23:22:17 UTC (rev 3757)
@@ -71,29 +71,41 @@
 def histogram(a, bins=10, range=None, normed=False):
     """Compute the histogram from a set of data.
 
-    :Parameters:
-      - `a` : array
-        The data to histogram. n-D arrays will be flattened.
-      - `bins` : int or sequence of floats, optional
-        If an int, then the number of equal-width bins in the given range.
-        Otherwise, a sequence of the lower bound of each bin.
-      - `range` : (float, float), optional
-        The lower and upper range of the bins. If not provided, then (a.min(),
-        a.max()) is used. Values outside of this range are allocated to the
-        closest bin.
-      - `normed` : bool, optional
-        If False, the result array will contain the number of samples in each bin.
-        If True, the result array is the value of the probability *density*
-        function at the bin normalized such that the *integral* over the range
-        is 1. Note that the sum of all of the histogram values will not usually
-        be 1; it is not a probability *mass* function.
+    Parameters:
 
-    :Returns:
-      - `hist` : array (n,)
-        The values of the histogram. See `normed` for a description of the
-        possible semantics.
-      - `lower_edges` : float array (n,)
-        The lower edges of each bin.
+        a : array
+            The data to histogram. n-D arrays will be flattened.
+
+        bins : int or sequence of floats
+            If an int, then the number of equal-width bins in the given range.
+            Otherwise, a sequence of the lower bound of each bin.
+
+        range : (float, float)
+            The lower and upper range of the bins. If not provided, then
+            (a.min(), a.max()) is used. Values outside of this range are
+            allocated to the closest bin.
+
+        normed : bool
+            If False, the result array will contain the number of samples in
+            each bin.  If True, the result array is the value of the
+            probability *density* function at the bin normalized such that the
+            *integral* over the range is 1. Note that the sum of all of the
+            histogram values will not usually be 1; it is not a probability
+            *mass* function.
+
+    Returns:
+
+        hist : array
+            The values of the histogram. See `normed` for a description of the
+            possible semantics.
+
+        lower_edges : float array
+            The lower edges of each bin.
+
+    SeeAlso:
+
+        histogramdd
+
     """
     a = asarray(a).ravel()
     if not iterable(bins):
@@ -120,38 +132,54 @@
         return n, bins
 
 def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
-    """histogramdd(sample, bins=10, range=None, normed=False, weights=None)                                                                
+    """histogramdd(sample, bins=10, range=None, normed=False, weights=None)
 
-    Return the D-dimensional histogram of the sample.
+    Return the N-dimensional histogram of the sample.
 
-    :Parameters:
-      - `sample` : A sequence of D arrays, or an NxD array. 
-      - `bins` : A sequence of edge arrays, a sequence of bin number, 
-                or a scalar (the number of bins for all dimensions.) 
-      - `range` : A sequence of lower and upper bin edges (default: [min, max]).
-      - `normed` : Boolean, if False, return the number of samples in each bin,
-                if True, returns the density.
-      - `weights` : An array of weights. The weights are normed only if normed is True. 
-             Should weights.sum() not equal N, the total bin count will 
-             not be equal to the number of samples.
+    Parameters:
 
-    :Return:
-      - `hist` : Histogram array.
-      - `edges` : List of arrays defining the bin edges. 
-    
+        sample : sequence or array
+            A sequence containing N arrays or an MxN array. Input data.
 
-    Example:
-      >>> x = random.randn(100,3)
-      >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7))
+        bins : sequence or scalar
+            A sequence of edge arrays, a sequence of bin counts, or a scalar
+            which is the bin count for all dimensions. Default is 10.
 
-    :SeeAlso: histogram
+        range : sequence
+            A sequence of lower and upper bin edges. Default is [min, max].
 
+        normed : boolean
+            If False, return the number of samples in each bin; if True,
+            return the density.
+
+        weights : array
+            Array of weights.  The weights are normed only if normed is True.
+            Should the sum of the weights not equal the number of samples, the
+            total bin count will not be equal to the number of samples.
+
+    Returns:
+
+        hist : array
+            Histogram array.
+
+        edges : list
+            List of arrays defining the bin edges of each dimension.
+
+    SeeAlso:
+
+        histogram
+
+    Example:
+
+        >>> x = random.randn(100,3)
+        >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7))
+
     """
 
-    try: 
+    try:
         # Sample is an ND-array.
         N, D = sample.shape
-    except (AttributeError, ValueError): 
+    except (AttributeError, ValueError):
         # Sample is a sequence of 1D arrays.
         sample = atleast_2d(sample).T
         N, D = sample.shape
@@ -161,7 +189,7 @@
     dedges = D*[None]
     if weights is not None:
         weights = asarray(weights)
-    
+
     try:
         M = len(bins)
         if M != D:
@@ -172,14 +200,20 @@
     # Select range for each dimension
     # Used only if number of bins is given.
     if range is None:
-        smin = atleast_1d(sample.min(0))
-        smax = atleast_1d(sample.max(0))
+        smin = atleast_1d(array(sample.min(0), float))
+        smax = atleast_1d(array(sample.max(0), float))
     else:
         smin = zeros(D)
         smax = zeros(D)
         for i in arange(D):
             smin[i], smax[i] = range[i]
 
+    # Make sure the bins have a finite width.
+    for i in arange(len(smin)):
+        if smin[i] == smax[i]:
+            smin[i] = smin[i] - .5
+            smax[i] = smax[i] + .5
+
     # Create edge arrays
     for i in arange(D):
         if isscalar(bins[i]):
@@ -189,14 +223,14 @@
             edges[i] = asarray(bins[i], float)
             nbin[i] = len(edges[i])+1  # +1 for outlier bins
         dedges[i] = diff(edges[i])
-        
+
     nbin =  asarray(nbin)
-    
-    # Compute the bin number each sample falls into. 
+
+    # Compute the bin number each sample falls into.
     Ncount = {}
     for i in arange(D):
         Ncount[i] = digitize(sample[:,i], edges[i])
-    
+
     # Using digitize, values that fall on an edge are put in the right bin.
     # For the rightmost bin, we want values equal to the right
     # edge to be counted in the last bin, and not as an outlier.
@@ -206,7 +240,7 @@
         decimal = int(-log10(dedges[i].min())) +6
         # Find which points are on the rightmost edge.
         on_edge = where(around(sample[:,i], decimal) == around(edges[i][-1], decimal))[0]
-        # Shift these points one bin to the left. 
+        # Shift these points one bin to the left.
         Ncount[i][on_edge] -= 1
 
     # Flattened histogram matrix (1D)
@@ -238,7 +272,7 @@
     # Remove outliers (indices 0 and -1 for each dimension).
     core = D*[slice(1,-1)]
     hist = hist[core]
-    
+
     # Normalize if normed is True
     if normed:
         s = hist.sum()

Modified: trunk/numpy/lib/tests/test_function_base.py
===================================================================
--- trunk/numpy/lib/tests/test_function_base.py	2007-05-13 20:15:09 UTC (rev 3756)
+++ trunk/numpy/lib/tests/test_function_base.py	2007-05-13 23:22:17 UTC (rev 3757)
@@ -60,7 +60,7 @@
     def check_weighted(self):
         y1 = array([[1,2,3],
                     [4,5,6]])
-        actual = average(y1,weights=[1,2],axis=0)            
+        actual = average(y1,weights=[1,2],axis=0)
         desired = array([3.,4.,5.])
         assert_array_equal(actual, desired)
 
@@ -394,12 +394,12 @@
         Z[range(5), range(5), range(5)] = 1.
         H,edges = histogramdd([arange(5), arange(5), arange(5)], 5)
         assert_array_equal(H, Z)
-    
+
     def check_shape(self):
         x = rand(100,3)
         hist3d, edges = histogramdd(x, bins = (5, 7, 6))
         assert_array_equal(hist3d.shape, (5,7,6))
-        
+
     def check_weights(self):
         v = rand(100,2)
         hist, edges = histogramdd(v)
@@ -410,8 +410,12 @@
         assert_array_equal(w_hist, n_hist)
         w_hist, edges = histogramdd(v, weights=ones(100, int)*2)
         assert_array_equal(w_hist, 2*hist)
-            
 
+    def check_identical_samples(self):
+        x = zeros((10,2),int)
+        hist, edges = histogramdd(x, bins=2)
+        assert_array_equal(edges[0],array([-0.5,  0. ,  0.5]))
+
 class test_unique(NumpyTestCase):
     def check_simple(self):
         x = array([4,3,2,1,1,2,3,4, 0])




More information about the Numpy-svn mailing list