[Scipy-svn] r6960 - in trunk/scipy/stats: . tests

Sun Nov 28 08:36:17 EST 2010

Author: rgommers
Date: 2010-11-28 07:36:17 -0600 (Sun, 28 Nov 2010)
New Revision: 6960

Modified:
   trunk/scipy/stats/stats.py
   trunk/scipy/stats/tests/test_stats.py
Log:
TST: add tests for stats.relfreq/cumfreq. Also improve docs and clean up code.

Clean up includes having relfreq accept array_like input, this closes #1227.

Modified: trunk/scipy/stats/stats.py
===================================================================

--- trunk/scipy/stats/stats.py	2010-11-28 13:35:38 UTC (rev 6959)
+++ trunk/scipy/stats/stats.py	2010-11-28 13:36:17 UTC (rev 6960)
@@ -1506,32 +1506,32 @@
 
     Parameters
     ----------
-    a: array like
+    a: array_like
         Array of scores which will be put into bins.
-    numbins: integer, optional
+    numbins: int, optional
         The number of bins to use for the histogram. Default is 10.
     defaultlimits: tuple (lower, upper), optional
         The lower and upper values for the range of the histogram.
         If no value is given, a range slightly larger then the range of the
-        values in a is used. Specifically (a.min() - s, a.max() + s),
-            where s is (1/2)(a.max() - a.min()) / (numbins - 1)
-    weights: array like, same length as a, optional
-        The weights for each value in a. Default is None, which gives each
+        values in a is used. Specifically ``(a.min() - s, a.max() + s)``,
+            where ``s = (1/2)(a.max() - a.min()) / (numbins - 1)``.
+    weights: array_like, optional
+        The weights for each value in `a`. Default is None, which gives each
         value a weight of 1.0
-    printextras: boolean, optional
+    printextras: bool, optional
         If True, the number of extra points is printed to standard output.
-        Default is False
+        Default is False.
 
     Returns
     -------
-    histogram: array
-        Number of points (or sum of weights) in each bin
+    histogram: ndarray
+        Number of points (or sum of weights) in each bin.
     low_range: float
         Lowest value of histogram, the lower limit of the first bin.
     binsize: float
         The size of the bins (all bins have the same size).
-    extrapoints: integer
-        The number of points outside the range of the histogram
+    extrapoints: int
+        The number of points outside the range of the histogram.
 
     See Also
     --------
@@ -1558,8 +1558,8 @@
     extrapoints = len([v for v in a
                        if defaultlimits[0] > v or v > defaultlimits[1]])
     if extrapoints > 0 and printextras:
-        # fixme: warnings.warn()
-        print '\nPoints outside given histogram range =',extrapoints
+        warnings.warn("Points outside given histogram range = %s" \
+                      %extrapoints)
     return (hist, defaultlimits[0], binsize, extrapoints)
 
 
@@ -1571,11 +1571,16 @@
     ----------
     a : array_like
         Input array.
-    numbins : int, optional
-        Number of bins.
-    defaultreallimits : 2-sequence or None, optional
-        None (use all data), or a 2-sequence containing lower and upper limits
-        on values to include.
+    numbins : int, optional
+        The number of bins to use for the histogram. Default is 10.
+    defaultreallimits : tuple (lower, upper), optional
+        The lower and upper values for the range of the histogram.
+        If no value is given, a range slightly larger than the range of the
+        values in `a` is used. Specifically ``(a.min() - s, a.max() + s)``,
+        where ``s = (1/2)(a.max() - a.min()) / (numbins - 1)``.
+    weights : array_like, optional
+        The weights for each value in `a`. Default is None, which gives each
+        value a weight of 1.0.
 
     Returns
     -------
@@ -1588,6 +1593,19 @@
     extrapoints : int
         Extra points.
 
+    Examples
+    --------
+    >>> x = [1, 4, 2, 1, 3, 1]
+    >>> cumfreqs, lowlim, binsize, extrapoints = sp.stats.cumfreq(x, numbins=4)
+    >>> cumfreqs
+    array([ 3.,  4.,  5.,  6.])
+    >>> cumfreqs, lowlim, binsize, extrapoints = \
+    ...     sp.stats.cumfreq(x, numbins=4, defaultreallimits=(1.5, 5))
+    >>> cumfreqs
+    array([ 1.,  2.,  3.,  3.])
+    >>> extrapoints
+    3
+
     """
     h,l,b,e = histogram(a, numbins, defaultreallimits, weights=weights)
     cumhist = np.cumsum(h*1, axis=0)
@@ -1600,13 +1618,18 @@
 
     Parameters
     ----------
-    a : ndarray
+    a : array_like
         Input array.
-    numbins : int, optional
-        Number of bins.
-    defaultreallimits : 2-sequence or None, optional
-        None (use all data), or a 2-sequence containing lower and upper limits
-        on values to include.
+    numbins : int, optional
+        The number of bins to use for the histogram. Default is 10.
+    defaultreallimits : tuple (lower, upper), optional
+        The lower and upper values for the range of the histogram.
+        If no value is given, a range slightly larger than the range of the
+        values in `a` is used. Specifically ``(a.min() - s, a.max() + s)``,
+        where ``s = (1/2)(a.max() - a.min()) / (numbins - 1)``.
+    weights : array_like, optional
+        The weights for each value in `a`. Default is None, which gives each
+        value a weight of 1.0.
 
     Returns
     -------
@@ -1619,10 +1642,19 @@
     extrapoints : int
         Extra points.
 
+    Examples
+    --------
+    >>> a = np.array([1, 4, 2, 1, 3, 1])
+    >>> relfreqs, lowlim, binsize, extrapoints = sp.stats.relfreq(a, numbins=4)
+    >>> relfreqs
+    array([ 0.5       ,  0.16666667,  0.16666667,  0.16666667])
+    >>> np.sum(relfreqs)  # relative frequencies should add up to 1
+    0.99999999999999989
+
     """
-    h,l,b,e = histogram(a,numbins,defaultreallimits, weights=weights)
-    h = array(h/float(a.shape[0]))
-    return h,l,b,e
+    h, l, b, e = histogram(a, numbins, defaultreallimits, weights=weights)
+    h = np.array(h / float(np.array(a).shape[0]))
+    return h, l, b, e
 
 
 #####################################

Modified: trunk/scipy/stats/tests/test_stats.py
===================================================================
--- trunk/scipy/stats/tests/test_stats.py	2010-11-28 13:35:38 UTC (rev 6959)
+++ trunk/scipy/stats/tests/test_stats.py	2010-11-28 13:36:17 UTC (rev 6960)
@@ -804,6 +804,25 @@
                                     decimal=2)
 
 
+def test_cumfreq():
+    x = [1, 4, 2, 1, 3, 1]
+    cumfreqs, lowlim, binsize, extrapoints = stats.cumfreq(x, numbins=4)
+    assert_array_almost_equal(cumfreqs, np.array([ 3.,  4.,  5.,  6.]))
+    cumfreqs, lowlim, binsize, extrapoints = stats.cumfreq(x, numbins=4,
+                                                      defaultreallimits=(1.5, 5))
+    assert_(extrapoints==3)
+
+
+def test_relfreq():
+    a = np.array([1, 4, 2, 1, 3, 1])
+    relfreqs, lowlim, binsize, extrapoints = stats.relfreq(a, numbins=4)
+    assert_array_almost_equal(relfreqs, array([0.5, 0.16666667, 0.16666667, 0.16666667]))
+
+    # check array_like input is accepted
+    relfreqs2, lowlim, binsize, extrapoints = stats.relfreq([1, 4, 2, 1, 3, 1], numbins=4)
+    assert_array_almost_equal(relfreqs, relfreqs2)
+
+
 # Utility
 
 def compare_results(res,desired):