[Python-checkins] bpo-27181: Add statistics.geometric_mean() (GH-12638)

Raymond Hettinger webhook-mailer at python.org
Sun Apr 7 12:20:07 EDT 2019


https://github.com/python/cpython/commit/6463ba3061bd311413d2951dc83c565907e10459
commit: 6463ba3061bd311413d2951dc83c565907e10459
branch: master
author: Raymond Hettinger <rhettinger at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2019-04-07T09:20:03-07:00
summary:

bpo-27181: Add statistics.geometric_mean() (GH-12638)

files:
A Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst
M Doc/library/statistics.rst
M Doc/whatsnew/3.8.rst
M Lib/statistics.py
M Lib/test/test_statistics.py

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 1d52d98b2997..8bb2bdf7b697 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -40,6 +40,7 @@ or sample.
 =======================  ===============================================================
 :func:`mean`             Arithmetic mean ("average") of data.
 :func:`fmean`            Fast, floating point arithmetic mean.
+:func:`geometric_mean`   Geometric mean of data.
 :func:`harmonic_mean`    Harmonic mean of data.
 :func:`median`           Median (middle value) of data.
 :func:`median_low`       Low median of data.
@@ -130,6 +131,24 @@ However, for reading convenience, most of the examples show sorted sequences.
    .. versionadded:: 3.8
 
 
+.. function:: geometric_mean(data)
+
+   Convert *data* to floats and compute the geometric mean.
+
+   Raises a :exc:`StatisticsError` if the input dataset is empty,
+   if it contains a zero, or if it contains a negative value.
+
+   No special efforts are made to achieve exact results.
+   (However, this may change in the future.)
+
+   .. doctest::
+
+      >>> round(geometric_mean([54, 24, 36]), 9)
+      36.0
+
+   .. versionadded:: 3.8
+
+
 .. function:: harmonic_mean(data)
 
    Return the harmonic mean of *data*, a sequence or iterator of
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index ac20ee3aa57c..4347b3ee4118 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -322,6 +322,9 @@ Added :func:`statistics.fmean` as a faster, floating point variant of
 :func:`statistics.mean()`.  (Contributed by Raymond Hettinger and
 Steven D'Aprano in :issue:`35904`.)
 
+Added :func:`statistics.geometric_mean()`.
+(Contributed by Raymond Hettinger in :issue:`27181`.)
+
 Added :func:`statistics.multimode` that returns a list of the most
 common values. (Contributed by Raymond Hettinger in :issue:`35892`.)
 
diff --git a/Lib/statistics.py b/Lib/statistics.py
index bd8a6f96381a..262ad976b65c 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -11,13 +11,14 @@
 Function            Description
 ==================  =============================================
 mean                Arithmetic mean (average) of data.
+geometric_mean      Geometric mean of data.
 harmonic_mean       Harmonic mean of data.
 median              Median (middle value) of data.
 median_low          Low median of data.
 median_high         High median of data.
 median_grouped      Median, or 50th percentile, of grouped data.
 mode                Mode (most common value) of data.
-multimode           List of modes (most common values of data)
+multimode           List of modes (most common values of data).
 ==================  =============================================
 
 Calculate the arithmetic mean ("the average") of data:
@@ -81,6 +82,7 @@
             'pstdev', 'pvariance', 'stdev', 'variance',
             'median',  'median_low', 'median_high', 'median_grouped',
             'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
+            'geometric_mean',
           ]
 
 import math
@@ -328,6 +330,24 @@ def count(x):
     except ZeroDivisionError:
         raise StatisticsError('fmean requires at least one data point') from None
 
+def geometric_mean(data):
+    """Convert data to floats and compute the geometric mean.
+
+    Raises a StatisticsError if the input dataset is empty,
+    if it contains a zero, or if it contains a negative value.
+
+    No special efforts are made to achieve exact results.
+    (However, this may change in the future.)
+
+    >>> round(geometric_mean([54, 24, 36]), 9)
+    36.0
+    """
+    try:
+        return exp(fmean(map(log, data)))  # mean of logs, not a raw product
+    except ValueError:
+        raise StatisticsError('geometric mean requires a non-empty dataset '
+                              'containing positive numbers') from None
+
 def harmonic_mean(data):
     """Return the harmonic mean of data.
 
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 7f7839de4600..4d397eb1265d 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2038,6 +2038,94 @@ def test_compare_to_variance(self):
         expected = math.sqrt(statistics.variance(data))
         self.assertEqual(self.func(data), expected)
 
+class TestGeometricMean(unittest.TestCase):  # statistics.geometric_mean()
+
+    def test_basics(self):
+        geometric_mean = statistics.geometric_mean
+        self.assertAlmostEqual(geometric_mean([54, 24, 36]), 36.0)
+        self.assertAlmostEqual(geometric_mean([4.0, 9.0]), 6.0)
+        self.assertAlmostEqual(geometric_mean([17.625]), 17.625)
+
+        random.seed(86753095551212)  # fixed seed keeps the datasets repeatable
+        for rng in [
+                range(1, 100),
+                range(1, 1_000),
+                range(1, 10_000),
+                range(500, 10_000, 3),
+                range(10_000, 500, -3),  # descending order
+                [12, 17, 13, 5, 120, 7],
+                [random.expovariate(50.0) for i in range(1_000)],
+                [random.lognormvariate(20.0, 3.0) for i in range(2_000)],
+                [random.triangular(2000, 3000, 2200) for i in range(3_000)],
+            ]:
+            gm_decimal = math.prod(map(Decimal, rng)) ** (Decimal(1) / len(rng))
+            gm_float = geometric_mean(rng)
+            self.assertTrue(math.isclose(gm_float, float(gm_decimal)))
+
+    def test_various_input_types(self):
+        geometric_mean = statistics.geometric_mean
+        D = Decimal
+        F = Fraction
+        # https://www.wolframalpha.com/input/?i=geometric+mean+3.5,+4.0,+5.25
+        expected_mean = 4.18886  # rounded, hence places=5 below
+        for data, kind in [
+            ([3.5, 4.0, 5.25], 'floats'),
+            ([D('3.5'), D('4.0'), D('5.25')], 'decimals'),
+            ([F(7, 2), F(4, 1), F(21, 4)], 'fractions'),
+            ([3.5, 4, F(21, 4)], 'mixed types'),
+            ((3.5, 4.0, 5.25), 'tuple'),
+            (iter([3.5, 4.0, 5.25]), 'iterator'),
+                ]:
+            actual_mean = geometric_mean(data)
+            self.assertIs(type(actual_mean), float, kind)  # float for every input type
+            self.assertAlmostEqual(actual_mean, expected_mean, places=5)
+
+    def test_big_and_small(self):
+        geometric_mean = statistics.geometric_mean
+
+        # Avoid overflow to infinity
+        large = 2.0 ** 1000  # multiplying three such values would overflow
+        big_gm = geometric_mean([54.0 * large, 24.0 * large, 36.0 * large])
+        self.assertTrue(math.isclose(big_gm, 36.0 * large))
+        self.assertFalse(math.isinf(big_gm))
+
+        # Avoid underflow to zero
+        small = 2.0 ** -1000  # multiplying three such values would underflow
+        small_gm = geometric_mean([54.0 * small, 24.0 * small, 36.0 * small])
+        self.assertTrue(math.isclose(small_gm, 36.0 * small))
+        self.assertNotEqual(small_gm, 0.0)
+
+    def test_error_cases(self):
+        geometric_mean = statistics.geometric_mean
+        StatisticsError = statistics.StatisticsError
+        with self.assertRaises(StatisticsError):
+            geometric_mean([])                      # empty input
+        with self.assertRaises(StatisticsError):
+            geometric_mean([3.5, 0.0, 5.25])        # zero input
+        with self.assertRaises(StatisticsError):
+            geometric_mean([3.5, -4.0, 5.25])       # negative input
+        with self.assertRaises(StatisticsError):
+            geometric_mean(iter([]))                # empty iterator
+        with self.assertRaises(TypeError):
+            geometric_mean(None)                    # non-iterable input
+        with self.assertRaises(TypeError):
+            geometric_mean([10, None, 20])          # non-numeric input
+        with self.assertRaises(TypeError):
+            geometric_mean()                        # missing data argument
+        with self.assertRaises(TypeError):
+            geometric_mean([10, 20, 60], 70)        # too many arguments
+
+    def test_special_values(self):
+        # Rules for special values are inherited from math.fsum()
+        geometric_mean = statistics.geometric_mean
+        NaN = float('Nan')
+        Inf = float('Inf')
+        self.assertTrue(math.isnan(geometric_mean([10, NaN])), 'nan')
+        self.assertTrue(math.isnan(geometric_mean([NaN, Inf])), 'nan and infinity')
+        self.assertTrue(math.isinf(geometric_mean([10, Inf])), 'infinity')
+        with self.assertRaises(ValueError):
+            geometric_mean([Inf, -Inf])  # log(-Inf) is a math domain error
+
 class TestNormalDist(unittest.TestCase):
 
     # General note on precision: The pdf(), cdf(), and overlap() methods
diff --git a/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst b/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst
new file mode 100644
index 000000000000..3ce41c557982
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst
@@ -0,0 +1 @@
+Add statistics.geometric_mean().



More information about the Python-checkins mailing list