[Python-checkins] bpo-27181: Add statistics.geometric_mean() (GH-12638)
Raymond Hettinger
webhook-mailer at python.org
Sun Apr 7 12:20:07 EDT 2019
https://github.com/python/cpython/commit/6463ba3061bd311413d2951dc83c565907e10459
commit: 6463ba3061bd311413d2951dc83c565907e10459
branch: master
author: Raymond Hettinger <rhettinger at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2019-04-07T09:20:03-07:00
summary:
bpo-27181: Add statistics.geometric_mean() (GH-12638)
files:
A Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst
M Doc/library/statistics.rst
M Doc/whatsnew/3.8.rst
M Lib/statistics.py
M Lib/test/test_statistics.py
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 1d52d98b2997..8bb2bdf7b697 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -40,6 +40,7 @@ or sample.
======================= ===============================================================
:func:`mean` Arithmetic mean ("average") of data.
:func:`fmean` Fast, floating point arithmetic mean.
+:func:`geometric_mean` Geometric mean of data.
:func:`harmonic_mean` Harmonic mean of data.
:func:`median` Median (middle value) of data.
:func:`median_low` Low median of data.
@@ -130,6 +131,24 @@ However, for reading convenience, most of the examples show sorted sequences.
.. versionadded:: 3.8
+.. function:: geometric_mean(data)
+
+ Convert *data* to floats and compute the geometric mean.
+
+ Raises a :exc:`StatisticsError` if the input dataset is empty,
+ if it contains a zero, or if it contains a negative value.
+
+ No special efforts are made to achieve exact results.
+ (However, this may change in the future.)
+
+ .. doctest::
+
+ >>> round(geometric_mean([54, 24, 36]), 9)
+ 36.0
+
+ .. versionadded:: 3.8
+
+
.. function:: harmonic_mean(data)
Return the harmonic mean of *data*, a sequence or iterator of
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index ac20ee3aa57c..4347b3ee4118 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -322,6 +322,9 @@ Added :func:`statistics.fmean` as a faster, floating point variant of
:func:`statistics.mean()`. (Contributed by Raymond Hettinger and
Steven D'Aprano in :issue:`35904`.)
+Added :func:`statistics.geometric_mean()`.
+(Contributed by Raymond Hettinger in :issue:`27181`.)
+
Added :func:`statistics.multimode` that returns a list of the most
common values. (Contributed by Raymond Hettinger in :issue:`35892`.)
diff --git a/Lib/statistics.py b/Lib/statistics.py
index bd8a6f96381a..262ad976b65c 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -11,13 +11,14 @@
Function Description
================== =============================================
mean Arithmetic mean (average) of data.
+geometric_mean Geometric mean of data.
harmonic_mean Harmonic mean of data.
median Median (middle value) of data.
median_low Low median of data.
median_high High median of data.
median_grouped Median, or 50th percentile, of grouped data.
mode Mode (most common value) of data.
-multimode List of modes (most common values of data)
+multimode List of modes (most common values of data).
================== =============================================
Calculate the arithmetic mean ("the average") of data:
@@ -81,6 +82,7 @@
'pstdev', 'pvariance', 'stdev', 'variance',
'median', 'median_low', 'median_high', 'median_grouped',
'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
+ 'geometric_mean',
]
import math
@@ -328,6 +330,24 @@ def count(x):
except ZeroDivisionError:
raise StatisticsError('fmean requires at least one data point') from None
+def geometric_mean(data):
+ """Convert data to floats and compute the geometric mean.
+
+ Raises a StatisticsError if the input dataset is empty,
+ if it contains a zero, or if it contains a negative value.
+
+ No special efforts are made to achieve exact results.
+ (However, this may change in the future.)
+
+ >>> round(geometric_mean([54, 24, 36]), 9)
+ 36.0
+ """
+ try:
+ return exp(fmean(map(log, data)))
+ except ValueError:
+ raise StatisticsError('geometric mean requires a non-empty dataset '
+ 'containing positive numbers') from None
+
def harmonic_mean(data):
"""Return the harmonic mean of data.
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 7f7839de4600..4d397eb1265d 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2038,6 +2038,94 @@ def test_compare_to_variance(self):
expected = math.sqrt(statistics.variance(data))
self.assertEqual(self.func(data), expected)
+class TestGeometricMean(unittest.TestCase):
+
+ def test_basics(self):
+ geometric_mean = statistics.geometric_mean
+ self.assertAlmostEqual(geometric_mean([54, 24, 36]), 36.0)
+ self.assertAlmostEqual(geometric_mean([4.0, 9.0]), 6.0)
+ self.assertAlmostEqual(geometric_mean([17.625]), 17.625)
+
+ random.seed(86753095551212)
+ for rng in [
+ range(1, 100),
+ range(1, 1_000),
+ range(1, 10_000),
+ range(500, 10_000, 3),
+ range(10_000, 500, -3),
+ [12, 17, 13, 5, 120, 7],
+ [random.expovariate(50.0) for i in range(1_000)],
+ [random.lognormvariate(20.0, 3.0) for i in range(2_000)],
+ [random.triangular(2000, 3000, 2200) for i in range(3_000)],
+ ]:
+ gm_decimal = math.prod(map(Decimal, rng)) ** (Decimal(1) / len(rng))
+ gm_float = geometric_mean(rng)
+ self.assertTrue(math.isclose(gm_float, float(gm_decimal)))
+
+ def test_various_input_types(self):
+ geometric_mean = statistics.geometric_mean
+ D = Decimal
+ F = Fraction
+ # https://www.wolframalpha.com/input/?i=geometric+mean+3.5,+4.0,+5.25
+ expected_mean = 4.18886
+ for data, kind in [
+ ([3.5, 4.0, 5.25], 'floats'),
+ ([D('3.5'), D('4.0'), D('5.25')], 'decimals'),
+ ([F(7, 2), F(4, 1), F(21, 4)], 'fractions'),
+ ([3.5, 4, F(21, 4)], 'mixed types'),
+ ((3.5, 4.0, 5.25), 'tuple'),
+ (iter([3.5, 4.0, 5.25]), 'iterator'),
+ ]:
+ actual_mean = geometric_mean(data)
+ self.assertIs(type(actual_mean), float, kind)
+ self.assertAlmostEqual(actual_mean, expected_mean, places=5)
+
+ def test_big_and_small(self):
+ geometric_mean = statistics.geometric_mean
+
+ # Avoid overflow to infinity
+ large = 2.0 ** 1000
+ big_gm = geometric_mean([54.0 * large, 24.0 * large, 36.0 * large])
+ self.assertTrue(math.isclose(big_gm, 36.0 * large))
+ self.assertFalse(math.isinf(big_gm))
+
+ # Avoid underflow to zero
+ small = 2.0 ** -1000
+ small_gm = geometric_mean([54.0 * small, 24.0 * small, 36.0 * small])
+ self.assertTrue(math.isclose(small_gm, 36.0 * small))
+ self.assertNotEqual(small_gm, 0.0)
+
+ def test_error_cases(self):
+ geometric_mean = statistics.geometric_mean
+ StatisticsError = statistics.StatisticsError
+ with self.assertRaises(StatisticsError):
+ geometric_mean([]) # empty input
+ with self.assertRaises(StatisticsError):
+ geometric_mean([3.5, 0.0, 5.25]) # zero input
+ with self.assertRaises(StatisticsError):
+ geometric_mean([3.5, -4.0, 5.25]) # negative input
+ with self.assertRaises(StatisticsError):
+ geometric_mean(iter([])) # empty iterator
+ with self.assertRaises(TypeError):
+ geometric_mean(None) # non-iterable input
+ with self.assertRaises(TypeError):
+ geometric_mean([10, None, 20]) # non-numeric input
+ with self.assertRaises(TypeError):
+ geometric_mean() # missing data argument
+ with self.assertRaises(TypeError):
+ geometric_mean([10, 20, 60], 70) # too many arguments
+
+ def test_special_values(self):
+ # Rules for special values are inherited from math.fsum()
+ geometric_mean = statistics.geometric_mean
+ NaN = float('Nan')
+ Inf = float('Inf')
+ self.assertTrue(math.isnan(geometric_mean([10, NaN])), 'nan')
+ self.assertTrue(math.isnan(geometric_mean([NaN, Inf])), 'nan and infinity')
+ self.assertTrue(math.isinf(geometric_mean([10, Inf])), 'infinity')
+ with self.assertRaises(ValueError):
+ geometric_mean([Inf, -Inf])
+
class TestNormalDist(unittest.TestCase):
# General note on precision: The pdf(), cdf(), and overlap() methods
diff --git a/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst b/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst
new file mode 100644
index 000000000000..3ce41c557982
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst
@@ -0,0 +1 @@
+Add statistics.geometric_mean().
More information about the Python-checkins
mailing list