[Python-checkins] bpo-44151: linear_regression() minor API improvements (GH-26199) (GH-26338)

rhettinger webhook-mailer at python.org
Mon May 24 21:11:21 EDT 2021


https://github.com/python/cpython/commit/86779878dfc0bcb74b4721aba7fd9a84e9cbd5c7
commit: 86779878dfc0bcb74b4721aba7fd9a84e9cbd5c7
branch: 3.10
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: rhettinger <rhettinger at users.noreply.github.com>
date: 2021-05-24T18:11:12-07:00
summary:

bpo-44151: linear_regression() minor API improvements (GH-26199) (GH-26338)

files:
M Doc/library/statistics.rst
M Lib/statistics.py
M Lib/test/test_statistics.py

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index a65c9840b8113..bf87e41495171 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -76,7 +76,7 @@ These functions calculate statistics regarding relations between two inputs.
 =========================  =====================================================
 :func:`covariance`         Sample covariance for two variables.
 :func:`correlation`        Pearson's correlation coefficient for two variables.
-:func:`linear_regression`  Intercept and slope for simple linear regression.
+:func:`linear_regression`  Slope and intercept for simple linear regression.
 =========================  =====================================================
 
 
@@ -626,24 +626,25 @@ However, for reading convenience, most of the examples show sorted sequences.
 
    .. versionadded:: 3.10
 
-.. function:: linear_regression(regressor, dependent_variable)
+.. function:: linear_regression(independent_variable, dependent_variable)
 
-   Return the intercept and slope of `simple linear regression
+   Return the slope and intercept of `simple linear regression
    <https://en.wikipedia.org/wiki/Simple_linear_regression>`_
    parameters estimated using ordinary least squares. Simple linear
-   regression describes the relationship between *regressor* and
-   *dependent variable* in terms of this linear function:
+   regression describes the relationship between an independent variable *x* and
+   a dependent variable *y* in terms of this linear function:
 
-      *dependent_variable = intercept + slope \* regressor + noise*
+      *y = intercept + slope \* x + noise*
 
-   where ``intercept`` and ``slope`` are the regression parameters that are
+   where ``slope`` and ``intercept`` are the regression parameters that are
    estimated, and noise represents the
    variability of the data that was not explained by the linear regression
    (it is equal to the difference between predicted and actual values
    of dependent variable).
 
-   Both inputs must be of the same length (no less than two), and regressor
-   needs not to be constant; otherwise :exc:`StatisticsError` is raised.
+   Both inputs must be of the same length (no less than two), and
+   the independent variable *x* must not be constant;
+   otherwise :exc:`StatisticsError` is raised.
 
    For example, we can use the `release dates of the Monty
    Python films <https://en.wikipedia.org/wiki/Monty_Python#Films>`_, and used
@@ -655,7 +656,7 @@ However, for reading convenience, most of the examples show sorted sequences.
 
       >>> year = [1971, 1975, 1979, 1982, 1983]
       >>> films_total = [1, 2, 3, 4, 5]
-      >>> intercept, slope = linear_regression(year, films_total)
+      >>> slope, intercept = linear_regression(year, films_total)
       >>> round(intercept + slope * 2019)
       16
 
diff --git a/Lib/statistics.py b/Lib/statistics.py
index c2f8dcd9ada89..f164210ae81af 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -94,7 +94,7 @@
 >>> correlation(x, y)  #doctest: +ELLIPSIS
 0.31622776601...
 >>> linear_regression(x, y)  #doctest:
-LinearRegression(intercept=1.5, slope=0.1)
+LinearRegression(slope=0.1, intercept=1.5)
 
 
 Exceptions
@@ -919,18 +919,18 @@ def correlation(x, y, /):
         raise StatisticsError('at least one of the inputs is constant')
 
 
-LinearRegression = namedtuple('LinearRegression', ['intercept', 'slope'])
+LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept'))
 
 
-def linear_regression(regressor, dependent_variable, /):
+def linear_regression(x, y, /):
     """Intercept and slope for simple linear regression
 
     Return the intercept and slope of simple linear regression
     parameters estimated using ordinary least squares. Simple linear
-    regression describes relationship between *regressor* and
-    *dependent variable* in terms of linear function:
+    regression describes the relationship between *x* and
+    *y* in terms of this linear function:
 
-        dependent_variable = intercept + slope * regressor + noise
+        y = intercept + slope * x + noise
 
     where *intercept* and *slope* are the regression parameters that are
     estimated, and noise represents the variability of the data that was
@@ -940,19 +940,18 @@ def linear_regression(regressor, dependent_variable, /):
 
     The parameters are returned as a named tuple.
 
-    >>> regressor = [1, 2, 3, 4, 5]
+    >>> x = [1, 2, 3, 4, 5]
     >>> noise = NormalDist().samples(5, seed=42)
-    >>> dependent_variable = [2 + 3 * regressor[i] + noise[i] for i in range(5)]
-    >>> linear_regression(regressor, dependent_variable)  #doctest: +ELLIPSIS
-    LinearRegression(intercept=1.75684970486..., slope=3.09078914170...)
+    >>> y = [2 + 3 * x[i] + noise[i] for i in range(5)]
+    >>> linear_regression(x, y)  #doctest: +ELLIPSIS
+    LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)
 
     """
-    n = len(regressor)
-    if len(dependent_variable) != n:
+    n = len(x)
+    if len(y) != n:
         raise StatisticsError('linear regression requires that both inputs have same number of data points')
     if n < 2:
         raise StatisticsError('linear regression requires at least two data points')
-    x, y = regressor, dependent_variable
     xbar = fsum(x) / n
     ybar = fsum(y) / n
     sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
@@ -960,9 +959,9 @@ def linear_regression(regressor, dependent_variable, /):
     try:
         slope = sxy / s2x
     except ZeroDivisionError:
-        raise StatisticsError('regressor is constant')
+        raise StatisticsError('x is constant')
     intercept = ybar - slope * xbar
-    return LinearRegression(intercept=intercept, slope=slope)
+    return LinearRegression(slope=slope, intercept=intercept)
 
 
 ## Normal Distribution #####################################################
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 70d269dea732d..436c420149489 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2480,7 +2480,7 @@ def test_results(self):
             ([1, 2, 3], [21, 22, 23], 20, 1),
             ([1, 2, 3], [5.1, 5.2, 5.3], 5, 0.1),
         ]:
-            intercept, slope = statistics.linear_regression(x, y)
+            slope, intercept = statistics.linear_regression(x, y)
             self.assertAlmostEqual(intercept, true_intercept)
             self.assertAlmostEqual(slope, true_slope)
 



More information about the Python-checkins mailing list