[Python-checkins] gh-106669: Revert "gh-102988: Detect email address parsing errors ... (#105127)" (#106733)

gpshead webhook-mailer at python.org
Thu Jul 20 23:30:55 EDT 2023


https://github.com/python/cpython/commit/a31dea1feb61793e48fa9aa5014f358352205c1d
commit: a31dea1feb61793e48fa9aa5014f358352205c1d
branch: main
author: Gregory P. Smith <greg at krypto.org>
committer: gpshead <greg at krypto.org>
date: 2023-07-20T20:30:52-07:00
summary:

gh-106669: Revert "gh-102988: Detect email address parsing errors ... (#105127)" (#106733)

This reverts commit 18dfbd035775c15533d13a98e56b1d2bf5c65f00.
Adds a regression test from the issue.

See https://github.com/python/cpython/issues/106669.

files:
M Doc/library/email.utils.rst
M Doc/whatsnew/3.12.rst
M Lib/email/utils.py
M Lib/test/test_email/test_email.py
M Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst

diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index a87a0bd2e7de6..345b64001c1ac 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -65,11 +65,6 @@ of the new API.
    *email address* parts.  Returns a tuple of that information, unless the parse
    fails, in which case a 2-tuple of ``('', '')`` is returned.
 
-   .. versionchanged:: 3.12
-      For security reasons, addresses that were ambiguous and could parse into
-      multiple different addresses now cause ``('', '')`` to be returned
-      instead of only one of the *potential* addresses.
-
 
 .. function:: formataddr(pair, charset='utf-8')
 
@@ -92,7 +87,7 @@ of the new API.
    This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
    *fieldvalues* is a sequence of header field values as might be returned by
    :meth:`Message.get_all <email.message.Message.get_all>`.  Here's a simple
-   example that gets all the recipients of a message:
+   example that gets all the recipients of a message::
 
       from email.utils import getaddresses
 
@@ -102,25 +97,6 @@ of the new API.
       resent_ccs = msg.get_all('resent-cc', [])
       all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
 
-   When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
-   is returned in its place.  Other errors in parsing the list of
-   addresses such as a fieldvalue seemingly parsing into multiple
-   addresses may result in a list containing a single empty 2-tuple
-   ``[('', '')]`` being returned rather than returning potentially
-   invalid output.
-
-   Example malformed input parsing:
-
-   .. doctest::
-
-      >>> from email.utils import getaddresses
-      >>> getaddresses(['alice at example.com <bob at example.com>', 'me at example.com'])
-      [('', '')]
-
-   .. versionchanged:: 3.12
-      The 2-tuple of ``('', '')`` in the returned values when parsing
-      fails were added as to address a security issue.
-
 
 .. function:: parsedate(date)
 
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index a6d101bdb9f7a..ab5e5b83e98c0 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -570,14 +570,6 @@ dis
   :data:`~dis.hasarg` collection instead.
   (Contributed by Irit Katriel in :gh:`94216`.)
 
-email
------
-
-* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
-  ``('', '')`` 2-tuples in more situations where invalid email addresses are
-  encountered instead of potentially inaccurate values.
-  (Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.)
-
 fractions
 ---------
 
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 11ad75e94e934..81da5394ea169 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'):
     return address
 
 
-def _pre_parse_validation(email_header_fields):
-    accepted_values = []
-    for v in email_header_fields:
-        s = v.replace('\\(', '').replace('\\)', '')
-        if s.count('(') != s.count(')'):
-            v = "('', '')"
-        accepted_values.append(v)
-
-    return accepted_values
-
-
-def _post_parse_validation(parsed_email_header_tuples):
-    accepted_values = []
-    # The parser would have parsed a correctly formatted domain-literal
-    # The existence of an [ after parsing indicates a parsing failure
-    for v in parsed_email_header_tuples:
-        if '[' in v[1]:
-            v = ('', '')
-        accepted_values.append(v)
-
-    return accepted_values
-
 
 def getaddresses(fieldvalues):
-    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
-
-    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
-    its place.
-
-    If the resulting list of parsed address is not the same as the number of
-    fieldvalues in the input list a parsing error has occurred.  A list
-    containing a single empty 2-tuple [('', '')] is returned in its place.
-    This is done to avoid invalid output.
-    """
-    fieldvalues = [str(v) for v in fieldvalues]
-    fieldvalues = _pre_parse_validation(fieldvalues)
-    all = COMMASPACE.join(v for v in fieldvalues)
+    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+    all = COMMASPACE.join(str(v) for v in fieldvalues)
     a = _AddressList(all)
-    result = _post_parse_validation(a.addresslist)
-
-    n = 0
-    for v in fieldvalues:
-        n += v.count(',') + 1
-
-    if len(result) != n:
-        return [('', '')]
-
-    return result
+    return a.addresslist
 
 
 def _format_timetuple_and_zone(timetuple, zone):
@@ -254,18 +212,9 @@ def parseaddr(addr):
     Return a tuple of realname and email address, unless the parse fails, in
     which case return a 2-tuple of ('', '').
     """
-    if isinstance(addr, list):
-        addr = addr[0]
-
-    if not isinstance(addr, str):
-        return ('', '')
-
-    addr = _pre_parse_validation([addr])[0]
-    addrs = _post_parse_validation(_AddressList(addr).addresslist)
-
-    if not addrs or len(addrs) > 1:
-        return ('', '')
-
+    addrs = _AddressList(addr).addresslist
+    if not addrs:
+        return '', ''
     return addrs[0]
 
 
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 5238944d6b478..b4f3a2481976e 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3319,90 +3319,32 @@ def test_getaddresses(self):
            [('Al Person', 'aperson at dom.ain'),
             ('Bud Person', 'bperson at dom.ain')])
 
-    def test_getaddresses_parsing_errors(self):
-        """Test for parsing errors from CVE-2023-27043"""
-        eq = self.assertEqual
-        eq(utils.getaddresses(['alice at example.org(<bob at example.com>']),
-           [('', '')])
-        eq(utils.getaddresses(['alice at example.org)<bob at example.com>']),
-           [('', '')])
-        eq(utils.getaddresses(['alice at example.org<<bob at example.com>']),
-           [('', '')])
-        eq(utils.getaddresses(['alice at example.org><bob at example.com>']),
-           [('', '')])
-        eq(utils.getaddresses(['alice at example.org@<bob at example.com>']),
-           [('', '')])
-        eq(utils.getaddresses(['alice at example.org,<bob at example.com>']),
-           [('', 'alice at example.org'), ('', 'bob at example.com')])
-        eq(utils.getaddresses(['alice at example.org;<bob at example.com>']),
-           [('', '')])
-        eq(utils.getaddresses(['alice at example.org:<bob at example.com>']),
-           [('', '')])
-        eq(utils.getaddresses(['alice at example.org.<bob at example.com>']),
-           [('', '')])
-        eq(utils.getaddresses(['alice at example.org"<bob at example.com>']),
-           [('', '')])
-        eq(utils.getaddresses(['alice at example.org[<bob at example.com>']),
-           [('', '')])
-        eq(utils.getaddresses(['alice at example.org]<bob at example.com>']),
-           [('', '')])
-
-    def test_parseaddr_parsing_errors(self):
-        """Test for parsing errors from CVE-2023-27043"""
-        eq = self.assertEqual
-        eq(utils.parseaddr(['alice at example.org(<bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org)<bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org<<bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org><bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org@<bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org,<bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org;<bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org:<bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org.<bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org"<bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org[<bob at example.com>']),
-           ('', ''))
-        eq(utils.parseaddr(['alice at example.org]<bob at example.com>']),
-           ('', ''))
+    def test_getaddresses_comma_in_name(self):
+        """GH-106669 regression test."""
+        self.assertEqual(
+            utils.getaddresses(
+                [
+                    '"Bud, Person" <bperson at dom.ain>',
+                    'aperson at dom.ain (Al Person)',
+                    '"Mariusz Felisiak" <to at example.com>',
+                ]
+            ),
+            [
+                ('Bud, Person', 'bperson at dom.ain'),
+                ('Al Person', 'aperson at dom.ain'),
+                ('Mariusz Felisiak', 'to at example.com'),
+            ],
+        )
 
     def test_getaddresses_nasty(self):
         eq = self.assertEqual
         eq(utils.getaddresses(['foo: ;']), [('', '')])
-        eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
+        eq(utils.getaddresses(
+           ['[]*-- =~$']),
+           [('', ''), ('', ''), ('', '*--')])
         eq(utils.getaddresses(
            ['foo: ;', '"Jason R. Mastaler" <jason at dom.ain>']),
            [('', ''), ('Jason R. Mastaler', 'jason at dom.ain')])
-        eq(utils.getaddresses(
-           [r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
-           [('Pete (A nice ) chap his account his host)', 'pete at silly.test')])
-        eq(utils.getaddresses(
-           ['(Empty list)(start)Undisclosed recipients  :(nobody(I know))']),
-           [('', '')])
-        eq(utils.getaddresses(
-           ['Mary <@machine.tld:mary at example.net>, , jdoe at test   . example']),
-           [('Mary', 'mary at example.net'), ('', ''), ('', 'jdoe at test.example')])
-        eq(utils.getaddresses(
-           ['John Doe <jdoe at machine(comment).  example>']),
-           [('John Doe (comment)', 'jdoe at machine.example')])
-        eq(utils.getaddresses(
-           ['"Mary Smith: Personal Account" <smith at home.example>']),
-           [('Mary Smith: Personal Account', 'smith at home.example')])
-        eq(utils.getaddresses(
-           ['Undisclosed recipients:;']),
-           [('', '')])
-        eq(utils.getaddresses(
-           [r'<boss at nil.test>, "Giant; \"Big\" Box" <bob at example.net>']),
-           [('', 'boss at nil.test'), ('Giant; "Big" Box', 'bob at example.net')])
 
     def test_getaddresses_embedded_comment(self):
         """Test proper handling of a nested comment"""
diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
index e0434ccd2ccab..c67ec45737b53 100644
--- a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
+++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
@@ -1,4 +1,4 @@
-CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
-and :func:`email.utils.getaddresses` from returning the realname portion of an
-invalid RFC2822 email header in the email address portion of the 2-tuple
-returned after being parsed by :class:`email._parseaddr.AddressList`.
+Reverted the :mod:`email.utils` security improvement change released in
+3.12beta4 that unintentionally caused :func:`email.utils.getaddresses` to fail
+to parse email addresses with a comma in the quoted name field.
+See :gh:`106669`.



More information about the Python-checkins mailing list