[Python-checkins] [3.5] closes bpo-38576: Disallow control characters in hostnames in http.client. (#19300)

Tapas Kundu webhook-mailer at python.org
Sat Jun 20 02:44:08 EDT 2020


https://github.com/python/cpython/commit/09d8172837b6985c4ad90ee025f6b5a554a9f0ac
commit: 09d8172837b6985c4ad90ee025f6b5a554a9f0ac
branch: 3.5
author: Tapas Kundu <39723251+tapakund at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2020-06-19T23:43:50-07:00
summary:

[3.5] closes bpo-38576: Disallow control characters in hostnames in http.client. (#19300)

Add host validation for control characters for more
CVE-2019-18348 protection.
(cherry picked from commit 83fc70159b24)

files:
A Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst
M Lib/http/client.py
M Lib/test/test_httplib.py
M Lib/test/test_urllib.py

diff --git a/Lib/http/client.py b/Lib/http/client.py
index 85dc8028ef57f..5d5d7a7f707b4 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -771,6 +771,7 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
 
         (self.host, self.port) = self._get_hostport(host, port)
 
+        self._validate_host(self.host)
         # This is stored as an instance variable to allow unit
         # tests to replace it with a suitable mockup
         self._create_connection = socket.create_connection
@@ -1085,6 +1086,17 @@ def _validate_path(self, url):
             ).format(matched=match.group(), **locals())
             raise InvalidURL(msg)
 
+    def _validate_host(self, host):
+        """Validate a host so it doesn't contain control characters."""
+        # Prevent CVE-2019-18348.
+        match = _contains_disallowed_url_pchar_re.search(host)
+        if match:
+            msg = (
+                "URL can't contain control characters. {host!r} "
+                "(found at least {matched!r})"
+            ).format(matched=match.group(), host=host)
+            raise InvalidURL(msg)
+
     def putheader(self, header, *values):
         """Send a request header line to the server.
 
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index c12a4298bb04e..a8e7a30af1a9f 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -986,7 +986,7 @@ def run_server():
             thread.join()
         self.assertEqual(result, b"proxied data\n")
 
-    def test_putrequest_override_validation(self):
+    def test_putrequest_override_domain_validation(self):
         """
         It should be possible to override the default validation
         behavior in putrequest (bpo-38216).
@@ -999,6 +999,17 @@ def _validate_path(self, url):
         conn.sock = FakeSocket('')
         conn.putrequest('GET', '/\x00')
 
+    def test_putrequest_override_host_validation(self):
+        class UnsafeHTTPConnection(client.HTTPConnection):
+            def _validate_host(self, url):
+                pass
+
+        conn = UnsafeHTTPConnection('example.com\r\n')
+        conn.sock = FakeSocket('')
+        # set skip_host so a ValueError is not raised upon adding the
+        # invalid URL as the value of the "Host:" header
+        conn.putrequest('GET', '/', skip_host=1)
+
     def test_putrequest_override_encoding(self):
         """
         It should be possible to override the default encoding
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 1e2c622e29fd8..d1074adb7c84d 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -331,7 +331,7 @@ def test_willclose(self):
             self.unfakehttp()
 
     @unittest.skipUnless(ssl, "ssl module required")
-    def test_url_with_control_char_rejected(self):
+    def test_url_path_with_control_char_rejected(self):
         for char_no in list(range(0, 0x21)) + [0x7f]:
             char = chr(char_no)
             schemeless_url = "//localhost:7777/test{}/".format(char)
@@ -360,7 +360,7 @@ def test_url_with_control_char_rejected(self):
                 self.unfakehttp()
 
     @unittest.skipUnless(ssl, "ssl module required")
-    def test_url_with_newline_header_injection_rejected(self):
+    def test_url_path_with_newline_header_injection_rejected(self):
         self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
         host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
         schemeless_url = "//" + host + ":8080/test/?test=a"
@@ -385,6 +385,38 @@ def test_url_with_newline_header_injection_rejected(self):
         finally:
             self.unfakehttp()
 
+    @unittest.skipUnless(ssl, "ssl module required")
+    def test_url_host_with_control_char_rejected(self):
+        for char_no in list(range(0, 0x21)) + [0x7f]:
+            char = chr(char_no)
+            schemeless_url = "//localhost{}/test/".format(char)
+            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+            try:
+                escaped_char_repr = repr(char).replace('\\', r'\\')
+                InvalidURL = http.client.InvalidURL
+                with self.assertRaisesRegex(
+                    InvalidURL, r"contain control.*{}".format(escaped_char_repr)):
+                    urlopen("http:{}".format(schemeless_url))
+                with self.assertRaisesRegex(InvalidURL, r"contain control.*{}".format(escaped_char_repr)):
+                    urlopen("http:{}".format(schemeless_url))
+            finally:
+                self.unfakehttp()
+
+    @unittest.skipUnless(ssl, "ssl module required")
+    def test_url_host_with_newline_header_injection_rejected(self):
+        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+        host = "localhost\r\nX-injected: header\r\n"
+        schemeless_url = "//" + host + ":8080/test/?test=a"
+        try:
+            InvalidURL = http.client.InvalidURL
+            with self.assertRaisesRegex(
+                InvalidURL, r"contain control.*\\r"):
+                urlopen("http:{}".format(schemeless_url))
+            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
+                urlopen("http:{}".format(schemeless_url))
+        finally:
+            self.unfakehttp()
+
     def test_read_0_9(self):
         # "0.9" response accepted (but not "simple responses" without
         # a status line)
diff --git a/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst b/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst
new file mode 100644
index 0000000000000..1d03574651725
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst
@@ -0,0 +1 @@
+Disallow control characters in hostnames in http.client, addressing CVE-2019-18348. Such potentially malicious header injection URLs now cause an InvalidURL to be raised.



More information about the Python-checkins mailing list