[Python-checkins] gh-66897: Upgrade HTTP CONNECT to protocol HTTP/1.1 (#8305)

orsenthil webhook-mailer at python.org
Wed Apr 5 00:55:41 EDT 2023


https://github.com/python/cpython/commit/1a8f862e329c3872a11d4ef8eb85cf353ca2f4d5
commit: 1a8f862e329c3872a11d4ef8eb85cf353ca2f4d5
branch: main
author: Michael Handler <mh at grendel.net>
committer: orsenthil <senthilx at amazon.com>
date: 2023-04-04T21:55:24-07:00
summary:

gh-66897: Upgrade HTTP CONNECT to protocol HTTP/1.1 (#8305)

* bpo-22708: Upgrade HTTP CONNECT to protocol HTTP/1.1 (GH-NNNN)

Use protocol HTTP/1.1 when sending HTTP CONNECT tunnelling requests;
generate Host: headers if one is not already provided (required by
HTTP/1.1), convert IDN domains to punycode in HTTP CONNECT requests.

* Refactor tests to pass under -bb (fix ByteWarnings); missed some lines >80.

* Use consistent 'tunnelling' spelling in Lib/http/client.py

* Lib/test/test_httplib: Remove remnant of obsoleted test.

* Use dict.copy() not copy.copy()

* fix version changed

* Update Lib/http/client.py

Co-authored-by: bgehman <bgehman at users.noreply.github.com>

* Switch to for/else: syntax, as suggested

* Don't use for: else:

* Sure, fine, w/e

* Oops

* 1nm to the left

---------

Co-authored-by: Éric <merwok at netwok.org>
Co-authored-by: bgehman <bgehman at users.noreply.github.com>
Co-authored-by: Oleg Iarygin <oleg at arhadthedev.net>

files:
A Misc/NEWS.d/next/Library/2018-07-16-14-10-29.bpo-22708.592iRR.rst
M Doc/library/http.client.rst
M Lib/http/client.py
M Lib/test/test_httplib.py
M Misc/ACKS

diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst
index ad3416135e30..38821b32c91c 100644
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@@ -353,6 +353,13 @@ HTTPConnection Objects
    The *headers* argument should be a mapping of extra HTTP headers to send with
    the CONNECT request.
 
+   As HTTP/1.1 is used for HTTP CONNECT tunnelling request, `as per the RFC
+   <https://tools.ietf.org/html/rfc7231#section-4.3.6>`_, a HTTP ``Host:``
+   header must be provided, matching the authority-form of the request target
+   provided as the destination for the CONNECT request. If a HTTP ``Host:``
+   header is not provided via the headers argument, one is generated and
+   transmitted automatically.
+
    For example, to tunnel through a HTTPS proxy server running locally on port
    8080, we would pass the address of the proxy to the :class:`HTTPSConnection`
    constructor, and the address of the host that we eventually want to reach to
@@ -365,6 +372,11 @@ HTTPConnection Objects
 
    .. versionadded:: 3.2
 
+   .. versionchanged:: 3.12
+      HTTP CONNECT tunnelling requests use protocol HTTP/1.1, upgraded from
+      protocol HTTP/1.0. ``Host:`` HTTP headers are mandatory for HTTP/1.1, so
+      one will be automatically generated and transmitted if not provided in
+      the headers argument.
 
 .. method:: HTTPConnection.connect()
 
diff --git a/Lib/http/client.py b/Lib/http/client.py
index bd55e7d239af..0f5cd35247ae 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -870,9 +870,9 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
     def set_tunnel(self, host, port=None, headers=None):
         """Set up host and port for HTTP CONNECT tunnelling.
 
-        In a connection that uses HTTP CONNECT tunneling, the host passed to the
-        constructor is used as a proxy server that relays all communication to
-        the endpoint passed to `set_tunnel`. This done by sending an HTTP
+        In a connection that uses HTTP CONNECT tunnelling, the host passed to
+        the constructor is used as a proxy server that relays all communication
+        to the endpoint passed to `set_tunnel`. This done by sending an HTTP
         CONNECT request to the proxy server when the connection is established.
 
         This method must be called before the HTTP connection has been
@@ -880,6 +880,13 @@ def set_tunnel(self, host, port=None, headers=None):
 
         The headers argument should be a mapping of extra HTTP headers to send
         with the CONNECT request.
+
+        As HTTP/1.1 is used for HTTP CONNECT tunnelling request, as per the RFC
+        (https://tools.ietf.org/html/rfc7231#section-4.3.6), a HTTP Host:
+        header must be provided, matching the authority-form of the request
+        target provided as the destination for the CONNECT request. If a
+        HTTP Host: header is not provided via the headers argument, one
+        is generated and transmitted automatically.
         """
 
         if self.sock:
@@ -887,10 +894,15 @@ def set_tunnel(self, host, port=None, headers=None):
 
         self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
         if headers:
-            self._tunnel_headers = headers
+            self._tunnel_headers = headers.copy()
         else:
             self._tunnel_headers.clear()
 
+        if not any(header.lower() == "host" for header in self._tunnel_headers):
+            encoded_host = self._tunnel_host.encode("idna").decode("ascii")
+            self._tunnel_headers["Host"] = "%s:%d" % (
+                encoded_host, self._tunnel_port)
+
     def _get_hostport(self, host, port):
         if port is None:
             i = host.rfind(':')
@@ -915,8 +927,9 @@ def set_debuglevel(self, level):
         self.debuglevel = level
 
     def _tunnel(self):
-        connect = b"CONNECT %s:%d HTTP/1.0\r\n" % (
-            self._tunnel_host.encode("ascii"), self._tunnel_port)
+        connect = b"CONNECT %s:%d %s\r\n" % (
+            self._tunnel_host.encode("idna"), self._tunnel_port,
+            self._http_vsn_str.encode("ascii"))
         headers = [connect]
         for header, value in self._tunnel_headers.items():
             headers.append(f"{header}: {value}\r\n".encode("latin-1"))
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 9ff6afcbadec..b4f4e2b14351 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -2187,11 +2187,12 @@ def test_getting_header_defaultint(self):
 class TunnelTests(TestCase):
     def setUp(self):
         response_text = (
-            'HTTP/1.0 200 OK\r\n\r\n' # Reply to CONNECT
+            'HTTP/1.1 200 OK\r\n\r\n' # Reply to CONNECT
             'HTTP/1.1 200 OK\r\n' # Reply to HEAD
             'Content-Length: 42\r\n\r\n'
         )
         self.host = 'proxy.com'
+        self.port = client.HTTP_PORT
         self.conn = client.HTTPConnection(self.host)
         self.conn._create_connection = self._create_connection(response_text)
 
@@ -2203,15 +2204,45 @@ def create_connection(address, timeout=None, source_address=None):
             return FakeSocket(response_text, host=address[0], port=address[1])
         return create_connection
 
-    def test_set_tunnel_host_port_headers(self):
+    def test_set_tunnel_host_port_headers_add_host_missing(self):
         tunnel_host = 'destination.com'
         tunnel_port = 8888
         tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)'}
+        tunnel_headers_after = tunnel_headers.copy()
+        tunnel_headers_after['Host'] = '%s:%d' % (tunnel_host, tunnel_port)
         self.conn.set_tunnel(tunnel_host, port=tunnel_port,
                              headers=tunnel_headers)
         self.conn.request('HEAD', '/', '')
         self.assertEqual(self.conn.sock.host, self.host)
-        self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
+        self.assertEqual(self.conn.sock.port, self.port)
+        self.assertEqual(self.conn._tunnel_host, tunnel_host)
+        self.assertEqual(self.conn._tunnel_port, tunnel_port)
+        self.assertEqual(self.conn._tunnel_headers, tunnel_headers_after)
+
+    def test_set_tunnel_host_port_headers_set_host_identical(self):
+        tunnel_host = 'destination.com'
+        tunnel_port = 8888
+        tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
+                          'Host': '%s:%d' % (tunnel_host, tunnel_port)}
+        self.conn.set_tunnel(tunnel_host, port=tunnel_port,
+                             headers=tunnel_headers)
+        self.conn.request('HEAD', '/', '')
+        self.assertEqual(self.conn.sock.host, self.host)
+        self.assertEqual(self.conn.sock.port, self.port)
+        self.assertEqual(self.conn._tunnel_host, tunnel_host)
+        self.assertEqual(self.conn._tunnel_port, tunnel_port)
+        self.assertEqual(self.conn._tunnel_headers, tunnel_headers)
+
+    def test_set_tunnel_host_port_headers_set_host_different(self):
+        tunnel_host = 'destination.com'
+        tunnel_port = 8888
+        tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
+                          'Host': '%s:%d' % ('example.com', 4200)}
+        self.conn.set_tunnel(tunnel_host, port=tunnel_port,
+                             headers=tunnel_headers)
+        self.conn.request('HEAD', '/', '')
+        self.assertEqual(self.conn.sock.host, self.host)
+        self.assertEqual(self.conn.sock.port, self.port)
         self.assertEqual(self.conn._tunnel_host, tunnel_host)
         self.assertEqual(self.conn._tunnel_port, tunnel_port)
         self.assertEqual(self.conn._tunnel_headers, tunnel_headers)
@@ -2223,17 +2254,96 @@ def test_disallow_set_tunnel_after_connect(self):
                           'destination.com')
 
     def test_connect_with_tunnel(self):
-        self.conn.set_tunnel('destination.com')
+        d = {
+            b'host': b'destination.com',
+            b'port': client.HTTP_PORT,
+        }
+        self.conn.set_tunnel(d[b'host'].decode('ascii'))
+        self.conn.request('HEAD', '/', '')
+        self.assertEqual(self.conn.sock.host, self.host)
+        self.assertEqual(self.conn.sock.port, self.port)
+        self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
+                      b'Host: %(host)s:%(port)d\r\n\r\n' % d,
+                      self.conn.sock.data)
+        self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
+                      self.conn.sock.data)
+
+    def test_connect_with_tunnel_with_default_port(self):
+        d = {
+            b'host': b'destination.com',
+            b'port': client.HTTP_PORT,
+        }
+        self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
+        self.conn.request('HEAD', '/', '')
+        self.assertEqual(self.conn.sock.host, self.host)
+        self.assertEqual(self.conn.sock.port, self.port)
+        self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
+                      b'Host: %(host)s:%(port)d\r\n\r\n' % d,
+                      self.conn.sock.data)
+        self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
+                      self.conn.sock.data)
+
+    def test_connect_with_tunnel_with_nonstandard_port(self):
+        d = {
+            b'host': b'destination.com',
+            b'port': 8888,
+        }
+        self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
+        self.conn.request('HEAD', '/', '')
+        self.assertEqual(self.conn.sock.host, self.host)
+        self.assertEqual(self.conn.sock.port, self.port)
+        self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
+                      b'Host: %(host)s:%(port)d\r\n\r\n' % d,
+                      self.conn.sock.data)
+        self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s:%(port)d\r\n' % d,
+                      self.conn.sock.data)
+
+    # This request is not RFC-valid, but it's been possible with the library
+    # for years, so don't break it unexpectedly... This also tests
+    # case-insensitivity when injecting Host: headers if they're missing.
+    def test_connect_with_tunnel_with_different_host_header(self):
+        d = {
+            b'host': b'destination.com',
+            b'tunnel_host_header': b'example.com:9876',
+            b'port': client.HTTP_PORT,
+        }
+        self.conn.set_tunnel(
+            d[b'host'].decode('ascii'),
+            headers={'HOST': d[b'tunnel_host_header'].decode('ascii')})
+        self.conn.request('HEAD', '/', '')
+        self.assertEqual(self.conn.sock.host, self.host)
+        self.assertEqual(self.conn.sock.port, self.port)
+        self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
+                      b'HOST: %(tunnel_host_header)s\r\n\r\n' % d,
+                      self.conn.sock.data)
+        self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
+                      self.conn.sock.data)
+
+    def test_connect_with_tunnel_different_host(self):
+        d = {
+            b'host': b'destination.com',
+            b'port': client.HTTP_PORT,
+        }
+        self.conn.set_tunnel(d[b'host'].decode('ascii'))
+        self.conn.request('HEAD', '/', '')
+        self.assertEqual(self.conn.sock.host, self.host)
+        self.assertEqual(self.conn.sock.port, self.port)
+        self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
+                      b'Host: %(host)s:%(port)d\r\n\r\n' % d,
+                      self.conn.sock.data)
+        self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
+                      self.conn.sock.data)
+
+    def test_connect_with_tunnel_idna(self):
+        dest = '\u03b4\u03c0\u03b8.gr'
+        dest_port = b'%s:%d' % (dest.encode('idna'), client.HTTP_PORT)
+        expected = b'CONNECT %s HTTP/1.1\r\nHost: %s\r\n\r\n' % (
+            dest_port, dest_port)
+        self.conn.set_tunnel(dest)
         self.conn.request('HEAD', '/', '')
         self.assertEqual(self.conn.sock.host, self.host)
         self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
-        self.assertIn(b'CONNECT destination.com', self.conn.sock.data)
-        # issue22095
-        self.assertNotIn(b'Host: destination.com:None', self.conn.sock.data)
-        self.assertIn(b'Host: destination.com', self.conn.sock.data)
-
-        # This test should be removed when CONNECT gets the HTTP/1.1 blessing
-        self.assertNotIn(b'Host: proxy.com', self.conn.sock.data)
+        self.assertIn(expected, self.conn.sock.data)
 
     def test_tunnel_connect_single_send_connection_setup(self):
         """Regresstion test for https://bugs.python.org/issue43332."""
@@ -2253,12 +2363,19 @@ def test_tunnel_connect_single_send_connection_setup(self):
                 msg=f'unexpected proxy data sent {proxy_setup_data_sent!r}')
 
     def test_connect_put_request(self):
-        self.conn.set_tunnel('destination.com')
+        d = {
+            b'host': b'destination.com',
+            b'port': client.HTTP_PORT,
+        }
+        self.conn.set_tunnel(d[b'host'].decode('ascii'))
         self.conn.request('PUT', '/', '')
         self.assertEqual(self.conn.sock.host, self.host)
-        self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
-        self.assertIn(b'CONNECT destination.com', self.conn.sock.data)
-        self.assertIn(b'Host: destination.com', self.conn.sock.data)
+        self.assertEqual(self.conn.sock.port, self.port)
+        self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
+                      b'Host: %(host)s:%(port)d\r\n\r\n' % d,
+                      self.conn.sock.data)
+        self.assertIn(b'PUT / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
+                      self.conn.sock.data)
 
     def test_tunnel_debuglog(self):
         expected_header = 'X-Dummy: 1'
diff --git a/Misc/ACKS b/Misc/ACKS
index 8cf5166a2bb1..929e06a87cb7 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -693,6 +693,7 @@ Anders Hammarquist
 Mark Hammond
 Harald Hanche-Olsen
 Manus Hand
+Michael Handler
 Andreas Hangauer
 Milton L. Hankins
 Carl Bordum Hansen
diff --git a/Misc/NEWS.d/next/Library/2018-07-16-14-10-29.bpo-22708.592iRR.rst b/Misc/NEWS.d/next/Library/2018-07-16-14-10-29.bpo-22708.592iRR.rst
new file mode 100644
index 000000000000..00bcf38bbcdf
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-07-16-14-10-29.bpo-22708.592iRR.rst
@@ -0,0 +1,3 @@
+http.client CONNECT method tunnel improvements: Use HTTP 1.1 protocol; send
+a matching Host: header with CONNECT, if one is not provided; convert IDN
+domain names to Punycode. Patch by Michael Handler.



More information about the Python-checkins mailing list