[Python-checkins] [3.9] bpo-42853: Fix http.client fails to download >2GiB data over TLS (GH-27405)

ambv webhook-mailer at python.org
Wed Jul 28 09:27:58 EDT 2021


https://github.com/python/cpython/commit/153365d864c411f6fb523efa752ccb3497d815ca
commit: 153365d864c411f6fb523efa752ccb3497d815ca
branch: 3.9
author: Inada Naoki <songofacandy at gmail.com>
committer: ambv <lukasz at langa.pl>
date: 2021-07-28T15:27:49+02:00
summary:

[3.9] bpo-42853: Fix http.client failing to download >2GiB of data over TLS (GH-27405)

Revert "bpo-36050: optimize HTTPResponse.read() (GH-12698)"

This reverts commit d6bf6f2d0c83f0c64ce86e7b9340278627798090.

files:
A Misc/NEWS.d/next/Library/2021-07-28-15-50-59.bpo-42853.8SYiF_.rst
M Lib/http/client.py

diff --git a/Lib/http/client.py b/Lib/http/client.py
index 975292505836e1..0fd9021b4a785d 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -105,6 +105,9 @@
 # Mapping status codes to official W3C names
 responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}
 
+# maximal amount of data to read at one time in _safe_read
+MAXAMOUNT = 1048576
+
 # maximal line length when calling readline().
 _MAXLINE = 65536
 _MAXHEADERS = 100
@@ -604,24 +607,43 @@ def _readinto_chunked(self, b):
             raise IncompleteRead(bytes(b[0:total_bytes]))
 
     def _safe_read(self, amt):
-        """Read the number of bytes requested.
+        """Read the number of bytes requested, compensating for partial reads.
+
+        Normally, we have a blocking socket, but a read() can be interrupted
+        by a signal (resulting in a partial read).
+
+        Note that we cannot distinguish between EOF and an interrupt when zero
+        bytes have been read. IncompleteRead() will be raised in this
+        situation.
 
         This function should be used when <amt> bytes "should" be present for
         reading. If the bytes are truly not available (due to EOF), then the
         IncompleteRead exception can be used to detect the problem.
         """
-        data = self.fp.read(amt)
-        if len(data) < amt:
-            raise IncompleteRead(data, amt-len(data))
-        return data
+        s = []
+        while amt > 0:
+            chunk = self.fp.read(min(amt, MAXAMOUNT))
+            if not chunk:
+                raise IncompleteRead(b''.join(s), amt)
+            s.append(chunk)
+            amt -= len(chunk)
+        return b"".join(s)
 
     def _safe_readinto(self, b):
         """Same as _safe_read, but for reading into a buffer."""
-        amt = len(b)
-        n = self.fp.readinto(b)
-        if n < amt:
-            raise IncompleteRead(bytes(b[:n]), amt-n)
-        return n
+        total_bytes = 0
+        mvb = memoryview(b)
+        while total_bytes < len(b):
+            if MAXAMOUNT < len(mvb):
+                temp_mvb = mvb[0:MAXAMOUNT]
+                n = self.fp.readinto(temp_mvb)
+            else:
+                n = self.fp.readinto(mvb)
+            if not n:
+                raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
+            mvb = mvb[n:]
+            total_bytes += n
+        return total_bytes
 
     def read1(self, n=-1):
         """Read with at most one underlying system call.  If at least one
diff --git a/Misc/NEWS.d/next/Library/2021-07-28-15-50-59.bpo-42853.8SYiF_.rst b/Misc/NEWS.d/next/Library/2021-07-28-15-50-59.bpo-42853.8SYiF_.rst
new file mode 100644
index 00000000000000..aaf8af0fdfa993
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-07-28-15-50-59.bpo-42853.8SYiF_.rst
@@ -0,0 +1 @@
+Fix ``http.client.HTTPSConnection`` failing to download more than 2GiB of data.



More information about the Python-checkins mailing list