[Python-checkins] bpo-41486: zlib uses an UINT32_MAX sliding window for the output buffer (GH-26143)

miss-islington webhook-mailer at python.org
Sun Jul 4 21:33:04 EDT 2021


https://github.com/python/cpython/commit/22bcc0768e0f7eda2ae4de63aef113b1ddb4ddef
commit: 22bcc0768e0f7eda2ae4de63aef113b1ddb4ddef
branch: 3.10
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2021-07-04T18:32:56-07:00
summary:

bpo-41486: zlib uses an UINT32_MAX sliding window for the output buffer (GH-26143)


* zlib uses an UINT32_MAX sliding window for the output buffer

These functions have an initial output buffer size parameter:
- zlib.decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE)
- zlib.Decompress.flush([length])

If the initial size is greater than UINT32_MAX, use a UINT32_MAX sliding window instead of clamping the size to UINT32_MAX.
This speeds up the case where the initial size equals the actual output size.

This fixes a memory consumption and copying performance regression in earlier 3.10 beta releases if someone used an output buffer larger than 4GiB with zlib.decompress.

Reviewed-by: Gregory P. Smith
(cherry picked from commit a9a69bb3ea1e6cf54513717212aaeae0d61b24ee)

Co-authored-by: Ma Lin <animalize at users.noreply.github.com>

files:
A Misc/NEWS.d/next/Core and Builtins/2021-07-04-17-41-47.bpo-41486.DiM24a.rst
M Modules/zlibmodule.c

diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-07-04-17-41-47.bpo-41486.DiM24a.rst b/Misc/NEWS.d/next/Core and Builtins/2021-07-04-17-41-47.bpo-41486.DiM24a.rst
new file mode 100644
index 0000000000000..6a373f67f62f9
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-07-04-17-41-47.bpo-41486.DiM24a.rst	
@@ -0,0 +1,3 @@
+Fix a memory consumption and copying performance regression in earlier 3.10
+beta releases if someone used an output buffer larger than 4GiB with
+zlib.decompress on input data that expands that large.
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
index fe06094b0027a..3efb24a679caa 100644
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -30,28 +30,6 @@ OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
     return allocated;
 }
 
-/* On success, return value >= 0
-   On failure, return -1 */
-static inline Py_ssize_t
-OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, Py_ssize_t init_size,
-                          Bytef **next_out, uint32_t *avail_out)
-{
-    Py_ssize_t allocated;
-
-    if (init_size >= 0 &&  // ensure (size_t) cast is safe
-        (size_t)init_size > UINT32_MAX)
-    {
-        /* In 32-bit build, never reach this conditional branch.
-           The maximum block size accepted by zlib is UINT32_MAX. */
-        init_size = UINT32_MAX;
-    }
-
-    allocated = _BlocksOutputBuffer_InitWithSize(
-                    buffer, init_size, (void**) next_out);
-    *avail_out = (uint32_t) allocated;
-    return allocated;
-}
-
 /* On success, return value >= 0
    On failure, return -1 */
 static inline Py_ssize_t
@@ -84,6 +62,106 @@ OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
     _BlocksOutputBuffer_OnError(buffer);
 }
 
+/* The max buffer size accepted by zlib is UINT32_MAX, but the initial buffer
+   size `init_size` may exceed it in a 64-bit build. These wrapper functions
+   maintain a UINT32_MAX sliding window for the first block:
+    1. OutputBuffer_WindowInitWithSize()
+    2. OutputBuffer_WindowGrow()
+    3. OutputBuffer_WindowFinish()
+    4. OutputBuffer_WindowOnError()
+
+   ==== is the sliding window:
+    1. ====------
+           ^ next_posi, left_bytes is 6
+    2. ----====--
+               ^ next_posi, left_bytes is 2
+    3. --------==
+                 ^ next_posi, left_bytes is 0  */
+typedef struct {
+    Py_ssize_t left_bytes;
+    Bytef *next_posi;
+} _Uint32Window;
+
+/* Initialize the buffer with an initial buffer size.
+
+   On success, return value >= 0
+   On failure, return value < 0 */
+static inline Py_ssize_t
+OutputBuffer_WindowInitWithSize(_BlocksOutputBuffer *buffer, _Uint32Window *window,
+                                Py_ssize_t init_size,
+                                Bytef **next_out, uint32_t *avail_out)
+{
+    Py_ssize_t allocated = _BlocksOutputBuffer_InitWithSize(
+                               buffer, init_size, (void**) next_out);
+
+    if (allocated >= 0) {
+        // the UINT32_MAX sliding window
+        Py_ssize_t window_size = Py_MIN((size_t)allocated, UINT32_MAX);
+        *avail_out = (uint32_t) window_size;
+
+        window->left_bytes = allocated - window_size;
+        window->next_posi = *next_out + window_size;
+    }
+    return allocated;
+}
+
+/* Grow the buffer.
+
+   On success, return value >= 0
+   On failure, return value < 0 */
+static inline Py_ssize_t
+OutputBuffer_WindowGrow(_BlocksOutputBuffer *buffer, _Uint32Window *window,
+                        Bytef **next_out, uint32_t *avail_out)
+{
+    Py_ssize_t allocated;
+
+    /* ensure no gaps in the data.
+       if inlined, this check could be optimized away.*/
+    if (*avail_out != 0) {
+        PyErr_SetString(PyExc_SystemError,
+                        "*avail_out != 0 in OutputBuffer_WindowGrow().");
+        return -1;
+    }
+
+    // slide the UINT32_MAX sliding window
+    if (window->left_bytes > 0) {
+        Py_ssize_t window_size = Py_MIN((size_t)window->left_bytes, UINT32_MAX);
+
+        *next_out = window->next_posi;
+        *avail_out = (uint32_t) window_size;
+
+        window->left_bytes -= window_size;
+        window->next_posi += window_size;
+
+        return window_size;
+    }
+    assert(window->left_bytes == 0);
+
+    // only the first block may exceed UINT32_MAX
+    allocated = _BlocksOutputBuffer_Grow(
+                    buffer, (void**) next_out, (Py_ssize_t) *avail_out);
+    *avail_out = (uint32_t) allocated;
+    return allocated;
+}
+
+/* Finish the buffer.
+
+   On success, return a bytes object
+   On failure, return NULL */
+static inline PyObject *
+OutputBuffer_WindowFinish(_BlocksOutputBuffer *buffer, _Uint32Window *window,
+                          uint32_t avail_out)
+{
+    Py_ssize_t real_avail_out = (Py_ssize_t) avail_out + window->left_bytes;
+    return _BlocksOutputBuffer_Finish(buffer, real_avail_out);
+}
+
+static inline void
+OutputBuffer_WindowOnError(_BlocksOutputBuffer *buffer, _Uint32Window *window)
+{
+    _BlocksOutputBuffer_OnError(buffer);
+}
+
 
 #define ENTER_ZLIB(obj) do {                      \
     if (!PyThread_acquire_lock((obj)->lock, 0)) { \
@@ -344,6 +422,7 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
     int err, flush;
     z_stream zst;
     _BlocksOutputBuffer buffer = {.list = NULL};
+    _Uint32Window window;  // output buffer's UINT32_MAX sliding window
 
     zlibstate *state = get_zlib_state(module);
 
@@ -354,7 +433,8 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
         bufsize = 1;
     }
 
-    if (OutputBuffer_InitWithSize(&buffer, bufsize, &zst.next_out, &zst.avail_out) < 0) {
+    if (OutputBuffer_WindowInitWithSize(&buffer, &window, bufsize,
+                                        &zst.next_out, &zst.avail_out) < 0) {
         goto error;
     }
 
@@ -387,7 +467,8 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
 
         do {
             if (zst.avail_out == 0) {
-                if (OutputBuffer_Grow(&buffer, &zst.next_out, &zst.avail_out) < 0) {
+                if (OutputBuffer_WindowGrow(&buffer, &window,
+                                            &zst.next_out, &zst.avail_out) < 0) {
                     inflateEnd(&zst);
                     goto error;
                 }
@@ -430,13 +511,13 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
         goto error;
     }
 
-    RetVal = OutputBuffer_Finish(&buffer, zst.avail_out);
+    RetVal = OutputBuffer_WindowFinish(&buffer, &window, zst.avail_out);
     if (RetVal != NULL) {
         return RetVal;
     }
 
  error:
-    OutputBuffer_OnError(&buffer);
+    OutputBuffer_WindowOnError(&buffer, &window);
     return NULL;
 }
 
@@ -1171,6 +1252,7 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
     PyObject *RetVal;
     Py_ssize_t ibuflen;
     _BlocksOutputBuffer buffer = {.list = NULL};
+    _Uint32Window window;  // output buffer's UINT32_MAX sliding window
 
     PyObject *module = PyType_GetModule(cls);
     if (module == NULL) {
@@ -1193,7 +1275,8 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
     self->zst.next_in = data.buf;
     ibuflen = data.len;
 
-    if (OutputBuffer_InitWithSize(&buffer, length, &self->zst.next_out, &self->zst.avail_out) < 0) {
+    if (OutputBuffer_WindowInitWithSize(&buffer, &window, length,
+                                        &self->zst.next_out, &self->zst.avail_out) < 0) {
         goto abort;
     }
 
@@ -1203,7 +1286,8 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
 
         do {
             if (self->zst.avail_out == 0) {
-                if (OutputBuffer_Grow(&buffer, &self->zst.next_out, &self->zst.avail_out) < 0) {
+                if (OutputBuffer_WindowGrow(&buffer, &window,
+                                            &self->zst.next_out, &self->zst.avail_out) < 0) {
                     goto abort;
                 }
             }
@@ -1248,13 +1332,13 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
         }
     }
 
-    RetVal = OutputBuffer_Finish(&buffer, self->zst.avail_out);
+    RetVal = OutputBuffer_WindowFinish(&buffer, &window, self->zst.avail_out);
     if (RetVal != NULL) {
         goto success;
     }
 
  abort:
-    OutputBuffer_OnError(&buffer);
+    OutputBuffer_WindowOnError(&buffer, &window);
     RetVal = NULL;
  success:
     PyBuffer_Release(&data);



More information about the Python-checkins mailing list