[Python-checkins] cpython (merge 3.5 -> default): Issue #27130: Merge zlib 64-bit fixes from 3.5

martin.panter python-checkins at python.org
Sat Jul 23 00:12:28 EDT 2016


https://hg.python.org/cpython/rev/bd556f748cf8
changeset:   102421:bd556f748cf8
parent:      102419:7c0917670ab8
parent:      102420:bd61bcd9bee8
user:        Martin Panter <vadmium+py at gmail.com>
date:        Sat Jul 23 03:39:49 2016 +0000
summary:
  Issue #27130: Merge zlib 64-bit fixes from 3.5

files:
  Doc/howto/clinic.rst          |   12 +-
  Lib/test/test_zlib.py         |   64 +-
  Misc/NEWS                     |    5 +
  Modules/clinic/zlibmodule.c.h |   20 +-
  Modules/zlibmodule.c          |  741 ++++++++++-----------
  5 files changed, 430 insertions(+), 412 deletions(-)


diff --git a/Doc/howto/clinic.rst b/Doc/howto/clinic.rst
--- a/Doc/howto/clinic.rst
+++ b/Doc/howto/clinic.rst
@@ -1249,17 +1249,17 @@
 
     /*[python input]
 
-    class capped_uint_converter(CConverter):
-        type = 'unsigned int'
-        converter = 'capped_uint_converter'
+    class ssize_t_converter(CConverter):
+        type = 'Py_ssize_t'
+        converter = 'ssize_t_converter'
 
     [python start generated code]*/
     /*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
 
-This block adds a converter to Argument Clinic named ``capped_uint``.  Parameters
-declared as ``capped_uint`` will be declared as type ``unsigned int``, and will
+This block adds a converter to Argument Clinic named ``ssize_t``.  Parameters
+declared as ``ssize_t`` will be declared as type ``Py_ssize_t``, and will
 be parsed by the ``'O&'`` format unit, which will call the
-``capped_uint_converter`` converter function.  ``capped_uint`` variables
+``ssize_t_converter`` converter function.  ``ssize_t`` variables
 automatically support default values.
 
 More sophisticated custom converters can insert custom C code to
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -122,6 +122,8 @@
         with self.assertRaisesRegex(OverflowError, 'int too large'):
             zlib.decompress(b'', 15, sys.maxsize + 1)
         with self.assertRaisesRegex(OverflowError, 'int too large'):
+            zlib.decompressobj().decompress(b'', sys.maxsize + 1)
+        with self.assertRaisesRegex(OverflowError, 'int too large'):
             zlib.decompressobj().flush(sys.maxsize + 1)
 
 
@@ -194,15 +196,6 @@
     def test_big_decompress_buffer(self, size):
         self.check_big_decompress_buffer(size, zlib.decompress)
 
-    @bigmemtest(size=_4G + 100, memuse=1, dry_run=False)
-    def test_length_overflow(self, size):
-        data = b'x' * size
-        try:
-            self.assertRaises(OverflowError, zlib.compress, data, 1)
-            self.assertRaises(OverflowError, zlib.decompress, data)
-        finally:
-            data = None
-
     @bigmemtest(size=_4G, memuse=1)
     def test_large_bufsize(self, size):
         # Test decompress(bufsize) parameter greater than the internal limit
@@ -215,6 +208,16 @@
         compressed = zlib.compress(data, 1)
         self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data)
 
+    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+    @bigmemtest(size=_4G + 100, memuse=4)
+    def test_64bit_compress(self, size):
+        data = b'x' * size
+        try:
+            comp = zlib.compress(data, 0)
+            self.assertEqual(zlib.decompress(comp), data)
+        finally:
+            comp = data = None
+
 
 class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
     # Test compression object
@@ -684,16 +687,45 @@
         decompress = lambda s: d.decompress(s) + d.flush()
         self.check_big_decompress_buffer(size, decompress)
 
-    @bigmemtest(size=_4G + 100, memuse=1, dry_run=False)
-    def test_length_overflow(self, size):
+    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+    @bigmemtest(size=_4G + 100, memuse=4)
+    def test_64bit_compress(self, size):
         data = b'x' * size
-        c = zlib.compressobj(1)
-        d = zlib.decompressobj()
+        co = zlib.compressobj(0)
+        do = zlib.decompressobj()
         try:
-            self.assertRaises(OverflowError, c.compress, data)
-            self.assertRaises(OverflowError, d.decompress, data)
+            comp = co.compress(data) + co.flush()
+            uncomp = do.decompress(comp) + do.flush()
+            self.assertEqual(uncomp, data)
         finally:
-            data = None
+            comp = uncomp = data = None
+
+    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+    @bigmemtest(size=_4G + 100, memuse=3)
+    def test_large_unused_data(self, size):
+        data = b'abcdefghijklmnop'
+        unused = b'x' * size
+        comp = zlib.compress(data) + unused
+        do = zlib.decompressobj()
+        try:
+            uncomp = do.decompress(comp) + do.flush()
+            self.assertEqual(unused, do.unused_data)
+            self.assertEqual(uncomp, data)
+        finally:
+            unused = comp = do = None
+
+    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+    @bigmemtest(size=_4G + 100, memuse=5)
+    def test_large_unconsumed_tail(self, size):
+        data = b'x' * size
+        do = zlib.decompressobj()
+        try:
+            comp = zlib.compress(data, 0)
+            uncomp = do.decompress(comp, 1) + do.flush()
+            self.assertEqual(uncomp, data)
+            self.assertEqual(do.unconsumed_tail, b'')
+        finally:
+            comp = uncomp = data = None
 
     def test_wbits(self):
         # wbits=0 only supported since zlib v1.2.3.5
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -26,6 +26,11 @@
 Library
 -------
 
+- Issue #27130: In the "zlib" module, fix handling of large buffers
+  (typically 4 GiB) when compressing and decompressing.  Previously, inputs
+  were limited to 4 GiB, and compression and decompression operations did not
+  properly handle results of 4 GiB.
+
 - Issue #24773: Implemented PEP 495 (Local Time Disambiguation).
 
 - Expose the EPOLLEXCLUSIVE constant (when it is defined) in the select module.
diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h
--- a/Modules/clinic/zlibmodule.c.h
+++ b/Modules/clinic/zlibmodule.c.h
@@ -60,7 +60,7 @@
 
 static PyObject *
 zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
-                     unsigned int bufsize);
+                     Py_ssize_t bufsize);
 
 static PyObject *
 zlib_decompress(PyObject *module, PyObject *args)
@@ -68,10 +68,10 @@
     PyObject *return_value = NULL;
     Py_buffer data = {NULL, NULL};
     int wbits = MAX_WBITS;
-    unsigned int bufsize = DEF_BUF_SIZE;
+    Py_ssize_t bufsize = DEF_BUF_SIZE;
 
     if (!PyArg_ParseTuple(args, "y*|iO&:decompress",
-        &data, &wbits, capped_uint_converter, &bufsize)) {
+        &data, &wbits, ssize_t_converter, &bufsize)) {
         goto exit;
     }
     return_value = zlib_decompress_impl(module, &data, wbits, bufsize);
@@ -246,17 +246,17 @@
 
 static PyObject *
 zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data,
-                                unsigned int max_length);
+                                Py_ssize_t max_length);
 
 static PyObject *
 zlib_Decompress_decompress(compobject *self, PyObject *args)
 {
     PyObject *return_value = NULL;
     Py_buffer data = {NULL, NULL};
-    unsigned int max_length = 0;
+    Py_ssize_t max_length = 0;
 
     if (!PyArg_ParseTuple(args, "y*|O&:decompress",
-        &data, capped_uint_converter, &max_length)) {
+        &data, ssize_t_converter, &max_length)) {
         goto exit;
     }
     return_value = zlib_Decompress_decompress_impl(self, &data, max_length);
@@ -361,16 +361,16 @@
     {"flush", (PyCFunction)zlib_Decompress_flush, METH_VARARGS, zlib_Decompress_flush__doc__},
 
 static PyObject *
-zlib_Decompress_flush_impl(compobject *self, unsigned int length);
+zlib_Decompress_flush_impl(compobject *self, Py_ssize_t length);
 
 static PyObject *
 zlib_Decompress_flush(compobject *self, PyObject *args)
 {
     PyObject *return_value = NULL;
-    unsigned int length = DEF_BUF_SIZE;
+    Py_ssize_t length = DEF_BUF_SIZE;
 
     if (!PyArg_ParseTuple(args, "|O&:flush",
-        capped_uint_converter, &length)) {
+        ssize_t_converter, &length)) {
         goto exit;
     }
     return_value = zlib_Decompress_flush_impl(self, length);
@@ -460,4 +460,4 @@
 #ifndef ZLIB_COMPRESS_COPY_METHODDEF
     #define ZLIB_COMPRESS_COPY_METHODDEF
 #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */
-/*[clinic end generated code: output=519446af912f4e72 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=9046866b1ac5de7e input=a9049054013a1b77]*/
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -1,5 +1,5 @@
 /* zlibmodule.c -- gzip-compatible data compression */
-/* See http://www.gzip.org/zlib/ */
+/* See http://zlib.net/ */
 
 /* Windows users:  read Python's PCbuild\readme.txt */
 
@@ -23,7 +23,7 @@
 #endif
 
 #if defined(ZLIB_VERNUM) && ZLIB_VERNUM >= 0x1221
-#define AT_LEAST_ZLIB_1_2_2_1
+#  define AT_LEAST_ZLIB_1_2_2_1
 #endif
 
 /* The following parameters are copied from zutil.h, version 0.95 */
@@ -115,6 +115,7 @@
 #ifdef WITH_THREAD
     self->lock = PyThread_allocate_lock();
     if (self->lock == NULL) {
+        Py_DECREF(self);
         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
         return NULL;
     }
@@ -138,6 +139,63 @@
     PyMem_RawFree(ptr);
 }
 
+static void
+arrange_input_buffer(z_stream *zst, Py_ssize_t *remains)
+{
+    zst->avail_in = Py_MIN((size_t)*remains, UINT_MAX);
+    *remains -= zst->avail_in;
+}
+
+static Py_ssize_t
+arrange_output_buffer_with_maximum(z_stream *zst, PyObject **buffer,
+                                   Py_ssize_t length,
+                                   Py_ssize_t max_length)
+{
+    Py_ssize_t occupied;
+
+    if (*buffer == NULL) {
+        if (!(*buffer = PyBytes_FromStringAndSize(NULL, length)))
+            return -1;
+        occupied = 0;
+    }
+    else {
+        occupied = zst->next_out - (Byte *)PyBytes_AS_STRING(*buffer);
+
+        if (length == occupied) {
+            Py_ssize_t new_length;
+            assert(length <= max_length);
+            /* can not scale the buffer over max_length */
+            if (length == max_length)
+                return -2;
+            if (length <= (max_length >> 1))
+                new_length = length << 1;
+            else
+                new_length = max_length;
+            if (_PyBytes_Resize(buffer, new_length) < 0)
+                return -1;
+            length = new_length;
+        }
+    }
+
+    zst->avail_out = Py_MIN((size_t)(length - occupied), UINT_MAX);
+    zst->next_out = (Byte *)PyBytes_AS_STRING(*buffer) + occupied;
+
+    return length;
+}
+
+static Py_ssize_t
+arrange_output_buffer(z_stream *zst, PyObject **buffer, Py_ssize_t length)
+{
+    Py_ssize_t ret;
+
+    ret = arrange_output_buffer_with_maximum(zst, buffer, length,
+                                             PY_SSIZE_T_MAX);
+    if (ret == -2)
+        PyErr_NoMemory();
+
+    return ret;
+}
+
 /*[clinic input]
 zlib.compress
 
@@ -154,50 +212,30 @@
 zlib_compress_impl(PyObject *module, Py_buffer *data, int level)
 /*[clinic end generated code: output=d80906d73f6294c8 input=638d54b6315dbed3]*/
 {
-    PyObject *ReturnVal = NULL;
-    Byte *input, *output = NULL;
-    unsigned int length;
-    int err;
+    PyObject *RetVal = NULL;
+    Byte *ibuf;
+    Py_ssize_t ibuflen, obuflen = DEF_BUF_SIZE;
+    int err, flush;
     z_stream zst;
 
-    if ((size_t)data->len > UINT_MAX) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "Size does not fit in an unsigned int");
-        goto error;
-    }
-    input = data->buf;
-    length = (unsigned int)data->len;
-
-    zst.avail_out = length + length/1000 + 12 + 1;
-
-    output = (Byte*)PyMem_Malloc(zst.avail_out);
-    if (output == NULL) {
-        PyErr_SetString(PyExc_MemoryError,
-                        "Can't allocate memory to compress data");
-        goto error;
-    }
-
-    /* Past the point of no return.  From here on out, we need to make sure
-       we clean up mallocs & INCREFs. */
+    ibuf = data->buf;
+    ibuflen = data->len;
 
     zst.opaque = NULL;
     zst.zalloc = PyZlib_Malloc;
     zst.zfree = PyZlib_Free;
-    zst.next_out = (Byte *)output;
-    zst.next_in = (Byte *)input;
-    zst.avail_in = length;
+    zst.next_in = ibuf;
     err = deflateInit(&zst, level);
 
-    switch(err) {
-    case(Z_OK):
+    switch (err) {
+    case Z_OK:
         break;
-    case(Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         PyErr_SetString(PyExc_MemoryError,
                         "Out of memory while compressing data");
         goto error;
-    case(Z_STREAM_ERROR):
-        PyErr_SetString(ZlibError,
-                        "Bad compression level");
+    case Z_STREAM_ERROR:
+        PyErr_SetString(ZlibError, "Bad compression level");
         goto error;
     default:
         deflateEnd(&zst);
@@ -205,41 +243,59 @@
         goto error;
     }
 
-    Py_BEGIN_ALLOW_THREADS;
-    err = deflate(&zst, Z_FINISH);
-    Py_END_ALLOW_THREADS;
+    do {
+        arrange_input_buffer(&zst, &ibuflen);
+        flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
 
-    if (err != Z_STREAM_END) {
-        zlib_error(zst, err, "while compressing data");
-        deflateEnd(&zst);
-        goto error;
+        do {
+            obuflen = arrange_output_buffer(&zst, &RetVal, obuflen);
+            if (obuflen < 0) {
+                deflateEnd(&zst);
+                goto error;
+            }
+
+            Py_BEGIN_ALLOW_THREADS
+            err = deflate(&zst, flush);
+            Py_END_ALLOW_THREADS
+
+            if (err == Z_STREAM_ERROR) {
+                deflateEnd(&zst);
+                zlib_error(zst, err, "while compressing data");
+                goto error;
+            }
+
+        } while (zst.avail_out == 0);
+        assert(zst.avail_in == 0);
+
+    } while (flush != Z_FINISH);
+    assert(err == Z_STREAM_END);
+
+    err = deflateEnd(&zst);
+    if (err == Z_OK) {
+        if (_PyBytes_Resize(&RetVal, zst.next_out -
+                            (Byte *)PyBytes_AS_STRING(RetVal)) < 0)
+            goto error;
+        return RetVal;
     }
-
-    err=deflateEnd(&zst);
-    if (err == Z_OK)
-        ReturnVal = PyBytes_FromStringAndSize((char *)output,
-                                              zst.total_out);
     else
         zlib_error(zst, err, "while finishing compression");
-
  error:
-    PyMem_Free(output);
-
-    return ReturnVal;
+    Py_XDECREF(RetVal);
+    return NULL;
 }
 
 /*[python input]
 
-class capped_uint_converter(CConverter):
-    type = 'unsigned int'
-    converter = 'capped_uint_converter'
+class ssize_t_converter(CConverter):
+    type = 'Py_ssize_t'
+    converter = 'ssize_t_converter'
     c_ignored_default = "0"
 
 [python start generated code]*/
-/*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
+/*[python end generated code: output=da39a3ee5e6b4b0d input=5f34ba1b394cb8e7]*/
 
 static int
-capped_uint_converter(PyObject *obj, void *ptr)
+ssize_t_converter(PyObject *obj, void *ptr)
 {
     PyObject *long_obj;
     Py_ssize_t val;
@@ -253,19 +309,7 @@
     if (val == -1 && PyErr_Occurred()) {
         return 0;
     }
-    if (val < 0) {
-        PyErr_SetString(PyExc_ValueError,
-                        "value must be positive");
-        return 0;
-    }
-
-    if ((size_t)val > UINT_MAX) {
-        *(unsigned int *)ptr = UINT_MAX;
-    }
-    else {
-        *(unsigned int *)ptr = Py_SAFE_DOWNCAST(val, Py_ssize_t,
-            unsigned int);
-    }
+    *(Py_ssize_t *)ptr = val;
     return 1;
 }
 
@@ -276,7 +320,7 @@
         Compressed data.
     wbits: int(c_default="MAX_WBITS") = MAX_WBITS
         The window buffer size and container format.
-    bufsize: capped_uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
+    bufsize: ssize_t(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
         The initial output buffer size.
     /
 
@@ -285,44 +329,36 @@
 
 static PyObject *
 zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
-                     unsigned int bufsize)
-/*[clinic end generated code: output=475b36ead58b243d input=75123b0d4ff0541d]*/
+                     Py_ssize_t bufsize)
+/*[clinic end generated code: output=77c7e35111dc8c42 input=c13dd2c5696cd17f]*/
 {
-    PyObject *result_str = NULL;
-    Byte *input;
-    unsigned int length;
-    int err;
-    unsigned int new_bufsize;
+    PyObject *RetVal = NULL;
+    Byte *ibuf;
+    Py_ssize_t ibuflen;
+    int err, flush;
     z_stream zst;
 
-    if ((size_t)data->len > UINT_MAX) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "Size does not fit in an unsigned int");
-        goto error;
+    if (bufsize < 0) {
+        PyErr_SetString(PyExc_ValueError, "bufsize must be non-negative");
+        return NULL;
+    } else if (bufsize == 0) {
+        bufsize = 1;
     }
-    input = data->buf;
-    length = (unsigned int)data->len;
 
-    if (bufsize == 0)
-        bufsize = 1;
-
-    zst.avail_in = length;
-    zst.avail_out = bufsize;
-
-    if (!(result_str = PyBytes_FromStringAndSize(NULL, bufsize)))
-        goto error;
+    ibuf = data->buf;
+    ibuflen = data->len;
 
     zst.opaque = NULL;
     zst.zalloc = PyZlib_Malloc;
     zst.zfree = PyZlib_Free;
-    zst.next_out = (Byte *)PyBytes_AS_STRING(result_str);
-    zst.next_in = (Byte *)input;
+    zst.avail_in = 0;
+    zst.next_in = ibuf;
     err = inflateInit2(&zst, wbits);
 
-    switch(err) {
-    case(Z_OK):
+    switch (err) {
+    case Z_OK:
         break;
-    case(Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         PyErr_SetString(PyExc_MemoryError,
                         "Out of memory while decompressing data");
         goto error;
@@ -333,46 +369,46 @@
     }
 
     do {
-        Py_BEGIN_ALLOW_THREADS
-        err=inflate(&zst, Z_FINISH);
-        Py_END_ALLOW_THREADS
+        arrange_input_buffer(&zst, &ibuflen);
+        flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
 
-        switch(err) {
-        case(Z_STREAM_END):
-            break;
-        case(Z_BUF_ERROR):
-            /*
-             * If there is at least 1 byte of room according to zst.avail_out
-             * and we get this error, assume that it means zlib cannot
-             * process the inflate call() due to an error in the data.
-             */
-            if (zst.avail_out > 0) {
-                zlib_error(zst, err, "while decompressing data");
+        do {
+            bufsize = arrange_output_buffer(&zst, &RetVal, bufsize);
+            if (bufsize < 0) {
                 inflateEnd(&zst);
                 goto error;
             }
-            /* fall through */
-        case(Z_OK):
-            /* need more memory */
-            if (bufsize <= (UINT_MAX >> 1))
-                new_bufsize = bufsize << 1;
-            else
-                new_bufsize = UINT_MAX;
-            if (_PyBytes_Resize(&result_str, new_bufsize) < 0) {
+
+            Py_BEGIN_ALLOW_THREADS
+            err = inflate(&zst, flush);
+            Py_END_ALLOW_THREADS
+
+            switch (err) {
+            case Z_OK:            /* fall through */
+            case Z_BUF_ERROR:     /* fall through */
+            case Z_STREAM_END:
+                break;
+            case Z_MEM_ERROR:
                 inflateEnd(&zst);
+                PyErr_SetString(PyExc_MemoryError,
+                                "Out of memory while decompressing data");
+                goto error;
+            default:
+                inflateEnd(&zst);
+                zlib_error(zst, err, "while decompressing data");
                 goto error;
             }
-            zst.next_out =
-                (unsigned char *)PyBytes_AS_STRING(result_str) + bufsize;
-            zst.avail_out = bufsize;
-            bufsize = new_bufsize;
-            break;
-        default:
-            inflateEnd(&zst);
-            zlib_error(zst, err, "while decompressing data");
-            goto error;
-        }
-    } while (err != Z_STREAM_END);
+
+        } while (zst.avail_out == 0);
+
+    } while (err != Z_STREAM_END && ibuflen != 0);
+
+
+    if (err != Z_STREAM_END) {
+        inflateEnd(&zst);
+        zlib_error(zst, err, "while decompressing data");
+        goto error;
+    }
 
     err = inflateEnd(&zst);
     if (err != Z_OK) {
@@ -380,13 +416,14 @@
         goto error;
     }
 
-    if (_PyBytes_Resize(&result_str, zst.total_out) < 0)
+    if (_PyBytes_Resize(&RetVal, zst.next_out -
+                        (Byte *)PyBytes_AS_STRING(RetVal)) < 0)
         goto error;
 
-    return result_str;
+    return RetVal;
 
  error:
-    Py_XDECREF(result_str);
+    Py_XDECREF(RetVal);
     return NULL;
 }
 
@@ -433,7 +470,7 @@
     }
 
     self = newcompobject(&Comptype);
-    if (self==NULL)
+    if (self == NULL)
         goto error;
     self->zst.opaque = NULL;
     self->zst.zalloc = PyZlib_Malloc;
@@ -441,8 +478,8 @@
     self->zst.next_in = NULL;
     self->zst.avail_in = 0;
     err = deflateInit2(&self->zst, level, method, wbits, memLevel, strategy);
-    switch(err) {
-    case (Z_OK):
+    switch (err) {
+    case Z_OK:
         self->is_initialised = 1;
         if (zdict->buf == NULL) {
             goto success;
@@ -450,9 +487,9 @@
             err = deflateSetDictionary(&self->zst,
                                        zdict->buf, (unsigned int)zdict->len);
             switch (err) {
-            case (Z_OK):
+            case Z_OK:
                 goto success;
-            case (Z_STREAM_ERROR):
+            case Z_STREAM_ERROR:
                 PyErr_SetString(PyExc_ValueError, "Invalid dictionary");
                 goto error;
             default:
@@ -460,11 +497,11 @@
                 goto error;
             }
        }
-    case (Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         PyErr_SetString(PyExc_MemoryError,
                         "Can't allocate memory for compression object");
         goto error;
-    case(Z_STREAM_ERROR):
+    case Z_STREAM_ERROR:
         PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
         goto error;
     default:
@@ -475,7 +512,7 @@
  error:
     Py_CLEAR(self);
  success:
-    return (PyObject*)self;
+    return (PyObject *)self;
 }
 
 static int
@@ -493,7 +530,7 @@
         PyBuffer_Release(&zdict_buf);
         return -1;
     }
-    err = inflateSetDictionary(&(self->zst),
+    err = inflateSetDictionary(&self->zst,
                                zdict_buf.buf, (unsigned int)zdict_buf.len);
     PyBuffer_Release(&zdict_buf);
     if (err != Z_OK) {
@@ -530,7 +567,7 @@
 
     self = newcompobject(&Decomptype);
     if (self == NULL)
-        return(NULL);
+        return NULL;
     self->zst.opaque = NULL;
     self->zst.zalloc = PyZlib_Malloc;
     self->zst.zfree = PyZlib_Free;
@@ -541,8 +578,8 @@
         self->zdict = zdict;
     }
     err = inflateInit2(&self->zst, wbits);
-    switch(err) {
-    case (Z_OK):
+    switch (err) {
+    case Z_OK:
         self->is_initialised = 1;
         if (self->zdict != NULL && wbits < 0) {
 #ifdef AT_LEAST_ZLIB_1_2_2_1
@@ -558,12 +595,12 @@
             return NULL;
 #endif
         }
-        return (PyObject*)self;
-    case(Z_STREAM_ERROR):
+        return (PyObject *)self;
+    case Z_STREAM_ERROR:
         Py_DECREF(self);
         PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
         return NULL;
-    case (Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         Py_DECREF(self);
         PyErr_SetString(PyExc_MemoryError,
                         "Can't allocate memory for decompression object");
@@ -621,113 +658,90 @@
 zlib_Compress_compress_impl(compobject *self, Py_buffer *data)
 /*[clinic end generated code: output=5d5cd791cbc6a7f4 input=0d95908d6e64fab8]*/
 {
+    PyObject *RetVal = NULL;
+    Py_ssize_t ibuflen, obuflen = DEF_BUF_SIZE;
     int err;
-    unsigned int inplen;
-    unsigned int length = DEF_BUF_SIZE, new_length;
-    PyObject *RetVal;
-    Byte *input;
-    unsigned long start_total_out;
 
-    if ((size_t)data->len > UINT_MAX) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "Size does not fit in an unsigned int");
-        return NULL;
-    }
-    input = data->buf;
-    inplen = (unsigned int)data->len;
-
-    if (!(RetVal = PyBytes_FromStringAndSize(NULL, length)))
-        return NULL;
+    self->zst.next_in = data->buf;
+    ibuflen = data->len;
 
     ENTER_ZLIB(self);
 
-    start_total_out = self->zst.total_out;
-    self->zst.avail_in = inplen;
-    self->zst.next_in = input;
-    self->zst.avail_out = length;
-    self->zst.next_out = (unsigned char *)PyBytes_AS_STRING(RetVal);
+    do {
+        arrange_input_buffer(&self->zst, &ibuflen);
 
-    Py_BEGIN_ALLOW_THREADS
-    err = deflate(&(self->zst), Z_NO_FLUSH);
-    Py_END_ALLOW_THREADS
+        do {
+            obuflen = arrange_output_buffer(&self->zst, &RetVal, obuflen);
+            if (obuflen < 0)
+                goto error;
 
-    /* while Z_OK and the output buffer is full, there might be more output,
-       so extend the output buffer and try again */
-    while (err == Z_OK && self->zst.avail_out == 0) {
-        if (length <= (UINT_MAX >> 1))
-            new_length = length << 1;
-        else
-            new_length = UINT_MAX;
-        if (_PyBytes_Resize(&RetVal, new_length) < 0) {
-            Py_CLEAR(RetVal);
-            goto done;
-        }
-        self->zst.next_out =
-            (unsigned char *)PyBytes_AS_STRING(RetVal) + length;
-        self->zst.avail_out = length;
-        length = new_length;
+            Py_BEGIN_ALLOW_THREADS
+            err = deflate(&self->zst, Z_NO_FLUSH);
+            Py_END_ALLOW_THREADS
 
-        Py_BEGIN_ALLOW_THREADS
-        err = deflate(&(self->zst), Z_NO_FLUSH);
-        Py_END_ALLOW_THREADS
-    }
-    /* We will only get Z_BUF_ERROR if the output buffer was full but
-       there wasn't more output when we tried again, so it is not an error
-       condition.
-    */
+            if (err == Z_STREAM_ERROR) {
+                zlib_error(self->zst, err, "while compressing data");
+                goto error;
+            }
 
-    if (err != Z_OK && err != Z_BUF_ERROR) {
-        zlib_error(self->zst, err, "while compressing data");
-        Py_CLEAR(RetVal);
-        goto done;
-    }
-    if (_PyBytes_Resize(&RetVal, self->zst.total_out - start_total_out) < 0) {
-        Py_CLEAR(RetVal);
-    }
+        } while (self->zst.avail_out == 0);
+        assert(self->zst.avail_in == 0);
 
- done:
+    } while (ibuflen != 0);
+
+    if (_PyBytes_Resize(&RetVal, self->zst.next_out -
+                        (Byte *)PyBytes_AS_STRING(RetVal)) == 0)
+        goto success;
+
+ error:
+    Py_CLEAR(RetVal);
+ success:
     LEAVE_ZLIB(self);
     return RetVal;
 }
 
-/* Helper for objdecompress() and unflush(). Saves any unconsumed input data in
+/* Helper for objdecompress() and flush(). Saves any unconsumed input data in
    self->unused_data or self->unconsumed_tail, as appropriate. */
 static int
-save_unconsumed_input(compobject *self, int err)
+save_unconsumed_input(compobject *self, Py_buffer *data, int err)
 {
     if (err == Z_STREAM_END) {
         /* The end of the compressed data has been reached. Store the leftover
            input data in self->unused_data. */
         if (self->zst.avail_in > 0) {
             Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data);
-            Py_ssize_t new_size;
+            Py_ssize_t new_size, left_size;
             PyObject *new_data;
-            if ((size_t)self->zst.avail_in > (size_t)UINT_MAX - (size_t)old_size) {
+            left_size = (Byte *)data->buf + data->len - self->zst.next_in;
+            if (left_size > (PY_SSIZE_T_MAX - old_size)) {
                 PyErr_NoMemory();
                 return -1;
             }
-            new_size = old_size + self->zst.avail_in;
+            new_size = old_size + left_size;
             new_data = PyBytes_FromStringAndSize(NULL, new_size);
             if (new_data == NULL)
                 return -1;
             Py_MEMCPY(PyBytes_AS_STRING(new_data),
                       PyBytes_AS_STRING(self->unused_data), old_size);
             Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size,
-                      self->zst.next_in, self->zst.avail_in);
+                      self->zst.next_in, left_size);
             Py_SETREF(self->unused_data, new_data);
             self->zst.avail_in = 0;
         }
     }
+
     if (self->zst.avail_in > 0 || PyBytes_GET_SIZE(self->unconsumed_tail)) {
         /* This code handles two distinct cases:
            1. Output limit was reached. Save leftover input in unconsumed_tail.
            2. All input data was consumed. Clear unconsumed_tail. */
+        Py_ssize_t left_size = (Byte *)data->buf + data->len - self->zst.next_in;
         PyObject *new_data = PyBytes_FromStringAndSize(
-                (char *)self->zst.next_in, self->zst.avail_in);
+                (char *)self->zst.next_in, left_size);
         if (new_data == NULL)
             return -1;
         Py_SETREF(self->unconsumed_tail, new_data);
     }
+
     return 0;
 }
 
@@ -736,7 +750,7 @@
 
     data: Py_buffer
         The binary data to decompress.
-    max_length: capped_uint = 0
+    max_length: ssize_t = 0
         The maximum allowable length of the decompressed data.
         Unconsumed input data will be stored in
         the unconsumed_tail attribute.
@@ -751,85 +765,72 @@
 
 static PyObject *
 zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data,
-                                unsigned int max_length)
-/*[clinic end generated code: output=b82e2a2c19f5fe7b input=68b6508ab07c2cf0]*/
+                                Py_ssize_t max_length)
+/*[clinic end generated code: output=6e5173c74e710352 input=d6de9b53c4566b8a]*/
 {
-    int err;
-    unsigned int old_length, length = DEF_BUF_SIZE;
+    int err = Z_OK;
+    Py_ssize_t ibuflen, obuflen = DEF_BUF_SIZE, hard_limit;
     PyObject *RetVal = NULL;
-    unsigned long start_total_out;
 
-    if ((size_t)data->len > UINT_MAX) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "Size does not fit in an unsigned int");
+    if (max_length < 0) {
+        PyErr_SetString(PyExc_ValueError, "max_length must be non-negative");
         return NULL;
-    }
+    } else if (max_length == 0)
+        hard_limit = PY_SSIZE_T_MAX;
+    else
+        hard_limit = max_length;
+
+    self->zst.next_in = data->buf;
+    ibuflen = data->len;
 
     /* limit amount of data allocated to max_length */
-    if (max_length && length > max_length)
-        length = max_length;
-    if (!(RetVal = PyBytes_FromStringAndSize(NULL, length)))
-        return NULL;
+    if (max_length && obuflen > max_length)
+        obuflen = max_length;
 
     ENTER_ZLIB(self);
 
-    start_total_out = self->zst.total_out;
-    self->zst.avail_in = (unsigned int)data->len;
-    self->zst.next_in = data->buf;
-    self->zst.avail_out = length;
-    self->zst.next_out = (unsigned char *)PyBytes_AS_STRING(RetVal);
+    do {
+        arrange_input_buffer(&self->zst, &ibuflen);
 
-    Py_BEGIN_ALLOW_THREADS
-    err = inflate(&(self->zst), Z_SYNC_FLUSH);
-    Py_END_ALLOW_THREADS
+        do {
+            obuflen = arrange_output_buffer_with_maximum(&self->zst, &RetVal,
+                                                         obuflen, hard_limit);
+            if (obuflen == -2) {
+                if (max_length > 0) {
+                    goto save;
+                }
+                PyErr_NoMemory();
+            }
+            if (obuflen < 0) {
+                goto abort;
+            }
 
-    if (err == Z_NEED_DICT && self->zdict != NULL) {
-        if (set_inflate_zdict(self) < 0) {
-            Py_DECREF(RetVal);
-            RetVal = NULL;
-            goto error;
-        }
+            Py_BEGIN_ALLOW_THREADS
+            err = inflate(&self->zst, Z_SYNC_FLUSH);
+            Py_END_ALLOW_THREADS
 
-        /* Repeat the call to inflate. */
-        Py_BEGIN_ALLOW_THREADS
-        err = inflate(&(self->zst), Z_SYNC_FLUSH);
-        Py_END_ALLOW_THREADS
-    }
+            switch (err) {
+            case Z_OK:            /* fall through */
+            case Z_BUF_ERROR:     /* fall through */
+            case Z_STREAM_END:
+                break;
+            default:
+                if (err == Z_NEED_DICT && self->zdict != NULL) {
+                    if (set_inflate_zdict(self) < 0)
+                        goto abort;
+                    else
+                        break;
+                }
+                goto save;
+            }
 
-    /* While Z_OK and the output buffer is full, there might be more output.
-       So extend the output buffer and try again.
-    */
-    while (err == Z_OK && self->zst.avail_out == 0) {
-        /* If max_length set, don't continue decompressing if we've already
-           reached the limit.
-        */
-        if (max_length && length >= max_length)
-            break;
+        } while (self->zst.avail_out == 0 || err == Z_NEED_DICT);
 
-        /* otherwise, ... */
-        old_length = length;
-        length = length << 1;
-        if (max_length && length > max_length)
-            length = max_length;
+    } while (err != Z_STREAM_END && ibuflen != 0);
 
-        if (_PyBytes_Resize(&RetVal, length) < 0) {
-            Py_CLEAR(RetVal);
-            goto error;
-        }
-        self->zst.next_out =
-            (unsigned char *)PyBytes_AS_STRING(RetVal) + old_length;
-        self->zst.avail_out = length - old_length;
-
-        Py_BEGIN_ALLOW_THREADS
-        err = inflate(&(self->zst), Z_SYNC_FLUSH);
-        Py_END_ALLOW_THREADS
-    }
-
-    if (save_unconsumed_input(self, err) < 0) {
-        Py_DECREF(RetVal);
-        RetVal = NULL;
-        goto error;
-    }
+ save:
+    if (save_unconsumed_input(self, data, err) < 0)
+        goto abort;
 
     if (err == Z_STREAM_END) {
         /* This is the logical place to call inflateEnd, but the old behaviour
@@ -841,16 +842,16 @@
            not an error condition.
         */
         zlib_error(self->zst, err, "while decompressing data");
-        Py_DECREF(RetVal);
-        RetVal = NULL;
-        goto error;
+        goto abort;
     }
 
-    if (_PyBytes_Resize(&RetVal, self->zst.total_out - start_total_out) < 0) {
-        Py_CLEAR(RetVal);
-    }
+    if (_PyBytes_Resize(&RetVal, self->zst.next_out -
+                        (Byte *)PyBytes_AS_STRING(RetVal)) == 0)
+        goto success;
 
- error:
+ abort:
+    Py_CLEAR(RetVal);
+ success:
     LEAVE_ZLIB(self);
     return RetVal;
 }
@@ -873,9 +874,8 @@
 /*[clinic end generated code: output=a203f4cefc9de727 input=73ed066794bd15bc]*/
 {
     int err;
-    unsigned int length = DEF_BUF_SIZE, new_length;
-    PyObject *RetVal;
-    unsigned long start_total_out;
+    Py_ssize_t length = DEF_BUF_SIZE;
+    PyObject *RetVal = NULL;
 
     /* Flushing with Z_NO_FLUSH is a no-op, so there's no point in
        doing any work at all; just return an empty string. */
@@ -883,50 +883,37 @@
         return PyBytes_FromStringAndSize(NULL, 0);
     }
 
-    if (!(RetVal = PyBytes_FromStringAndSize(NULL, length)))
-        return NULL;
-
     ENTER_ZLIB(self);
 
-    start_total_out = self->zst.total_out;
     self->zst.avail_in = 0;
-    self->zst.avail_out = length;
-    self->zst.next_out = (unsigned char *)PyBytes_AS_STRING(RetVal);
 
-    Py_BEGIN_ALLOW_THREADS
-    err = deflate(&(self->zst), mode);
-    Py_END_ALLOW_THREADS
-
-    /* while Z_OK and the output buffer is full, there might be more output,
-       so extend the output buffer and try again */
-    while (err == Z_OK && self->zst.avail_out == 0) {
-        if (length <= (UINT_MAX >> 1))
-            new_length = length << 1;
-        else
-            new_length = UINT_MAX;
-        if (_PyBytes_Resize(&RetVal, new_length) < 0) {
+    do {
+        length = arrange_output_buffer(&self->zst, &RetVal, length);
+        if (length < 0) {
             Py_CLEAR(RetVal);
             goto error;
         }
-        self->zst.next_out =
-            (unsigned char *)PyBytes_AS_STRING(RetVal) + length;
-        self->zst.avail_out = length;
-        length = new_length;
 
         Py_BEGIN_ALLOW_THREADS
-        err = deflate(&(self->zst), mode);
+        err = deflate(&self->zst, mode);
         Py_END_ALLOW_THREADS
-    }
+
+        if (err == Z_STREAM_ERROR) {
+            zlib_error(self->zst, err, "while flushing");
+            Py_CLEAR(RetVal);
+            goto error;
+        }
+    } while (self->zst.avail_out == 0);
+    assert(self->zst.avail_in == 0);
 
     /* If mode is Z_FINISH, we also have to call deflateEnd() to free
        various data structures. Note we should only get Z_STREAM_END when
        mode is Z_FINISH, but checking both for safety*/
     if (err == Z_STREAM_END && mode == Z_FINISH) {
-        err = deflateEnd(&(self->zst));
+        err = deflateEnd(&self->zst);
         if (err != Z_OK) {
             zlib_error(self->zst, err, "while finishing compression");
-            Py_DECREF(RetVal);
-            RetVal = NULL;
+            Py_CLEAR(RetVal);
             goto error;
         }
         else
@@ -936,20 +923,18 @@
            but there wasn't more output when we tried again, so it is
            not an error condition.
         */
-    } else if (err!=Z_OK && err!=Z_BUF_ERROR) {
+    } else if (err != Z_OK && err != Z_BUF_ERROR) {
         zlib_error(self->zst, err, "while flushing");
-        Py_DECREF(RetVal);
-        RetVal = NULL;
+        Py_CLEAR(RetVal);
         goto error;
     }
 
-    if (_PyBytes_Resize(&RetVal, self->zst.total_out - start_total_out) < 0) {
+    if (_PyBytes_Resize(&RetVal, self->zst.next_out -
+                        (Byte *)PyBytes_AS_STRING(RetVal)) < 0)
         Py_CLEAR(RetVal);
-    }
 
  error:
     LEAVE_ZLIB(self);
-
     return RetVal;
 }
 
@@ -976,13 +961,13 @@
      */
     ENTER_ZLIB(self);
     err = deflateCopy(&retval->zst, &self->zst);
-    switch(err) {
-    case(Z_OK):
+    switch (err) {
+    case Z_OK:
         break;
-    case(Z_STREAM_ERROR):
+    case Z_STREAM_ERROR:
         PyErr_SetString(PyExc_ValueError, "Inconsistent stream state");
         goto error;
-    case(Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         PyErr_SetString(PyExc_MemoryError,
                         "Can't allocate memory for compression object");
         goto error;
@@ -1031,13 +1016,13 @@
      */
     ENTER_ZLIB(self);
     err = inflateCopy(&retval->zst, &self->zst);
-    switch(err) {
-    case(Z_OK):
+    switch (err) {
+    case Z_OK:
         break;
-    case(Z_STREAM_ERROR):
+    case Z_STREAM_ERROR:
         PyErr_SetString(PyExc_ValueError, "Inconsistent stream state");
         goto error;
-    case(Z_MEM_ERROR):
+    case Z_MEM_ERROR:
         PyErr_SetString(PyExc_MemoryError,
                         "Can't allocate memory for decompression object");
         goto error;
@@ -1070,7 +1055,7 @@
 /*[clinic input]
 zlib.Decompress.flush
 
-    length: capped_uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
+    length: ssize_t(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
         the initial size of the output buffer.
     /
 
@@ -1078,88 +1063,84 @@
 [clinic start generated code]*/
 
 static PyObject *
-zlib_Decompress_flush_impl(compobject *self, unsigned int length)
-/*[clinic end generated code: output=db6fb753ab698e22 input=1bb961eb21b62aa0]*/
+zlib_Decompress_flush_impl(compobject *self, Py_ssize_t length)
+/*[clinic end generated code: output=68c75ea127cbe654 input=aa4ec37f3aef4da0]*/
 {
-    int err;
-    unsigned int new_length;
-    PyObject * retval = NULL;
-    unsigned long start_total_out;
-    Py_ssize_t size;
+    int err, flush;
+    Py_buffer data;
+    PyObject *RetVal = NULL;
+    Py_ssize_t ibuflen;
 
-    if (length == 0) {
+    if (length <= 0) {
         PyErr_SetString(PyExc_ValueError, "length must be greater than zero");
         return NULL;
     }
 
-    if (!(retval = PyBytes_FromStringAndSize(NULL, length)))
+    if (PyObject_GetBuffer(self->unconsumed_tail, &data, PyBUF_SIMPLE) == -1)
         return NULL;
 
-
     ENTER_ZLIB(self);
 
-    size = PyBytes_GET_SIZE(self->unconsumed_tail);
+    self->zst.next_in = data.buf;
+    ibuflen = data.len;
 
-    start_total_out = self->zst.total_out;
-    /* save_unconsumed_input() ensures that unconsumed_tail length is lesser
-       or equal than UINT_MAX */
-    self->zst.avail_in = Py_SAFE_DOWNCAST(size, Py_ssize_t, unsigned int);
-    self->zst.next_in = (Byte *)PyBytes_AS_STRING(self->unconsumed_tail);
-    self->zst.avail_out = length;
-    self->zst.next_out = (Byte *)PyBytes_AS_STRING(retval);
+    do {
+        arrange_input_buffer(&self->zst, &ibuflen);
+        flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
 
-    Py_BEGIN_ALLOW_THREADS
-    err = inflate(&(self->zst), Z_FINISH);
-    Py_END_ALLOW_THREADS
+        do {
+            length = arrange_output_buffer(&self->zst, &RetVal, length);
+            if (length < 0)
+                goto abort;
 
-    /* while Z_OK and the output buffer is full, there might be more output,
-       so extend the output buffer and try again */
-    while ((err == Z_OK || err == Z_BUF_ERROR) && self->zst.avail_out == 0) {
-        if (length <= (UINT_MAX >> 1))
-            new_length = length << 1;
-        else
-            new_length = UINT_MAX;
-        if (_PyBytes_Resize(&retval, new_length) < 0) {
-            Py_CLEAR(retval);
-            goto error;
-        }
-        self->zst.next_out = (Byte *)PyBytes_AS_STRING(retval) + length;
-        self->zst.avail_out = length;
-        length = new_length;
+            Py_BEGIN_ALLOW_THREADS
+            err = inflate(&self->zst, flush);
+            Py_END_ALLOW_THREADS
 
-        Py_BEGIN_ALLOW_THREADS
-        err = inflate(&(self->zst), Z_FINISH);
-        Py_END_ALLOW_THREADS
-    }
+            switch (err) {
+            case Z_OK:            /* fall through */
+            case Z_BUF_ERROR:     /* fall through */
+            case Z_STREAM_END:
+                break;
+            default:
+                if (err == Z_NEED_DICT && self->zdict != NULL) {
+                    if (set_inflate_zdict(self) < 0)
+                        goto abort;
+                    else
+                        break;
+                }
+                goto save;
+            }
 
-    if (save_unconsumed_input(self, err) < 0) {
-        Py_DECREF(retval);
-        retval = NULL;
-        goto error;
-    }
+        } while (self->zst.avail_out == 0 || err == Z_NEED_DICT);
+
+    } while (err != Z_STREAM_END && ibuflen != 0);
+
+ save:
+    if (save_unconsumed_input(self, &data, err) < 0)
+        goto abort;
 
     /* If at end of stream, clean up any memory allocated by zlib. */
     if (err == Z_STREAM_END) {
         self->eof = 1;
         self->is_initialised = 0;
-        err = inflateEnd(&(self->zst));
+        err = inflateEnd(&self->zst);
         if (err != Z_OK) {
             zlib_error(self->zst, err, "while finishing decompression");
-            Py_DECREF(retval);
-            retval = NULL;
-            goto error;
+            goto abort;
         }
     }
 
-    if (_PyBytes_Resize(&retval, self->zst.total_out - start_total_out) < 0) {
-        Py_CLEAR(retval);
-    }
+    if (_PyBytes_Resize(&RetVal, self->zst.next_out -
+                        (Byte *)PyBytes_AS_STRING(RetVal)) == 0)
+        goto success;
 
-error:
-
+ abort:
+    Py_CLEAR(RetVal);
+ success:
+    PyBuffer_Release(&data);
     LEAVE_ZLIB(self);
-
-    return retval;
+    return RetVal;
 }
 
 #include "clinic/zlibmodule.c.h"

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list