[Python-checkins] closes bpo-34656: Avoid relying on signed overflow in _pickle memos. (GH-9261) (#11869)

larryhastings webhook-mailer at python.org
Mon Feb 25 19:41:36 EST 2019


https://github.com/python/cpython/commit/ef33dd6036aafbd3f06c1d56e2b1a81dae3da63c
commit: ef33dd6036aafbd3f06c1d56e2b1a81dae3da63c
branch: 3.5
author: Victor Stinner <vstinner at redhat.com>
committer: larryhastings <larry at hastings.org>
date: 2019-02-25T16:42:39-08:00
summary:

closes bpo-34656: Avoid relying on signed overflow in _pickle memos. (GH-9261) (#11869)

(cherry picked from commit a4ae828ee416a66d8c7bf5ee71d653c2cc6a26dd)

files:
M Modules/_pickle.c

diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 0f62b1c019c9..fcb9e8789951 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -527,9 +527,9 @@ typedef struct {
 } PyMemoEntry;
 
 typedef struct {
-    Py_ssize_t mt_mask;
-    Py_ssize_t mt_used;
-    Py_ssize_t mt_allocated;
+    size_t mt_mask;
+    size_t mt_used;
+    size_t mt_allocated;
     PyMemoEntry *mt_table;
 } PyMemoTable;
 
@@ -573,8 +573,8 @@ typedef struct UnpicklerObject {
     /* The unpickler memo is just an array of PyObject *s. Using a dict
        is unnecessary, since the keys are contiguous ints. */
     PyObject **memo;
-    Py_ssize_t memo_size;       /* Capacity of the memo array */
-    Py_ssize_t memo_len;        /* Number of objects in the memo */
+    size_t memo_size;       /* Capacity of the memo array */
+    size_t memo_len;        /* Number of objects in the memo */
 
     PyObject *pers_func;        /* persistent_load() method, can be NULL. */
 
@@ -658,7 +658,6 @@ PyMemoTable_New(void)
 static PyMemoTable *
 PyMemoTable_Copy(PyMemoTable *self)
 {
-    Py_ssize_t i;
     PyMemoTable *new = PyMemoTable_New();
     if (new == NULL)
         return NULL;
@@ -675,7 +674,7 @@ PyMemoTable_Copy(PyMemoTable *self)
         PyErr_NoMemory();
         return NULL;
     }
-    for (i = 0; i < self->mt_allocated; i++) {
+    for (size_t i = 0; i < self->mt_allocated; i++) {
         Py_XINCREF(self->mt_table[i].me_key);
     }
     memcpy(new->mt_table, self->mt_table,
@@ -721,7 +720,7 @@ _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
 {
     size_t i;
     size_t perturb;
-    size_t mask = (size_t)self->mt_mask;
+    size_t mask = self->mt_mask;
     PyMemoEntry *table = self->mt_table;
     PyMemoEntry *entry;
     Py_hash_t hash = (Py_hash_t)key >> 3;
@@ -743,22 +742,24 @@ _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
 
 /* Returns -1 on failure, 0 on success. */
 static int
-_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
+_PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
 {
     PyMemoEntry *oldtable = NULL;
     PyMemoEntry *oldentry, *newentry;
-    Py_ssize_t new_size = MT_MINSIZE;
-    Py_ssize_t to_process;
+    size_t new_size = MT_MINSIZE;
+    size_t to_process;
 
     assert(min_size > 0);
 
-    /* Find the smallest valid table size >= min_size. */
-    while (new_size < min_size && new_size > 0)
-        new_size <<= 1;
-    if (new_size <= 0) {
+    if (min_size > PY_SSIZE_T_MAX) {
         PyErr_NoMemory();
         return -1;
     }
+
+    /* Find the smallest valid table size >= min_size. */
+    while (new_size < min_size) {
+        new_size <<= 1;
+    }
     /* new_size needs to be a power of two. */
     assert((new_size & (new_size - 1)) == 0);
 
@@ -808,6 +809,7 @@ static int
 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
 {
     PyMemoEntry *entry;
+    size_t desired_size;
 
     assert(key != NULL);
 
@@ -831,10 +833,12 @@ PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
      * Very large memo tables (over 50K items) use doubling instead.
      * This may help applications with severe memory constraints.
      */
-    if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
+    if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
         return 0;
-    return _PyMemoTable_ResizeTable(self,
-        (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
+    }
+    // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
+    desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
+    return _PyMemoTable_ResizeTable(self, desired_size);
 }
 
 #undef MT_MINSIZE
@@ -1273,9 +1277,9 @@ _Unpickler_Readline(UnpicklerObject *self, char **result)
 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
    will be modified in place. */
 static int
-_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
+_Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
 {
-    Py_ssize_t i;
+    size_t i;
 
     assert(new_size > self->memo_size);
 
@@ -1292,9 +1296,9 @@ _Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
 
 /* Returns NULL if idx is out of bounds. */
 static PyObject *
-_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
+_Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
 {
-    if (idx < 0 || idx >= self->memo_size)
+    if (idx >= self->memo_size)
         return NULL;
 
     return self->memo[idx];
@@ -1303,7 +1307,7 @@ _Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
 /* Returns -1 (with an exception set) on failure, 0 on success.
    This takes its own reference to `value`. */
 static int
-_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
+_Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
 {
     PyObject *old_item;
 
@@ -4194,14 +4198,13 @@ static PyObject *
 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
 {
-    Py_ssize_t i;
     PyMemoTable *memo;
     PyObject *new_memo = PyDict_New();
     if (new_memo == NULL)
         return NULL;
 
     memo = self->pickler->memo;
-    for (i = 0; i < memo->mt_allocated; ++i) {
+    for (size_t i = 0; i < memo->mt_allocated; ++i) {
         PyMemoEntry entry = memo->mt_table[i];
         if (entry.me_key != NULL) {
             int status;
@@ -6620,7 +6623,7 @@ static PyObject *
 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
 {
-    Py_ssize_t i;
+    size_t i;
     PyObject *new_memo = PyDict_New();
     if (new_memo == NULL)
         return NULL;
@@ -6771,8 +6774,7 @@ static int
 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
 {
     PyObject **new_memo;
-    Py_ssize_t new_memo_size = 0;
-    Py_ssize_t i;
+    size_t new_memo_size = 0;
 
     if (obj == NULL) {
         PyErr_SetString(PyExc_TypeError,
@@ -6789,7 +6791,7 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
         if (new_memo == NULL)
             return -1;
 
-        for (i = 0; i < new_memo_size; i++) {
+        for (size_t i = 0; i < new_memo_size; i++) {
             Py_XINCREF(unpickler->memo[i]);
             new_memo[i] = unpickler->memo[i];
         }
@@ -6837,8 +6839,7 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
 
   error:
     if (new_memo_size) {
-        i = new_memo_size;
-        while (--i >= 0) {
+        for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
             Py_XDECREF(new_memo[i]);
         }
         PyMem_FREE(new_memo);



More information about the Python-checkins mailing list