[Python-checkins] [3.6] bpo-30416: Protect the optimizer during constant folding. (#4865)

Serhiy Storchaka webhook-mailer at python.org
Fri Dec 15 07:12:17 EST 2017


https://github.com/python/cpython/commit/b580f4f2bf49fd3b11f2a046911993560c02492a
commit: b580f4f2bf49fd3b11f2a046911993560c02492a
branch: 3.6
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2017-12-15T14:12:14+02:00
summary:

[3.6] bpo-30416: Protect the optimizer during constant folding. (#4865)

It no longer spends much time doing complex calculations and no
longer consumes much memory for creating large constants that will
be dropped later.

This fixes also bpo-21074.

files:
A Misc/NEWS.d/next/Core and Builtins/2017-12-14-11-48-19.bpo-30416.hlHo_9.rst
M Lib/test/test_memoryio.py
M Lib/test/test_peepholer.py
M Python/peephole.c

diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py
index 55b693e5647..80f3b85b6eb 100644
--- a/Lib/test/test_memoryio.py
+++ b/Lib/test/test_memoryio.py
@@ -733,7 +733,8 @@ def test_sizeof(self):
         check = self.check_sizeof
         self.assertEqual(object.__sizeof__(io.BytesIO()), basesize)
         check(io.BytesIO(), basesize )
-        check(io.BytesIO(b'a' * 1000), basesize + sys.getsizeof(b'a' * 1000))
+        n = 1000  # use a variable to prevent constant folding
+        check(io.BytesIO(b'a' * n), basesize + sys.getsizeof(b'a' * n))
 
     # Various tests of copy-on-write behaviour for BytesIO.
 
diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py
index b0336407c3b..e028b096c9f 100644
--- a/Lib/test/test_peepholer.py
+++ b/Lib/test/test_peepholer.py
@@ -178,8 +178,15 @@ def test_folding_of_binops_on_constants(self):
         self.assertInBytecode(code, 'LOAD_CONST', 'b')
 
         # Verify that large sequences do not result from folding
-        code = compile('a="x"*1000', '', 'single')
+        code = compile('a="x"*10000', '', 'single')
+        self.assertInBytecode(code, 'LOAD_CONST', 10000)
+        self.assertNotIn("x"*10000, code.co_consts)
+        code = compile('a=1<<1000', '', 'single')
         self.assertInBytecode(code, 'LOAD_CONST', 1000)
+        self.assertNotIn(1<<1000, code.co_consts)
+        code = compile('a=2**1000', '', 'single')
+        self.assertInBytecode(code, 'LOAD_CONST', 1000)
+        self.assertNotIn(2**1000, code.co_consts)
 
     def test_binary_subscr_on_unicode(self):
         # valid code get optimized
diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-12-14-11-48-19.bpo-30416.hlHo_9.rst b/Misc/NEWS.d/next/Core and Builtins/2017-12-14-11-48-19.bpo-30416.hlHo_9.rst
new file mode 100644
index 00000000000..8db577bfdb3
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2017-12-14-11-48-19.bpo-30416.hlHo_9.rst	
@@ -0,0 +1,3 @@
+The optimizer is now protected from spending much time doing complex
+calculations and consuming much memory for creating large constants in
+constant folding.
diff --git a/Python/peephole.c b/Python/peephole.c
index dd8f3e4807f..31d4e92cfd3 100644
--- a/Python/peephole.c
+++ b/Python/peephole.c
@@ -167,6 +167,37 @@ copy_op_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned char op,
     return maxi - 1;
 }
 
+/* Check whether a collection doesn't containing too much items (including
+   subcollections).  This protects from creating a constant that needs
+   too much time for calculating a hash.
+   "limit" is the maximal number of items.
+   Returns the negative number if the total number of items exceeds the
+   limit.  Otherwise returns the limit minus the total number of items.
+*/
+
+static Py_ssize_t
+check_complexity(PyObject *obj, Py_ssize_t limit)
+{
+    if (PyTuple_Check(obj)) {
+        Py_ssize_t i;
+        limit -= PyTuple_GET_SIZE(obj);
+        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
+            limit = check_complexity(PyTuple_GET_ITEM(obj, i), limit);
+        }
+        return limit;
+    }
+    else if (PyFrozenSet_Check(obj)) {
+        Py_ssize_t i = 0;
+        PyObject *item;
+        Py_hash_t hash;
+        limit -= PySet_GET_SIZE(obj);
+        while (limit >= 0 && _PySet_NextEntry(obj, &i, &item, &hash)) {
+            limit = check_complexity(item, limit);
+        }
+    }
+    return limit;
+}
+
 /* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
    with    LOAD_CONST (c1, c2, ... cn).
    The consts table must still be in list form so that the
@@ -218,6 +249,101 @@ fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
     return copy_op_arg(codestr, c_start, LOAD_CONST, len_consts, opcode_end);
 }
 
+#define MAX_INT_SIZE           128  /* bits */
+#define MAX_COLLECTION_SIZE     20  /* items */
+#define MAX_STR_SIZE            20  /* characters */
+#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
+
+static PyObject *
+safe_multiply(PyObject *v, PyObject *w)
+{
+    if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w)) {
+        size_t vbits = _PyLong_NumBits(v);
+        size_t wbits = _PyLong_NumBits(w);
+        if (vbits == (size_t)-1 || wbits == (size_t)-1) {
+            return NULL;
+        }
+        if (vbits + wbits > MAX_INT_SIZE) {
+            return NULL;
+        }
+    }
+    else if (PyLong_Check(v) && (PyTuple_Check(w) || PyFrozenSet_Check(w))) {
+        Py_ssize_t size = PyTuple_Check(w) ? PyTuple_GET_SIZE(w) :
+                                             PySet_GET_SIZE(w);
+        if (size) {
+            long n = PyLong_AsLong(v);
+            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
+                return NULL;
+            }
+            if (n && check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
+                return NULL;
+            }
+        }
+    }
+    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
+        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
+                                               PyBytes_GET_SIZE(w);
+        if (size) {
+            long n = PyLong_AsLong(v);
+            if (n < 0 || n > MAX_STR_SIZE / size) {
+                return NULL;
+            }
+        }
+    }
+    else if (PyLong_Check(w) &&
+             (PyTuple_Check(v) || PyFrozenSet_Check(v) ||
+              PyUnicode_Check(v) || PyBytes_Check(v)))
+    {
+        return safe_multiply(w, v);
+    }
+
+    return PyNumber_Multiply(v, w);
+}
+
+static PyObject *
+safe_power(PyObject *v, PyObject *w)
+{
+    if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w) > 0) {
+        size_t vbits = _PyLong_NumBits(v);
+        size_t wbits = PyLong_AsSize_t(w);
+        if (vbits == (size_t)-1 || wbits == (size_t)-1) {
+            return NULL;
+        }
+        if (vbits > MAX_INT_SIZE / wbits) {
+            return NULL;
+        }
+    }
+
+    return PyNumber_Power(v, w, Py_None);
+}
+
+static PyObject *
+safe_lshift(PyObject *v, PyObject *w)
+{
+    if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w)) {
+        size_t vbits = _PyLong_NumBits(v);
+        size_t wbits = PyLong_AsSize_t(w);
+        if (vbits == (size_t)-1 || wbits == (size_t)-1) {
+            return NULL;
+        }
+        if (wbits > MAX_INT_SIZE || vbits > MAX_INT_SIZE - wbits) {
+            return NULL;
+        }
+    }
+
+    return PyNumber_Lshift(v, w);
+}
+
+static PyObject *
+safe_mod(PyObject *v, PyObject *w)
+{
+    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
+        return NULL;
+    }
+
+    return PyNumber_Remainder(v, w);
+}
+
 /* Replace LOAD_CONST c1, LOAD_CONST c2, BINOP
    with    LOAD_CONST binop(c1,c2)
    The consts table must still be in list form so that the
@@ -234,7 +360,7 @@ fold_binops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
                          PyObject *consts, PyObject **objs)
 {
     PyObject *newconst, *v, *w;
-    Py_ssize_t len_consts, size;
+    Py_ssize_t len_consts;
 
     /* Pre-conditions */
     assert(PyList_CheckExact(consts));
@@ -245,10 +371,10 @@ fold_binops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
     w = objs[1];
     switch (opcode) {
         case BINARY_POWER:
-            newconst = PyNumber_Power(v, w, Py_None);
+            newconst = safe_power(v, w);
             break;
         case BINARY_MULTIPLY:
-            newconst = PyNumber_Multiply(v, w);
+            newconst = safe_multiply(v, w);
             break;
         case BINARY_TRUE_DIVIDE:
             newconst = PyNumber_TrueDivide(v, w);
@@ -257,7 +383,7 @@ fold_binops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
             newconst = PyNumber_FloorDivide(v, w);
             break;
         case BINARY_MODULO:
-            newconst = PyNumber_Remainder(v, w);
+            newconst = safe_mod(v, w);
             break;
         case BINARY_ADD:
             newconst = PyNumber_Add(v, w);
@@ -269,7 +395,7 @@ fold_binops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
             newconst = PyObject_GetItem(v, w);
             break;
         case BINARY_LSHIFT:
-            newconst = PyNumber_Lshift(v, w);
+            newconst = safe_lshift(v, w);
             break;
         case BINARY_RSHIFT:
             newconst = PyNumber_Rshift(v, w);
@@ -296,16 +422,6 @@ fold_binops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
         }
         return -1;
     }
-    size = PyObject_Size(newconst);
-    if (size == -1) {
-        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
-            return -1;
-        }
-        PyErr_Clear();
-    } else if (size > 20) {
-        Py_DECREF(newconst);
-        return -1;
-    }
 
     /* Append folded constant into consts table */
     if (PyList_Append(consts, newconst)) {



More information about the Python-checkins mailing list