[Python-checkins] bpo-35193: Fix an off by one error in the RETURN_VALUE case. (GH-10418)

Miss Islington (bot) webhook-mailer at python.org
Thu Nov 8 21:13:21 EST 2018


https://github.com/python/cpython/commit/f16ebcd460aaeb8d6b31db317d22f5ed68afbcc8
commit: f16ebcd460aaeb8d6b31db317d22f5ed68afbcc8
branch: 3.7
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2018-11-08T18:13:14-08:00
summary:

bpo-35193: Fix an off by one error in the RETURN_VALUE case. (GH-10418)


Fix an off by one error in the peephole optimizer when checking for unreachable code beyond a return.

Do a bounds check within find_op so it can return before going past the end as a safety measure.

https://github.com/python/cpython/commit/7db3c488335168993689ddae5914a28e16188447GH-diff-a33329ae6ae0bb295d742f0caf93c137
introduced this off by one error while fixing another one nearby.

This bug was shipped in all Python 3.6 and 3.7 releases.

The included unittest won't fail unless you do a clang msan build.
(cherry picked from commit 49fa4a9f1ef387e16596f271414c855339eadf09)

Co-authored-by: Gregory P. Smith <greg at krypto.org>

files:
A Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst
M Lib/test/test_compile.py
M Python/peephole.c

diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index d8a57e5e0925..5ac1c5f27131 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -1,3 +1,4 @@
+import dis
 import math
 import os
 import unittest
@@ -621,6 +622,24 @@ def check_same_constant(const):
         self.check_constant(f1, frozenset({0}))
         self.assertTrue(f1(0))
 
+    # This is a regression test for a CPython specific peephole optimizer
+    # implementation bug present in a few releases.  It's assertion verifies
+    # that peephole optimization was actually done though that isn't an
+    # indication of the bugs presence or not (crashing is).
+    @support.cpython_only
+    def test_peephole_opt_unreachable_code_array_access_in_bounds(self):
+        """Regression test for issue35193 when run under clang msan."""
+        def unused_code_at_end():
+            return 3
+            raise RuntimeError("unreachable")
+        # The above function definition will trigger the out of bounds
+        # bug in the peephole optimizer as it scans opcodes past the
+        # RETURN_VALUE opcode.  This does not always crash an interpreter.
+        # When you build with the clang memory sanitizer it reliably aborts.
+        self.assertEqual(
+            'RETURN_VALUE',
+            list(dis.get_instructions(unused_code_at_end))[-1].opname)
+
     def test_dont_merge_constants(self):
         # Issue #25843: compile() must not merge constants which are equal
         # but have a different type.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst
new file mode 100644
index 000000000000..dddebe167080
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst	
@@ -0,0 +1,3 @@
+Fix an off by one error in the bytecode peephole optimizer where it could read
+bytes beyond the end of bounds of an array when removing unreachable code.
+This bug was present in every release of Python 3.6 and 3.7 until now.
diff --git a/Python/peephole.c b/Python/peephole.c
index 76a0edbcc2bc..a3b078fdf1d4 100644
--- a/Python/peephole.c
+++ b/Python/peephole.c
@@ -50,9 +50,9 @@ lastn_const_start(const _Py_CODEUNIT *codestr, Py_ssize_t i, Py_ssize_t n)
 
 /* Scans through EXTENDED ARGs, seeking the index of the effective opcode */
 static Py_ssize_t
-find_op(const _Py_CODEUNIT *codestr, Py_ssize_t i)
+find_op(const _Py_CODEUNIT *codestr, Py_ssize_t codelen, Py_ssize_t i)
 {
-    while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) {
+    while (i < codelen && _Py_OPCODE(codestr[i]) == EXTENDED_ARG) {
         i++;
     }
     return i;
@@ -128,8 +128,9 @@ copy_op_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned char op,
    Called with codestr pointing to the first LOAD_CONST.
 */
 static Py_ssize_t
-fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
-                        Py_ssize_t opcode_end, PyObject *consts, int n)
+fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t codelen,
+                        Py_ssize_t c_start, Py_ssize_t opcode_end,
+                        PyObject *consts, int n)
 {
     /* Pre-conditions */
     assert(PyList_CheckExact(consts));
@@ -142,7 +143,7 @@ fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
 
     for (Py_ssize_t i = 0, pos = c_start; i < n; i++, pos++) {
         assert(pos < opcode_end);
-        pos = find_op(codestr, pos);
+        pos = find_op(codestr, codelen, pos);
         assert(_Py_OPCODE(codestr[pos]) == LOAD_CONST);
 
         unsigned int arg = get_arg(codestr, pos);
@@ -267,7 +268,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
         goto exitError;
     assert(PyList_Check(consts));
 
-    for (i=find_op(codestr, 0) ; i<codelen ; i=nexti) {
+    for (i=find_op(codestr, codelen, 0) ; i<codelen ; i=nexti) {
         opcode = _Py_OPCODE(codestr[i]);
         op_start = i;
         while (op_start >= 1 && _Py_OPCODE(codestr[op_start-1]) == EXTENDED_ARG) {
@@ -305,7 +306,8 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                 if (j > 0 && lastlc >= j) {
                     h = lastn_const_start(codestr, op_start, j);
                     if (ISBASICBLOCK(blocks, h, op_start)) {
-                        h = fold_tuple_on_constants(codestr, h, i+1, consts, j);
+                        h = fold_tuple_on_constants(codestr, codelen,
+                                                    h, i+1, consts, j);
                         break;
                     }
                 }
@@ -342,7 +344,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
             case JUMP_IF_FALSE_OR_POP:
             case JUMP_IF_TRUE_OR_POP:
                 h = get_arg(codestr, i) / sizeof(_Py_CODEUNIT);
-                tgt = find_op(codestr, h);
+                tgt = find_op(codestr, codelen, h);
 
                 j = _Py_OPCODE(codestr[tgt]);
                 if (CONDITIONAL_JUMP(j)) {
@@ -383,7 +385,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
             case SETUP_WITH:
             case SETUP_ASYNC_WITH:
                 h = GETJUMPTGT(codestr, i);
-                tgt = find_op(codestr, h);
+                tgt = find_op(codestr, codelen, h);
                 /* Replace JUMP_* to a RETURN into just a RETURN */
                 if (UNCONDITIONAL_JUMP(opcode) &&
                     _Py_OPCODE(codestr[tgt]) == RETURN_VALUE) {
@@ -412,7 +414,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                 }
                 if (h > i + 1) {
                     fill_nops(codestr, i + 1, h);
-                    nexti = find_op(codestr, h);
+                    nexti = find_op(codestr, codelen, h);
                 }
                 break;
         }



More information about the Python-checkins mailing list