[Python-checkins] GH-97002: Prevent `_PyInterpreterFrame`s from backing more than one `PyFrameObject` (GH-97996)

markshannon webhook-mailer at python.org
Thu Oct 6 19:20:10 EDT 2022


https://github.com/python/cpython/commit/21a2d9ff550977f2668e2cf1cc15793bf27fa109
commit: 21a2d9ff550977f2668e2cf1cc15793bf27fa109
branch: main
author: Brandt Bucher <brandtbucher at microsoft.com>
committer: markshannon <mark at hotpy.org>
date: 2022-10-07T00:20:01+01:00
summary:

GH-97002: Prevent `_PyInterpreterFrame`s from backing more than one `PyFrameObject` (GH-97996)

files:
A Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst
M Lib/test/test_frame.py
M Python/frame.c

diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py
index 5dda2fbfac37..4b86a60d2f4c 100644
--- a/Lib/test/test_frame.py
+++ b/Lib/test/test_frame.py
@@ -1,3 +1,4 @@
+import gc
 import re
 import sys
 import textwrap
@@ -261,5 +262,69 @@ def gen():
         """)
         assert_python_ok("-c", code)
 
+    @support.cpython_only
+    def test_sneaky_frame_object(self):
+
+        def trace(frame, event, arg):
+            """
+            Don't actually do anything, just force a frame object to be created.
+            """
+
+        def callback(phase, info):
+            """
+            Yo dawg, I heard you like frames, so I'm allocating a frame while
+            you're allocating a frame, so you can have a frame while you have a
+            frame!
+            """
+            nonlocal sneaky_frame_object
+            sneaky_frame_object = sys._getframe().f_back
+            # We're done here:
+            gc.callbacks.remove(callback)
+
+        def f():
+            while True:
+                yield
+
+        old_threshold = gc.get_threshold()
+        old_callbacks = gc.callbacks[:]
+        old_enabled = gc.isenabled()
+        old_trace = sys.gettrace()
+        try:
+            # Stop the GC for a second while we set things up:
+            gc.disable()
+            # Create a paused generator:
+            g = f()
+            next(g)
+            # Move all objects to the oldest generation, and tell the GC to run
+            # on the *very next* allocation:
+            gc.collect()
+            gc.set_threshold(1, 0, 0)
+            # Okay, so here's the nightmare scenario:
+            # - We're tracing the resumption of a generator, which creates a new
+            #   frame object.
+            # - The allocation of this frame object triggers a collection
+            #   *before* the frame object is actually created.
+            # - During the collection, we request the exact same frame object.
+            #   This test does it with a GC callback, but in real code it would
+            #   likely be a trace function, weakref callback, or finalizer.
+            # - The collection finishes, and the original frame object is
+            #   created. We now have two frame objects fighting over ownership
+            #   of the same interpreter frame!
+            sys.settrace(trace)
+            gc.callbacks.append(callback)
+            sneaky_frame_object = None
+            gc.enable()
+            next(g)
+            # g.gi_frame should be the the frame object from the callback (the
+            # one that was *requested* second, but *created* first):
+            self.assertIs(g.gi_frame, sneaky_frame_object)
+        finally:
+            gc.set_threshold(*old_threshold)
+            gc.callbacks[:] = old_callbacks
+            sys.settrace(old_trace)
+            if old_enabled:
+                gc.enable()
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst
new file mode 100644
index 000000000000..1f577e02e1fd
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-02-11-34.gh-issue-97002.Zvsk71.rst	
@@ -0,0 +1,3 @@
+Fix an issue where several frame objects could be backed by the same
+interpreter frame, possibly leading to corrupted memory and hard crashes of
+the interpreter.
diff --git a/Python/frame.c b/Python/frame.c
index 96566de63a78..89f084b110cb 100644
--- a/Python/frame.c
+++ b/Python/frame.c
@@ -37,14 +37,31 @@ _PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame *frame)
         Py_XDECREF(error_type);
         Py_XDECREF(error_value);
         Py_XDECREF(error_traceback);
+        return NULL;
     }
-    else {
-        assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT);
-        assert(frame->owner != FRAME_CLEARED);
-        f->f_frame = frame;
-        frame->frame_obj = f;
-        PyErr_Restore(error_type, error_value, error_traceback);
+    PyErr_Restore(error_type, error_value, error_traceback);
+    if (frame->frame_obj) {
+        // GH-97002: How did we get into this horrible situation? Most likely,
+        // allocating f triggered a GC collection, which ran some code that
+        // *also* created the same frame... while we were in the middle of
+        // creating it! See test_sneaky_frame_object in test_frame.py for a
+        // concrete example.
+        //
+        // Regardless, just throw f away and use that frame instead, since it's
+        // already been exposed to user code. It's actually a bit tricky to do
+        // this, since we aren't backed by a real _PyInterpreterFrame anymore.
+        // Just pretend that we have an owned, cleared frame so frame_dealloc
+        // doesn't make the situation worse:
+        f->f_frame = (_PyInterpreterFrame *)f->_f_frame_data;
+        f->f_frame->owner = FRAME_CLEARED;
+        f->f_frame->frame_obj = f;
+        Py_DECREF(f);
+        return frame->frame_obj;
     }
+    assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT);
+    assert(frame->owner != FRAME_CLEARED);
+    f->f_frame = frame;
+    frame->frame_obj = f;
     return f;
 }
 



More information about the Python-checkins mailing list