[Python-checkins] bpo-45953: Statically allocate the main interpreter (and initial thread state). (gh-29883)

ericsnowcurrently webhook-mailer at python.org
Wed Jan 12 18:28:56 EST 2022


https://github.com/python/cpython/commit/ed57b36c32e521162dbb97199e64a340d3bff827
commit: ed57b36c32e521162dbb97199e64a340d3bff827
branch: main
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: ericsnowcurrently <ericsnowcurrently at gmail.com>
date: 2022-01-12T16:28:46-07:00
summary:

bpo-45953: Statically allocate the main interpreter (and initial thread state). (gh-29883)

Previously, the main interpreter was allocated on the heap during runtime initialization.  Here we instead embed it into _PyRuntimeState, which means it is statically allocated as part of the _PyRuntime global.  The same goes for the initial thread state (of each interpreter, including the main one).  Consequently there are fewer allocations during runtime/interpreter init, fewer possible failures, and better memory locality.

FYI, this also helps efforts to consolidate globals, which in turn helps work on subinterpreter isolation.

https://bugs.python.org/issue45953

files:
A Misc/NEWS.d/next/Core and Builtins/2021-12-01-11-54-27.bpo-45953.2znR0E.rst
M Include/cpython/pystate.h
M Include/internal/pycore_global_objects.h
M Include/internal/pycore_interp.h
M Include/internal/pycore_runtime.h
M Modules/signalmodule.c
M Python/ceval.c
M Python/pystate.c

diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index c37123c4f6922..bcb1bb25a4940 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -2,6 +2,9 @@
 #  error "this header file must not be included directly"
 #endif
 
+#include <stdbool.h>
+
+
 PyAPI_FUNC(int) _PyInterpreterState_RequiresIDRef(PyInterpreterState *);
 PyAPI_FUNC(void) _PyInterpreterState_RequireIDRef(PyInterpreterState *, int);
 
@@ -83,6 +86,9 @@ struct _ts {
        after allocation. */
     int _initialized;
 
+    /* Was this thread state statically allocated? */
+    bool _static;
+
     int recursion_remaining;
     int recursion_limit;
     int recursion_headroom; /* Allow 50 more calls to handle any errors. */
@@ -175,9 +181,11 @@ struct _ts {
     PyObject **datastack_top;
     PyObject **datastack_limit;
     /* XXX signal handlers should also be here */
-
 };
 
+
+/* other API */
+
 // Alias for backward compatibility with Python 3.8
 #define _PyInterpreterState_Get PyInterpreterState_Get
 
diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h
index d2dc907c53d6d..de7ab9b53eb26 100644
--- a/Include/internal/pycore_global_objects.h
+++ b/Include/internal/pycore_global_objects.h
@@ -606,10 +606,6 @@ struct _Py_global_objects {
     }, \
 }
 
-static inline void
-_Py_global_objects_reset(struct _Py_global_objects *objects)
-{
-}
 
 #ifdef __cplusplus
 }
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index d48ea87fd67fe..77e42b65f5d3c 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -8,6 +8,8 @@ extern "C" {
 #  error "this header requires Py_BUILD_CORE define"
 #endif
 
+#include <stdbool.h>
+
 #include "pycore_atomic.h"        // _Py_atomic_address
 #include "pycore_ast_state.h"     // struct ast_state
 #include "pycore_context.h"       // struct _Py_context_state
@@ -70,13 +72,18 @@ struct atexit_state {
 
 /* interpreter state */
 
-// The PyInterpreterState typedef is in Include/pystate.h.
+/* PyInterpreterState holds the global state for one of the runtime's
+   interpreters.  Typically the initial (main) interpreter is the only one.
+
+   The PyInterpreterState typedef is in Include/pystate.h.
+   */
 struct _is {
 
     struct _is *next;
 
     struct pythreads {
         uint64_t next_unique_id;
+        /* The linked list of threads, newest first. */
         struct _ts *head;
         /* Used in Modules/_threadmodule.c. */
         long count;
@@ -104,6 +111,9 @@ struct _is {
     int _initialized;
     int finalizing;
 
+    /* Was this interpreter statically allocated? */
+    bool _static;
+
     struct _ceval_state ceval;
     struct _gc_runtime_state gc;
 
@@ -166,8 +176,26 @@ struct _is {
 
     struct ast_state ast;
     struct type_cache type_cache;
+
+    /* The following fields are here to avoid allocation during init.
+       The data is exposed through PyInterpreterState pointer fields.
+       These fields should not be accessed directly outside of init.
+
+       All other PyInterpreterState pointer fields are populated when
+       needed and default to NULL.
+
+       For now there are some exceptions to that rule, which require
+       allocation during init.  These will be addressed on a case-by-case
+       basis.  Also see _PyRuntimeState regarding the various mutex fields.
+       */
+
+    /* the initial PyInterpreterState.threads.head */
+    struct _ts _initial_thread;
 };
 
+
+/* other API */
+
 extern void _PyInterpreterState_ClearModules(PyInterpreterState *interp);
 extern void _PyInterpreterState_Clear(PyThreadState *tstate);
 
diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h
index 725c859ea7853..a66a3cf3a3944 100644
--- a/Include/internal/pycore_runtime.h
+++ b/Include/internal/pycore_runtime.h
@@ -11,8 +11,10 @@ extern "C" {
 #include "pycore_atomic.h"          /* _Py_atomic_address */
 #include "pycore_gil.h"             // struct _gil_runtime_state
 #include "pycore_global_objects.h"  // struct _Py_global_objects
+#include "pycore_interp.h"          // struct _is
 #include "pycore_unicodeobject.h"   // struct _Py_unicode_runtime_ids
 
+
 /* ceval state */
 
 struct _ceval_runtime_state {
@@ -53,6 +55,9 @@ typedef struct _Py_AuditHookEntry {
 
 /* Full Python runtime state */
 
+/* _PyRuntimeState holds the global state for the CPython runtime.
+   That data is exposed in the internal API as a static variable (_PyRuntime).
+   */
 typedef struct pyruntimestate {
     /* Has been initialized to a safe state.
 
@@ -81,7 +86,11 @@ typedef struct pyruntimestate {
 
     struct pyinterpreters {
         PyThread_type_lock mutex;
+        /* The linked list of interpreters, newest first. */
         PyInterpreterState *head;
+        /* The runtime's initial interpreter, which has a special role
+           in the operation of the runtime.  It is also often the only
+           interpreter. */
         PyInterpreterState *main;
         /* _next_interp_id is an auto-numbered sequence of small
            integers.  It gets initialized in _PyInterpreterState_Init(),
@@ -118,25 +127,44 @@ typedef struct pyruntimestate {
 
     struct _Py_unicode_runtime_ids unicode_ids;
 
+    /* All the objects that are shared by the runtime's interpreters. */
     struct _Py_global_objects global_objects;
-    // If anything gets added after global_objects then
-    // _PyRuntimeState_reset() needs to get updated to clear it.
+
+    /* The following fields are here to avoid allocation during init.
+       The data is exposed through _PyRuntimeState pointer fields.
+       These fields should not be accessed directly outside of init.
+
+       All other _PyRuntimeState pointer fields are populated when
+       needed and default to NULL.
+
+       For now there are some exceptions to that rule, which require
+       allocation during init.  These will be addressed on a case-by-case
+       basis.  Most notably, we don't pre-allocate the several mutex
+       (PyThread_type_lock) fields, because on Windows we only ever get
+       a pointer type.
+       */
+
+    /* _PyRuntimeState.interpreters.main */
+    PyInterpreterState _main_interpreter;
 } _PyRuntimeState;
 
+#define _PyThreadState_INIT \
+    { \
+        ._static = 1, \
+    }
+#define _PyInterpreterState_INIT \
+    { \
+        ._static = 1, \
+        ._initial_thread = _PyThreadState_INIT, \
+    }
 #define _PyRuntimeState_INIT \
     { \
         .global_objects = _Py_global_objects_INIT, \
+        ._main_interpreter = _PyInterpreterState_INIT, \
     }
-/* Note: _PyRuntimeState_INIT sets other fields to 0/NULL */
 
-static inline void
-_PyRuntimeState_reset(_PyRuntimeState *runtime)
-{
-    /* Make it match _PyRuntimeState_INIT. */
-    memset(runtime, 0, (size_t)&runtime->global_objects - (size_t)runtime);
-    _Py_global_objects_reset(&runtime->global_objects);
-}
 
+/* other API */
 
 PyAPI_DATA(_PyRuntimeState) _PyRuntime;
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-01-11-54-27.bpo-45953.2znR0E.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-01-11-54-27.bpo-45953.2znR0E.rst
new file mode 100644
index 0000000000000..4fa27b60c02f8
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-12-01-11-54-27.bpo-45953.2znR0E.rst	
@@ -0,0 +1,4 @@
+The main interpreter in _PyRuntimeState.interpreters is now statically
+allocated (as part of _PyRuntime).  Likewise for the initial thread state of
+each interpreter.  This means less allocation during runtime init, as well
+as better memory locality for these key state objects.
diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c
index 9316a9eed7684..e6f56e0aea9a9 100644
--- a/Modules/signalmodule.c
+++ b/Modules/signalmodule.c
@@ -292,7 +292,7 @@ trip_signal(int sig_num)
     _Py_atomic_store(&is_tripped, 1);
 
     /* Signals are always handled by the main interpreter */
-    PyInterpreterState *interp = _PyRuntime.interpreters.main;
+    PyInterpreterState *interp = _PyInterpreterState_Main();
 
     /* Notify ceval.c */
     _PyEval_SignalReceived(interp);
diff --git a/Python/ceval.c b/Python/ceval.c
index 8e878cbf7e2b3..d33cd4e1edb5d 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -617,7 +617,7 @@ Py_AddPendingCall(int (*func)(void *), void *arg)
     }
     else {
         /* Last resort: use the main interpreter */
-        interp = _PyRuntime.interpreters.main;
+        interp = _PyInterpreterState_Main();
     }
     return _PyEval_AddPendingCall(interp, func, arg);
 }
diff --git a/Python/pystate.c b/Python/pystate.c
index 68fae8d283091..a18a159b55175 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -46,6 +46,10 @@ static PyThreadState *_PyGILState_GetThisThreadState(struct _gilstate_runtime_st
 static void _PyThreadState_Delete(PyThreadState *tstate, int check_current);
 
 
+/* We use "initial" if the runtime gets re-used
+   (e.g. Py_Finalize() followed by Py_Initialize()). */
+static const _PyRuntimeState initial = _PyRuntimeState_INIT;
+
 static int
 alloc_for_runtime(PyThread_type_lock *plock1, PyThread_type_lock *plock2,
                   PyThread_type_lock *plock3)
@@ -91,9 +95,12 @@ init_runtime(_PyRuntimeState *runtime,
              PyThread_type_lock xidregistry_mutex)
 {
     if (runtime->_initialized) {
-        _PyRuntimeState_reset(runtime);
-        assert(!runtime->initialized);
+        Py_FatalError("runtime already initialized");
     }
+    assert(!runtime->preinitializing &&
+           !runtime->preinitialized &&
+           !runtime->core_initialized &&
+           !runtime->initialized);
 
     runtime->open_code_hook = open_code_hook;
     runtime->open_code_userdata = open_code_userdata;
@@ -144,6 +151,11 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
         return _PyStatus_NO_MEMORY();
     }
 
+    if (runtime->_initialized) {
+        // Py_Initialize() must be running again.
+        // Reset to _PyRuntimeState_INIT.
+        memcpy(runtime, &initial, sizeof(*runtime));
+    }
     init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head,
                  unicode_next_index, lock1, lock2, lock3);
 
@@ -250,13 +262,15 @@ alloc_interpreter(void)
 static void
 free_interpreter(PyInterpreterState *interp)
 {
-    PyMem_RawFree(interp);
+    if (!interp->_static) {
+        PyMem_RawFree(interp);
+    }
 }
 
 /* Get the interpreter state to a minimal consistent state.
    Further init happens in pylifecycle.c before it can be used.
    All fields not initialized here are expected to be zeroed out,
-   e.g. by PyMem_RawCalloc() or memset().
+   e.g. by PyMem_RawCalloc() or memset(), or otherwise pre-initialized.
    The runtime state is not manipulated.  Instead it is assumed that
    the interpreter is getting added to the runtime.
   */
@@ -338,23 +352,23 @@ PyInterpreterState_New(void)
         assert(interpreters->main == NULL);
         assert(id == 0);
 
-        interp = alloc_interpreter();
-        if (interp == NULL) {
-            goto error;
-        }
+        interp = &runtime->_main_interpreter;
         assert(interp->id == 0);
         assert(interp->next == NULL);
 
         interpreters->main = interp;
     }
     else {
-        assert(id != 0);
         assert(interpreters->main != NULL);
+        assert(id != 0);
 
         interp = alloc_interpreter();
         if (interp == NULL) {
             goto error;
         }
+        // Set to _PyInterpreterState_INIT.
+        memcpy(interp, &initial._main_interpreter,
+               sizeof(*interp));
 
         if (id < 0) {
             /* overflow or Py_Initialize() not called yet! */
@@ -735,13 +749,15 @@ alloc_threadstate(void)
 static void
 free_threadstate(PyThreadState *tstate)
 {
-    PyMem_RawFree(tstate);
+    if (!tstate->_static) {
+        PyMem_RawFree(tstate);
+    }
 }
 
 /* Get the thread state to a minimal consistent state.
    Further init happens in pylifecycle.c before it can be used.
    All fields not initialized here are expected to be zeroed out,
-   e.g. by PyMem_RawCalloc() or memset().
+   e.g. by PyMem_RawCalloc() or memset(), or otherwise pre-initialized.
    The interpreter state is not manipulated.  Instead it is assumed that
    the thread is getting added to the interpreter.
   */
@@ -808,10 +824,7 @@ new_threadstate(PyInterpreterState *interp)
         // It's the interpreter's initial thread state.
         assert(id == 1);
 
-        tstate = alloc_threadstate();
-        if (tstate == NULL) {
-            goto error;
-        }
+        tstate = &interp->_initial_thread;
     }
     else {
         // Every valid interpreter must have at least one thread.
@@ -822,6 +835,10 @@ new_threadstate(PyInterpreterState *interp)
         if (tstate == NULL) {
             goto error;
         }
+        // Set to _PyThreadState_INIT.
+        memcpy(tstate,
+               &initial._main_interpreter._initial_thread,
+               sizeof(*tstate));
     }
     interp->threads.head = tstate;
 
@@ -1159,7 +1176,7 @@ _PyThreadState_DeleteExcept(_PyRuntimeState *runtime, PyThreadState *tstate)
     for (p = list; p; p = next) {
         next = p->next;
         PyThreadState_Clear(p);
-        PyMem_RawFree(p);
+        free_threadstate(p);
     }
 }
 



More information about the Python-checkins mailing list