[Python-checkins] bpo-45953: Statically allocate the main interpreter (and initial thread state). (gh-29883)
ericsnowcurrently
webhook-mailer at python.org
Wed Jan 12 18:28:56 EST 2022
https://github.com/python/cpython/commit/ed57b36c32e521162dbb97199e64a340d3bff827
commit: ed57b36c32e521162dbb97199e64a340d3bff827
branch: main
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: ericsnowcurrently <ericsnowcurrently at gmail.com>
date: 2022-01-12T16:28:46-07:00
summary:
bpo-45953: Statically allocate the main interpreter (and initial thread state). (gh-29883)
Previously, the main interpreter was allocated on the heap during runtime initialization. Here we instead embed it into _PyRuntimeState, which means it is statically allocated as part of the _PyRuntime global. The same goes for the initial thread state (of each interpreter, including the main one). Consequently there are fewer allocations during runtime/interpreter init, fewer possible failures, and better memory locality.
FYI, this also helps efforts to consolidate globals, which in turn helps work on subinterpreter isolation.
https://bugs.python.org/issue45953
files:
A Misc/NEWS.d/next/Core and Builtins/2021-12-01-11-54-27.bpo-45953.2znR0E.rst
M Include/cpython/pystate.h
M Include/internal/pycore_global_objects.h
M Include/internal/pycore_interp.h
M Include/internal/pycore_runtime.h
M Modules/signalmodule.c
M Python/ceval.c
M Python/pystate.c
diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index c37123c4f6922..bcb1bb25a4940 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -2,6 +2,9 @@
# error "this header file must not be included directly"
#endif
+#include <stdbool.h>
+
+
PyAPI_FUNC(int) _PyInterpreterState_RequiresIDRef(PyInterpreterState *);
PyAPI_FUNC(void) _PyInterpreterState_RequireIDRef(PyInterpreterState *, int);
@@ -83,6 +86,9 @@ struct _ts {
after allocation. */
int _initialized;
+ /* Was this thread state statically allocated? */
+ bool _static;
+
int recursion_remaining;
int recursion_limit;
int recursion_headroom; /* Allow 50 more calls to handle any errors. */
@@ -175,9 +181,11 @@ struct _ts {
PyObject **datastack_top;
PyObject **datastack_limit;
/* XXX signal handlers should also be here */
-
};
+
+/* other API */
+
// Alias for backward compatibility with Python 3.8
#define _PyInterpreterState_Get PyInterpreterState_Get
diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h
index d2dc907c53d6d..de7ab9b53eb26 100644
--- a/Include/internal/pycore_global_objects.h
+++ b/Include/internal/pycore_global_objects.h
@@ -606,10 +606,6 @@ struct _Py_global_objects {
}, \
}
-static inline void
-_Py_global_objects_reset(struct _Py_global_objects *objects)
-{
-}
#ifdef __cplusplus
}
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index d48ea87fd67fe..77e42b65f5d3c 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -8,6 +8,8 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif
+#include <stdbool.h>
+
#include "pycore_atomic.h" // _Py_atomic_address
#include "pycore_ast_state.h" // struct ast_state
#include "pycore_context.h" // struct _Py_context_state
@@ -70,13 +72,18 @@ struct atexit_state {
/* interpreter state */
-// The PyInterpreterState typedef is in Include/pystate.h.
+/* PyInterpreterState holds the global state for one of the runtime's
+ interpreters. Typically the initial (main) interpreter is the only one.
+
+ The PyInterpreterState typedef is in Include/pystate.h.
+ */
struct _is {
struct _is *next;
struct pythreads {
uint64_t next_unique_id;
+ /* The linked list of threads, newest first. */
struct _ts *head;
/* Used in Modules/_threadmodule.c. */
long count;
@@ -104,6 +111,9 @@ struct _is {
int _initialized;
int finalizing;
+ /* Was this interpreter statically allocated? */
+ bool _static;
+
struct _ceval_state ceval;
struct _gc_runtime_state gc;
@@ -166,8 +176,26 @@ struct _is {
struct ast_state ast;
struct type_cache type_cache;
+
+ /* The following fields are here to avoid allocation during init.
+ The data is exposed through PyInterpreterState pointer fields.
+ These fields should not be accessed directly outside of init.
+
+ All other PyInterpreterState pointer fields are populated when
+ needed and default to NULL.
+
+ For now there are some exceptions to that rule, which require
+ allocation during init. These will be addressed on a case-by-case
+ basis. Also see _PyRuntimeState regarding the various mutex fields.
+ */
+
+ /* the initial PyInterpreterState.threads.head */
+ struct _ts _initial_thread;
};
+
+/* other API */
+
extern void _PyInterpreterState_ClearModules(PyInterpreterState *interp);
extern void _PyInterpreterState_Clear(PyThreadState *tstate);
diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h
index 725c859ea7853..a66a3cf3a3944 100644
--- a/Include/internal/pycore_runtime.h
+++ b/Include/internal/pycore_runtime.h
@@ -11,8 +11,10 @@ extern "C" {
#include "pycore_atomic.h" /* _Py_atomic_address */
#include "pycore_gil.h" // struct _gil_runtime_state
#include "pycore_global_objects.h" // struct _Py_global_objects
+#include "pycore_interp.h" // struct _is
#include "pycore_unicodeobject.h" // struct _Py_unicode_runtime_ids
+
/* ceval state */
struct _ceval_runtime_state {
@@ -53,6 +55,9 @@ typedef struct _Py_AuditHookEntry {
/* Full Python runtime state */
+/* _PyRuntimeState holds the global state for the CPython runtime.
+ That data is exposed in the internal API as a static variable (_PyRuntime).
+ */
typedef struct pyruntimestate {
/* Has been initialized to a safe state.
@@ -81,7 +86,11 @@ typedef struct pyruntimestate {
struct pyinterpreters {
PyThread_type_lock mutex;
+ /* The linked list of interpreters, newest first. */
PyInterpreterState *head;
+ /* The runtime's initial interpreter, which has a special role
+ in the operation of the runtime. It is also often the only
+ interpreter. */
PyInterpreterState *main;
/* _next_interp_id is an auto-numbered sequence of small
integers. It gets initialized in _PyInterpreterState_Init(),
@@ -118,25 +127,44 @@ typedef struct pyruntimestate {
struct _Py_unicode_runtime_ids unicode_ids;
+ /* All the objects that are shared by the runtime's interpreters. */
struct _Py_global_objects global_objects;
- // If anything gets added after global_objects then
- // _PyRuntimeState_reset() needs to get updated to clear it.
+
+ /* The following fields are here to avoid allocation during init.
+ The data is exposed through _PyRuntimeState pointer fields.
+ These fields should not be accessed directly outside of init.
+
+ All other _PyRuntimeState pointer fields are populated when
+ needed and default to NULL.
+
+ For now there are some exceptions to that rule, which require
+ allocation during init. These will be addressed on a case-by-case
+ basis. Most notably, we don't pre-allocate the several mutex
+ (PyThread_type_lock) fields, because on Windows we only ever get
+ a pointer type.
+ */
+
+ /* _PyRuntimeState.interpreters.main */
+ PyInterpreterState _main_interpreter;
} _PyRuntimeState;
+#define _PyThreadState_INIT \
+ { \
+ ._static = 1, \
+ }
+#define _PyInterpreterState_INIT \
+ { \
+ ._static = 1, \
+ ._initial_thread = _PyThreadState_INIT, \
+ }
#define _PyRuntimeState_INIT \
{ \
.global_objects = _Py_global_objects_INIT, \
+ ._main_interpreter = _PyInterpreterState_INIT, \
}
-/* Note: _PyRuntimeState_INIT sets other fields to 0/NULL */
-static inline void
-_PyRuntimeState_reset(_PyRuntimeState *runtime)
-{
- /* Make it match _PyRuntimeState_INIT. */
- memset(runtime, 0, (size_t)&runtime->global_objects - (size_t)runtime);
- _Py_global_objects_reset(&runtime->global_objects);
-}
+/* other API */
PyAPI_DATA(_PyRuntimeState) _PyRuntime;
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-01-11-54-27.bpo-45953.2znR0E.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-01-11-54-27.bpo-45953.2znR0E.rst
new file mode 100644
index 0000000000000..4fa27b60c02f8
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-12-01-11-54-27.bpo-45953.2znR0E.rst
@@ -0,0 +1,4 @@
+The main interpreter in _PyRuntimeState.interpreters is now statically
+allocated (as part of _PyRuntime). Likewise for the initial thread state of
+each interpreter. This means less allocation during runtime init, as well
+as better memory locality for these key state objects.
diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c
index 9316a9eed7684..e6f56e0aea9a9 100644
--- a/Modules/signalmodule.c
+++ b/Modules/signalmodule.c
@@ -292,7 +292,7 @@ trip_signal(int sig_num)
_Py_atomic_store(&is_tripped, 1);
/* Signals are always handled by the main interpreter */
- PyInterpreterState *interp = _PyRuntime.interpreters.main;
+ PyInterpreterState *interp = _PyInterpreterState_Main();
/* Notify ceval.c */
_PyEval_SignalReceived(interp);
diff --git a/Python/ceval.c b/Python/ceval.c
index 8e878cbf7e2b3..d33cd4e1edb5d 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -617,7 +617,7 @@ Py_AddPendingCall(int (*func)(void *), void *arg)
}
else {
/* Last resort: use the main interpreter */
- interp = _PyRuntime.interpreters.main;
+ interp = _PyInterpreterState_Main();
}
return _PyEval_AddPendingCall(interp, func, arg);
}
diff --git a/Python/pystate.c b/Python/pystate.c
index 68fae8d283091..a18a159b55175 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -46,6 +46,10 @@ static PyThreadState *_PyGILState_GetThisThreadState(struct _gilstate_runtime_st
static void _PyThreadState_Delete(PyThreadState *tstate, int check_current);
+/* We use "initial" if the runtime gets re-used
+ (e.g. Py_Finalize() followed by Py_Initialize()). */
+static const _PyRuntimeState initial = _PyRuntimeState_INIT;
+
static int
alloc_for_runtime(PyThread_type_lock *plock1, PyThread_type_lock *plock2,
PyThread_type_lock *plock3)
@@ -91,9 +95,12 @@ init_runtime(_PyRuntimeState *runtime,
PyThread_type_lock xidregistry_mutex)
{
if (runtime->_initialized) {
- _PyRuntimeState_reset(runtime);
- assert(!runtime->initialized);
+ Py_FatalError("runtime already initialized");
}
+ assert(!runtime->preinitializing &&
+ !runtime->preinitialized &&
+ !runtime->core_initialized &&
+ !runtime->initialized);
runtime->open_code_hook = open_code_hook;
runtime->open_code_userdata = open_code_userdata;
@@ -144,6 +151,11 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
return _PyStatus_NO_MEMORY();
}
+ if (runtime->_initialized) {
+ // Py_Initialize() must be running again.
+ // Reset to _PyRuntimeState_INIT.
+ memcpy(runtime, &initial, sizeof(*runtime));
+ }
init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head,
unicode_next_index, lock1, lock2, lock3);
@@ -250,13 +262,15 @@ alloc_interpreter(void)
static void
free_interpreter(PyInterpreterState *interp)
{
- PyMem_RawFree(interp);
+ if (!interp->_static) {
+ PyMem_RawFree(interp);
+ }
}
/* Get the interpreter state to a minimal consistent state.
Further init happens in pylifecycle.c before it can be used.
All fields not initialized here are expected to be zeroed out,
- e.g. by PyMem_RawCalloc() or memset().
+ e.g. by PyMem_RawCalloc() or memset(), or otherwise pre-initialized.
The runtime state is not manipulated. Instead it is assumed that
the interpreter is getting added to the runtime.
*/
@@ -338,23 +352,23 @@ PyInterpreterState_New(void)
assert(interpreters->main == NULL);
assert(id == 0);
- interp = alloc_interpreter();
- if (interp == NULL) {
- goto error;
- }
+ interp = &runtime->_main_interpreter;
assert(interp->id == 0);
assert(interp->next == NULL);
interpreters->main = interp;
}
else {
- assert(id != 0);
assert(interpreters->main != NULL);
+ assert(id != 0);
interp = alloc_interpreter();
if (interp == NULL) {
goto error;
}
+ // Set to _PyInterpreterState_INIT.
+ memcpy(interp, &initial._main_interpreter,
+ sizeof(*interp));
if (id < 0) {
/* overflow or Py_Initialize() not called yet! */
@@ -735,13 +749,15 @@ alloc_threadstate(void)
static void
free_threadstate(PyThreadState *tstate)
{
- PyMem_RawFree(tstate);
+ if (!tstate->_static) {
+ PyMem_RawFree(tstate);
+ }
}
/* Get the thread state to a minimal consistent state.
Further init happens in pylifecycle.c before it can be used.
All fields not initialized here are expected to be zeroed out,
- e.g. by PyMem_RawCalloc() or memset().
+ e.g. by PyMem_RawCalloc() or memset(), or otherwise pre-initialized.
The interpreter state is not manipulated. Instead it is assumed that
the thread is getting added to the interpreter.
*/
@@ -808,10 +824,7 @@ new_threadstate(PyInterpreterState *interp)
// It's the interpreter's initial thread state.
assert(id == 1);
- tstate = alloc_threadstate();
- if (tstate == NULL) {
- goto error;
- }
+ tstate = &interp->_initial_thread;
}
else {
// Every valid interpreter must have at least one thread.
@@ -822,6 +835,10 @@ new_threadstate(PyInterpreterState *interp)
if (tstate == NULL) {
goto error;
}
+ // Set to _PyThreadState_INIT.
+ memcpy(tstate,
+ &initial._main_interpreter._initial_thread,
+ sizeof(*tstate));
}
interp->threads.head = tstate;
@@ -1159,7 +1176,7 @@ _PyThreadState_DeleteExcept(_PyRuntimeState *runtime, PyThreadState *tstate)
for (p = list; p; p = next) {
next = p->next;
PyThreadState_Clear(p);
- PyMem_RawFree(p);
+ free_threadstate(p);
}
}
More information about the Python-checkins
mailing list