[Python-checkins] gh-98003: Inline call frames for CALL_FUNCTION_EX (GH-98004)
Fidget-Spinner
webhook-mailer at python.org
Sun Apr 30 09:08:33 EDT 2023
https://github.com/python/cpython/commit/ed95e8cbd4cbc813666c7ce7760257cc0f169d03
commit: ed95e8cbd4cbc813666c7ce7760257cc0f169d03
branch: main
author: Ken Jin <kenjin at python.org>
committer: Fidget-Spinner <kenjin at python.org>
date: 2023-04-30T21:08:26+08:00
summary:
gh-98003: Inline call frames for CALL_FUNCTION_EX (GH-98004)
files:
A Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst
M Include/internal/pycore_call.h
M Objects/call.c
M Python/bytecodes.c
M Python/ceval.c
M Python/generated_cases.c.h
diff --git a/Include/internal/pycore_call.h b/Include/internal/pycore_call.h
index 55378e3dfebf..5d9342b562b0 100644
--- a/Include/internal/pycore_call.h
+++ b/Include/internal/pycore_call.h
@@ -116,6 +116,16 @@ _PyObject_FastCallTstate(PyThreadState *tstate, PyObject *func, PyObject *const
return _PyObject_VectorcallTstate(tstate, func, args, (size_t)nargs, NULL);
}
+PyObject *const *
+_PyStack_UnpackDict(PyThreadState *tstate,
+ PyObject *const *args, Py_ssize_t nargs,
+ PyObject *kwargs, PyObject **p_kwnames);
+
+void
+_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
+ PyObject *kwnames);
+
+void _PyStack_UnpackDict_FreeNoDecRef(PyObject *const *stack, PyObject *kwnames);
#ifdef __cplusplus
}
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst
new file mode 100644
index 000000000000..f9e71bc1344b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst
@@ -0,0 +1,3 @@
+Complex function calls are now faster and consume no C stack
+space.
+
diff --git a/Objects/call.c b/Objects/call.c
index bd027e41f8a9..cf6e357a9904 100644
--- a/Objects/call.c
+++ b/Objects/call.c
@@ -8,16 +8,6 @@
#include "pycore_tuple.h" // _PyTuple_ITEMS()
-static PyObject *const *
-_PyStack_UnpackDict(PyThreadState *tstate,
- PyObject *const *args, Py_ssize_t nargs,
- PyObject *kwargs, PyObject **p_kwnames);
-
-static void
-_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
- PyObject *kwnames);
-
-
static PyObject *
null_error(PyThreadState *tstate)
{
@@ -965,7 +955,7 @@ _PyStack_AsDict(PyObject *const *values, PyObject *kwnames)
The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET.
When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames) */
-static PyObject *const *
+PyObject *const *
_PyStack_UnpackDict(PyThreadState *tstate,
PyObject *const *args, Py_ssize_t nargs,
PyObject *kwargs, PyObject **p_kwnames)
@@ -1034,7 +1024,7 @@ _PyStack_UnpackDict(PyThreadState *tstate,
return stack;
}
-static void
+void
_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
PyObject *kwnames)
{
@@ -1042,6 +1032,12 @@ _PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
for (Py_ssize_t i = 0; i < n; i++) {
Py_DECREF(stack[i]);
}
+ _PyStack_UnpackDict_FreeNoDecRef(stack, kwnames);
+}
+
+void
+_PyStack_UnpackDict_FreeNoDecRef(PyObject *const *stack, PyObject *kwnames)
+{
PyMem_Free((PyObject **)stack - 1);
Py_DECREF(kwnames);
}
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 9de0d92e382d..e83894e89028 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -3103,6 +3103,25 @@ dummy_func(
}
}
else {
+ if (Py_TYPE(func) == &PyFunction_Type &&
+ tstate->interp->eval_frame == NULL &&
+ ((PyFunctionObject *)func)->vectorcall == _PyFunction_Vectorcall) {
+ assert(PyTuple_CheckExact(callargs));
+ Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
+ int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags;
+ PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(func));
+
+ _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex(tstate,
+ (PyFunctionObject *)func, locals,
+ nargs, callargs, kwargs);
+ // Need to manually shrink the stack since we exit with DISPATCH_INLINED.
+ STACK_SHRINK(oparg + 3);
+ if (new_frame == NULL) {
+ goto error;
+ }
+ frame->return_offset = 0;
+ DISPATCH_INLINED(new_frame);
+ }
result = PyObject_Call(func, callargs, kwargs);
}
DECREF_INPUTS();
diff --git a/Python/ceval.c b/Python/ceval.c
index 5d5221b2e409..958689debc87 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -212,6 +212,9 @@ static _PyInterpreterFrame *
_PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, PyObject* const* args,
size_t argcount, PyObject *kwnames);
+static _PyInterpreterFrame *
+_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
+ PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs);
static void
_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
@@ -1501,6 +1504,49 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
return NULL;
}
+/* Same as _PyEvalFramePushAndInit but takes the positional arguments as a tuple (callargs, nargs items) and the keyword arguments as a dict (kwargs, may be NULL or empty).
+ Steals references to func, callargs and kwargs, on success and on failure. Returns NULL if kwargs cannot be unpacked; otherwise returns the result of _PyEvalFramePushAndInit.
+*/
+static _PyInterpreterFrame *
+_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
+ PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs)
+{
+ bool has_dict = (kwargs != NULL && PyDict_GET_SIZE(kwargs) > 0);
+ PyObject *kwnames = NULL;
+ PyObject *const *newargs;
+ if (has_dict) {
+ newargs = _PyStack_UnpackDict(tstate, _PyTuple_ITEMS(callargs), nargs, kwargs, &kwnames);
+ if (newargs == NULL) {
+ Py_DECREF(func);
+ goto error;
+ }
+ }
+ else {
+ newargs = &PyTuple_GET_ITEM(callargs, 0);
+ /* No kwargs to unpack: the tuple's own item array is passed directly. Incref every item since the new frame steals the references and callargs is released below. */
+ for (Py_ssize_t i = 0; i < nargs; ++i) {
+ Py_INCREF(PyTuple_GET_ITEM(callargs, i));
+ }
+ }
+ _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit(
+ tstate, (PyFunctionObject *)func, locals,
+ newargs, nargs, kwnames
+ );
+ if (has_dict) {
+ _PyStack_UnpackDict_FreeNoDecRef(newargs, kwnames);
+ }
+ /* In the kwargs case the unpacked array and kwnames are released above without decref'ing the items, whose references were handed to _PyEvalFramePushAndInit.
+ No need to decref func here because the reference has been stolen by _PyEvalFramePushAndInit.
+ */
+ Py_DECREF(callargs);
+ Py_XDECREF(kwargs);
+ return new_frame;
+error:
+ Py_DECREF(callargs);
+ Py_XDECREF(kwargs);
+ return NULL;
+}
+
PyObject *
_PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals,
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 864a4f7bcaff..069a7ced0a4c 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -4296,16 +4296,35 @@
}
}
else {
+ if (Py_TYPE(func) == &PyFunction_Type &&
+ tstate->interp->eval_frame == NULL &&
+ ((PyFunctionObject *)func)->vectorcall == _PyFunction_Vectorcall) {
+ assert(PyTuple_CheckExact(callargs));
+ Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
+ int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags;
+ PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(func));
+
+ _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex(tstate,
+ (PyFunctionObject *)func, locals,
+ nargs, callargs, kwargs);
+ // Need to manually shrink the stack since we exit with DISPATCH_INLINED.
+ STACK_SHRINK(oparg + 3);
+ if (new_frame == NULL) {
+ goto error;
+ }
+ frame->return_offset = 0;
+ DISPATCH_INLINED(new_frame);
+ }
result = PyObject_Call(func, callargs, kwargs);
}
- #line 4302 "Python/generated_cases.c.h"
+ #line 4321 "Python/generated_cases.c.h"
Py_DECREF(func);
Py_DECREF(callargs);
Py_XDECREF(kwargs);
- #line 3109 "Python/bytecodes.c"
+ #line 3128 "Python/bytecodes.c"
assert(PEEK(3 + (oparg & 1)) == NULL);
if (result == NULL) { STACK_SHRINK(((oparg & 1) ? 1 : 0)); goto pop_3_error; }
- #line 4309 "Python/generated_cases.c.h"
+ #line 4328 "Python/generated_cases.c.h"
STACK_SHRINK(((oparg & 1) ? 1 : 0));
STACK_SHRINK(2);
stack_pointer[-1] = result;
@@ -4320,7 +4339,7 @@
PyObject *kwdefaults = (oparg & 0x02) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0))] : NULL;
PyObject *defaults = (oparg & 0x01) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x01) ? 1 : 0))] : NULL;
PyObject *func;
- #line 3119 "Python/bytecodes.c"
+ #line 3138 "Python/bytecodes.c"
PyFunctionObject *func_obj = (PyFunctionObject *)
PyFunction_New(codeobj, GLOBALS());
@@ -4349,14 +4368,14 @@
func_obj->func_version = ((PyCodeObject *)codeobj)->co_version;
func = (PyObject *)func_obj;
- #line 4353 "Python/generated_cases.c.h"
+ #line 4372 "Python/generated_cases.c.h"
STACK_SHRINK(((oparg & 0x01) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x08) ? 1 : 0));
stack_pointer[-1] = func;
DISPATCH();
}
TARGET(RETURN_GENERATOR) {
- #line 3150 "Python/bytecodes.c"
+ #line 3169 "Python/bytecodes.c"
assert(PyFunction_Check(frame->f_funcobj));
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
@@ -4377,7 +4396,7 @@
frame = cframe.current_frame = prev;
_PyFrame_StackPush(frame, (PyObject *)gen);
goto resume_frame;
- #line 4381 "Python/generated_cases.c.h"
+ #line 4400 "Python/generated_cases.c.h"
}
TARGET(BUILD_SLICE) {
@@ -4385,15 +4404,15 @@
PyObject *stop = stack_pointer[-(1 + ((oparg == 3) ? 1 : 0))];
PyObject *start = stack_pointer[-(2 + ((oparg == 3) ? 1 : 0))];
PyObject *slice;
- #line 3173 "Python/bytecodes.c"
+ #line 3192 "Python/bytecodes.c"
slice = PySlice_New(start, stop, step);
- #line 4391 "Python/generated_cases.c.h"
+ #line 4410 "Python/generated_cases.c.h"
Py_DECREF(start);
Py_DECREF(stop);
Py_XDECREF(step);
- #line 3175 "Python/bytecodes.c"
+ #line 3194 "Python/bytecodes.c"
if (slice == NULL) { STACK_SHRINK(((oparg == 3) ? 1 : 0)); goto pop_2_error; }
- #line 4397 "Python/generated_cases.c.h"
+ #line 4416 "Python/generated_cases.c.h"
STACK_SHRINK(((oparg == 3) ? 1 : 0));
STACK_SHRINK(1);
stack_pointer[-1] = slice;
@@ -4404,7 +4423,7 @@
PyObject *fmt_spec = ((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? stack_pointer[-((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))] : NULL;
PyObject *value = stack_pointer[-(1 + (((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))];
PyObject *result;
- #line 3179 "Python/bytecodes.c"
+ #line 3198 "Python/bytecodes.c"
/* Handles f-string value formatting. */
PyObject *(*conv_fn)(PyObject *);
int which_conversion = oparg & FVC_MASK;
@@ -4439,7 +4458,7 @@
Py_DECREF(value);
Py_XDECREF(fmt_spec);
if (result == NULL) { STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0)); goto pop_1_error; }
- #line 4443 "Python/generated_cases.c.h"
+ #line 4462 "Python/generated_cases.c.h"
STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0));
stack_pointer[-1] = result;
DISPATCH();
@@ -4448,10 +4467,10 @@
TARGET(COPY) {
PyObject *bottom = stack_pointer[-(1 + (oparg-1))];
PyObject *top;
- #line 3216 "Python/bytecodes.c"
+ #line 3235 "Python/bytecodes.c"
assert(oparg > 0);
top = Py_NewRef(bottom);
- #line 4455 "Python/generated_cases.c.h"
+ #line 4474 "Python/generated_cases.c.h"
STACK_GROW(1);
stack_pointer[-1] = top;
DISPATCH();
@@ -4463,7 +4482,7 @@
PyObject *rhs = stack_pointer[-1];
PyObject *lhs = stack_pointer[-2];
PyObject *res;
- #line 3221 "Python/bytecodes.c"
+ #line 3240 "Python/bytecodes.c"
#if ENABLE_SPECIALIZATION
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
@@ -4478,12 +4497,12 @@
assert((unsigned)oparg < Py_ARRAY_LENGTH(binary_ops));
assert(binary_ops[oparg]);
res = binary_ops[oparg](lhs, rhs);
- #line 4482 "Python/generated_cases.c.h"
+ #line 4501 "Python/generated_cases.c.h"
Py_DECREF(lhs);
Py_DECREF(rhs);
- #line 3236 "Python/bytecodes.c"
+ #line 3255 "Python/bytecodes.c"
if (res == NULL) goto pop_2_error;
- #line 4487 "Python/generated_cases.c.h"
+ #line 4506 "Python/generated_cases.c.h"
STACK_SHRINK(1);
stack_pointer[-1] = res;
next_instr += 1;
@@ -4493,16 +4512,16 @@
TARGET(SWAP) {
PyObject *top = stack_pointer[-1];
PyObject *bottom = stack_pointer[-(2 + (oparg-2))];
- #line 3241 "Python/bytecodes.c"
+ #line 3260 "Python/bytecodes.c"
assert(oparg >= 2);
- #line 4499 "Python/generated_cases.c.h"
+ #line 4518 "Python/generated_cases.c.h"
stack_pointer[-1] = bottom;
stack_pointer[-(2 + (oparg-2))] = top;
DISPATCH();
}
TARGET(INSTRUMENTED_LINE) {
- #line 3245 "Python/bytecodes.c"
+ #line 3264 "Python/bytecodes.c"
_Py_CODEUNIT *here = next_instr-1;
_PyFrame_SetStackPointer(frame, stack_pointer);
int original_opcode = _Py_call_instrumentation_line(
@@ -4522,11 +4541,11 @@
}
opcode = original_opcode;
DISPATCH_GOTO();
- #line 4526 "Python/generated_cases.c.h"
+ #line 4545 "Python/generated_cases.c.h"
}
TARGET(INSTRUMENTED_INSTRUCTION) {
- #line 3267 "Python/bytecodes.c"
+ #line 3286 "Python/bytecodes.c"
int next_opcode = _Py_call_instrumentation_instruction(
tstate, frame, next_instr-1);
if (next_opcode < 0) goto error;
@@ -4538,26 +4557,26 @@
assert(next_opcode > 0 && next_opcode < 256);
opcode = next_opcode;
DISPATCH_GOTO();
- #line 4542 "Python/generated_cases.c.h"
+ #line 4561 "Python/generated_cases.c.h"
}
TARGET(INSTRUMENTED_JUMP_FORWARD) {
- #line 3281 "Python/bytecodes.c"
+ #line 3300 "Python/bytecodes.c"
INSTRUMENTED_JUMP(next_instr-1, next_instr+oparg, PY_MONITORING_EVENT_JUMP);
- #line 4548 "Python/generated_cases.c.h"
+ #line 4567 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_JUMP_BACKWARD) {
- #line 3285 "Python/bytecodes.c"
+ #line 3304 "Python/bytecodes.c"
INSTRUMENTED_JUMP(next_instr-1, next_instr-oparg, PY_MONITORING_EVENT_JUMP);
- #line 4555 "Python/generated_cases.c.h"
+ #line 4574 "Python/generated_cases.c.h"
CHECK_EVAL_BREAKER();
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_TRUE) {
- #line 3290 "Python/bytecodes.c"
+ #line 3309 "Python/bytecodes.c"
PyObject *cond = POP();
int err = PyObject_IsTrue(cond);
Py_DECREF(cond);
@@ -4566,12 +4585,12 @@
assert(err == 0 || err == 1);
int offset = err*oparg;
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
- #line 4570 "Python/generated_cases.c.h"
+ #line 4589 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_FALSE) {
- #line 3301 "Python/bytecodes.c"
+ #line 3320 "Python/bytecodes.c"
PyObject *cond = POP();
int err = PyObject_IsTrue(cond);
Py_DECREF(cond);
@@ -4580,12 +4599,12 @@
assert(err == 0 || err == 1);
int offset = (1-err)*oparg;
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
- #line 4584 "Python/generated_cases.c.h"
+ #line 4603 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_NONE) {
- #line 3312 "Python/bytecodes.c"
+ #line 3331 "Python/bytecodes.c"
PyObject *value = POP();
_Py_CODEUNIT *here = next_instr-1;
int offset;
@@ -4598,12 +4617,12 @@
offset = 0;
}
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
- #line 4602 "Python/generated_cases.c.h"
+ #line 4621 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_NOT_NONE) {
- #line 3327 "Python/bytecodes.c"
+ #line 3346 "Python/bytecodes.c"
PyObject *value = POP();
_Py_CODEUNIT *here = next_instr-1;
int offset;
@@ -4616,30 +4635,30 @@
offset = oparg;
}
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
- #line 4620 "Python/generated_cases.c.h"
+ #line 4639 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(EXTENDED_ARG) {
- #line 3342 "Python/bytecodes.c"
+ #line 3361 "Python/bytecodes.c"
assert(oparg);
opcode = next_instr->op.code;
oparg = oparg << 8 | next_instr->op.arg;
PRE_DISPATCH_GOTO();
DISPATCH_GOTO();
- #line 4631 "Python/generated_cases.c.h"
+ #line 4650 "Python/generated_cases.c.h"
}
TARGET(CACHE) {
- #line 3350 "Python/bytecodes.c"
+ #line 3369 "Python/bytecodes.c"
assert(0 && "Executing a cache.");
Py_UNREACHABLE();
- #line 4638 "Python/generated_cases.c.h"
+ #line 4657 "Python/generated_cases.c.h"
}
TARGET(RESERVED) {
- #line 3355 "Python/bytecodes.c"
+ #line 3374 "Python/bytecodes.c"
assert(0 && "Executing RESERVED instruction.");
Py_UNREACHABLE();
- #line 4645 "Python/generated_cases.c.h"
+ #line 4664 "Python/generated_cases.c.h"
}
More information about the Python-checkins
mailing list