[Python-checkins] gh-98003: Inline call frames for CALL_FUNCTION_EX (GH-98004)

Fidget-Spinner webhook-mailer at python.org
Sun Apr 30 09:08:33 EDT 2023


https://github.com/python/cpython/commit/ed95e8cbd4cbc813666c7ce7760257cc0f169d03
commit: ed95e8cbd4cbc813666c7ce7760257cc0f169d03
branch: main
author: Ken Jin <kenjin at python.org>
committer: Fidget-Spinner <kenjin at python.org>
date: 2023-04-30T21:08:26+08:00
summary:

gh-98003: Inline call frames for CALL_FUNCTION_EX (GH-98004)

files:
A Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst
M Include/internal/pycore_call.h
M Objects/call.c
M Python/bytecodes.c
M Python/ceval.c
M Python/generated_cases.c.h

diff --git a/Include/internal/pycore_call.h b/Include/internal/pycore_call.h
index 55378e3dfebf..5d9342b562b0 100644
--- a/Include/internal/pycore_call.h
+++ b/Include/internal/pycore_call.h
@@ -116,6 +116,16 @@ _PyObject_FastCallTstate(PyThreadState *tstate, PyObject *func, PyObject *const
     return _PyObject_VectorcallTstate(tstate, func, args, (size_t)nargs, NULL);
 }
 
+PyObject *const *
+_PyStack_UnpackDict(PyThreadState *tstate,
+    PyObject *const *args, Py_ssize_t nargs,
+    PyObject *kwargs, PyObject **p_kwnames);
+
+void
+_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
+    PyObject *kwnames);
+
+void _PyStack_UnpackDict_FreeNoDecRef(PyObject *const *stack, PyObject *kwnames);
 
 #ifdef __cplusplus
 }
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst
new file mode 100644
index 000000000000..f9e71bc1344b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst	
@@ -0,0 +1,3 @@
+Complex function calls (those that unpack ``*args`` and/or ``**kwargs``) to
+Python functions are now faster and consume no C stack space.
+
diff --git a/Objects/call.c b/Objects/call.c
index bd027e41f8a9..cf6e357a9904 100644
--- a/Objects/call.c
+++ b/Objects/call.c
@@ -8,16 +8,6 @@
 #include "pycore_tuple.h"         // _PyTuple_ITEMS()
 
 
-static PyObject *const *
-_PyStack_UnpackDict(PyThreadState *tstate,
-                    PyObject *const *args, Py_ssize_t nargs,
-                    PyObject *kwargs, PyObject **p_kwnames);
-
-static void
-_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
-                         PyObject *kwnames);
-
-
 static PyObject *
 null_error(PyThreadState *tstate)
 {
@@ -965,7 +955,7 @@ _PyStack_AsDict(PyObject *const *values, PyObject *kwnames)
    The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET.
 
    When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames) */
-static PyObject *const *
+PyObject *const *
 _PyStack_UnpackDict(PyThreadState *tstate,
                     PyObject *const *args, Py_ssize_t nargs,
                     PyObject *kwargs, PyObject **p_kwnames)
@@ -1034,7 +1024,7 @@ _PyStack_UnpackDict(PyThreadState *tstate,
     return stack;
 }
 
-static void
+void
 _PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
                          PyObject *kwnames)
 {
@@ -1042,6 +1032,12 @@ _PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
     for (Py_ssize_t i = 0; i < n; i++) {
         Py_DECREF(stack[i]);
     }
+    _PyStack_UnpackDict_FreeNoDecRef(stack, kwnames);
+}
+
+void
+_PyStack_UnpackDict_FreeNoDecRef(PyObject *const *stack, PyObject *kwnames)
+{
     PyMem_Free((PyObject **)stack - 1);
     Py_DECREF(kwnames);
 }
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 9de0d92e382d..e83894e89028 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -3103,6 +3103,25 @@ dummy_func(
                 }
             }
             else {
+                if (Py_TYPE(func) == &PyFunction_Type &&
+                    tstate->interp->eval_frame == NULL &&
+                    ((PyFunctionObject *)func)->vectorcall == _PyFunction_Vectorcall) {
+                    assert(PyTuple_CheckExact(callargs));
+                    Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
+                    int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags;
+                    PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(func));
+
+                    _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex(tstate,
+                                                                                (PyFunctionObject *)func, locals,
+                                                                                nargs, callargs, kwargs);
+                    // Need to manually shrink the stack since we exit with DISPATCH_INLINED.
+                    STACK_SHRINK(oparg + 3);
+                    if (new_frame == NULL) {
+                        goto error;
+                    }
+                    frame->return_offset = 0;
+                    DISPATCH_INLINED(new_frame);
+                }
                 result = PyObject_Call(func, callargs, kwargs);
             }
             DECREF_INPUTS();
diff --git a/Python/ceval.c b/Python/ceval.c
index 5d5221b2e409..958689debc87 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -212,6 +212,9 @@ static _PyInterpreterFrame *
 _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
                         PyObject *locals, PyObject* const* args,
                         size_t argcount, PyObject *kwnames);
+static  _PyInterpreterFrame *
+_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
+    PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs);
 static void
 _PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
 
@@ -1501,6 +1504,49 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
     return NULL;
 }
 
+/* Same as _PyEvalFramePushAndInit but takes an args tuple (callargs, holding nargs items) and an optional kwargs dict (may be NULL).
+   Steals references to func, callargs and kwargs. Returns NULL if unpacking kwargs fails, otherwise the result of _PyEvalFramePushAndInit.
+   NOTE(review): locals is not released when _PyStack_UnpackDict fails -- confirm whether the caller's reference leaks on that path. */
+static _PyInterpreterFrame *
+_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
+    PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs)
+{
+    bool has_dict = (kwargs != NULL && PyDict_GET_SIZE(kwargs) > 0);  /* a NULL or empty kwargs needs no unpacking */
+    PyObject *kwnames = NULL;
+    PyObject *const *newargs;
+    if (has_dict) {
+        newargs = _PyStack_UnpackDict(tstate, _PyTuple_ITEMS(callargs), nargs, kwargs, &kwnames);
+        if (newargs == NULL) {
+            Py_DECREF(func);  /* _PyEvalFramePushAndInit was never called, so release the stolen func here */
+            goto error;
+        }
+    }
+    else {
+        newargs = &PyTuple_GET_ITEM(callargs, 0);
+        /* The new frame steals a reference to every argument, but callargs still owns its items, so take an extra reference to each. */
+        for (Py_ssize_t i = 0; i < nargs; ++i) {
+            Py_INCREF(PyTuple_GET_ITEM(callargs, i));
+        }
+    }
+    _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit(
+        tstate, (PyFunctionObject *)func, locals,
+        newargs, nargs, kwnames
+    );
+    if (has_dict) {
+        _PyStack_UnpackDict_FreeNoDecRef(newargs, kwnames);  /* frees the array and kwnames only; the items are not decref'ed */
+    }
+    /* No need to decref func here because the reference has been stolen by
+       _PyEvalFramePushAndInit. new_frame may be NULL; the caller checks it.
+    */
+    Py_DECREF(callargs);
+    Py_XDECREF(kwargs);
+    return new_frame;
+error:
+    Py_DECREF(callargs);
+    Py_XDECREF(kwargs);
+    return NULL;
+}
+
 PyObject *
 _PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func,
                PyObject *locals,
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 864a4f7bcaff..069a7ced0a4c 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -4296,16 +4296,35 @@
                 }
             }
             else {
+                if (Py_TYPE(func) == &PyFunction_Type &&
+                    tstate->interp->eval_frame == NULL &&
+                    ((PyFunctionObject *)func)->vectorcall == _PyFunction_Vectorcall) {
+                    assert(PyTuple_CheckExact(callargs));
+                    Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
+                    int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags;
+                    PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(func));
+
+                    _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex(tstate,
+                                                                                (PyFunctionObject *)func, locals,
+                                                                                nargs, callargs, kwargs);
+                    // Need to manually shrink the stack since we exit with DISPATCH_INLINED.
+                    STACK_SHRINK(oparg + 3);
+                    if (new_frame == NULL) {
+                        goto error;
+                    }
+                    frame->return_offset = 0;
+                    DISPATCH_INLINED(new_frame);
+                }
                 result = PyObject_Call(func, callargs, kwargs);
             }
-            #line 4302 "Python/generated_cases.c.h"
+            #line 4321 "Python/generated_cases.c.h"
             Py_DECREF(func);
             Py_DECREF(callargs);
             Py_XDECREF(kwargs);
-            #line 3109 "Python/bytecodes.c"
+            #line 3128 "Python/bytecodes.c"
             assert(PEEK(3 + (oparg & 1)) == NULL);
             if (result == NULL) { STACK_SHRINK(((oparg & 1) ? 1 : 0)); goto pop_3_error; }
-            #line 4309 "Python/generated_cases.c.h"
+            #line 4328 "Python/generated_cases.c.h"
             STACK_SHRINK(((oparg & 1) ? 1 : 0));
             STACK_SHRINK(2);
             stack_pointer[-1] = result;
@@ -4320,7 +4339,7 @@
             PyObject *kwdefaults = (oparg & 0x02) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0))] : NULL;
             PyObject *defaults = (oparg & 0x01) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x01) ? 1 : 0))] : NULL;
             PyObject *func;
-            #line 3119 "Python/bytecodes.c"
+            #line 3138 "Python/bytecodes.c"
 
             PyFunctionObject *func_obj = (PyFunctionObject *)
                 PyFunction_New(codeobj, GLOBALS());
@@ -4349,14 +4368,14 @@
 
             func_obj->func_version = ((PyCodeObject *)codeobj)->co_version;
             func = (PyObject *)func_obj;
-            #line 4353 "Python/generated_cases.c.h"
+            #line 4372 "Python/generated_cases.c.h"
             STACK_SHRINK(((oparg & 0x01) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x08) ? 1 : 0));
             stack_pointer[-1] = func;
             DISPATCH();
         }
 
         TARGET(RETURN_GENERATOR) {
-            #line 3150 "Python/bytecodes.c"
+            #line 3169 "Python/bytecodes.c"
             assert(PyFunction_Check(frame->f_funcobj));
             PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
             PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
@@ -4377,7 +4396,7 @@
             frame = cframe.current_frame = prev;
             _PyFrame_StackPush(frame, (PyObject *)gen);
             goto resume_frame;
-            #line 4381 "Python/generated_cases.c.h"
+            #line 4400 "Python/generated_cases.c.h"
         }
 
         TARGET(BUILD_SLICE) {
@@ -4385,15 +4404,15 @@
             PyObject *stop = stack_pointer[-(1 + ((oparg == 3) ? 1 : 0))];
             PyObject *start = stack_pointer[-(2 + ((oparg == 3) ? 1 : 0))];
             PyObject *slice;
-            #line 3173 "Python/bytecodes.c"
+            #line 3192 "Python/bytecodes.c"
             slice = PySlice_New(start, stop, step);
-            #line 4391 "Python/generated_cases.c.h"
+            #line 4410 "Python/generated_cases.c.h"
             Py_DECREF(start);
             Py_DECREF(stop);
             Py_XDECREF(step);
-            #line 3175 "Python/bytecodes.c"
+            #line 3194 "Python/bytecodes.c"
             if (slice == NULL) { STACK_SHRINK(((oparg == 3) ? 1 : 0)); goto pop_2_error; }
-            #line 4397 "Python/generated_cases.c.h"
+            #line 4416 "Python/generated_cases.c.h"
             STACK_SHRINK(((oparg == 3) ? 1 : 0));
             STACK_SHRINK(1);
             stack_pointer[-1] = slice;
@@ -4404,7 +4423,7 @@
             PyObject *fmt_spec = ((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? stack_pointer[-((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))] : NULL;
             PyObject *value = stack_pointer[-(1 + (((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))];
             PyObject *result;
-            #line 3179 "Python/bytecodes.c"
+            #line 3198 "Python/bytecodes.c"
             /* Handles f-string value formatting. */
             PyObject *(*conv_fn)(PyObject *);
             int which_conversion = oparg & FVC_MASK;
@@ -4439,7 +4458,7 @@
             Py_DECREF(value);
             Py_XDECREF(fmt_spec);
             if (result == NULL) { STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0)); goto pop_1_error; }
-            #line 4443 "Python/generated_cases.c.h"
+            #line 4462 "Python/generated_cases.c.h"
             STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0));
             stack_pointer[-1] = result;
             DISPATCH();
@@ -4448,10 +4467,10 @@
         TARGET(COPY) {
             PyObject *bottom = stack_pointer[-(1 + (oparg-1))];
             PyObject *top;
-            #line 3216 "Python/bytecodes.c"
+            #line 3235 "Python/bytecodes.c"
             assert(oparg > 0);
             top = Py_NewRef(bottom);
-            #line 4455 "Python/generated_cases.c.h"
+            #line 4474 "Python/generated_cases.c.h"
             STACK_GROW(1);
             stack_pointer[-1] = top;
             DISPATCH();
@@ -4463,7 +4482,7 @@
             PyObject *rhs = stack_pointer[-1];
             PyObject *lhs = stack_pointer[-2];
             PyObject *res;
-            #line 3221 "Python/bytecodes.c"
+            #line 3240 "Python/bytecodes.c"
             #if ENABLE_SPECIALIZATION
             _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
             if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
@@ -4478,12 +4497,12 @@
             assert((unsigned)oparg < Py_ARRAY_LENGTH(binary_ops));
             assert(binary_ops[oparg]);
             res = binary_ops[oparg](lhs, rhs);
-            #line 4482 "Python/generated_cases.c.h"
+            #line 4501 "Python/generated_cases.c.h"
             Py_DECREF(lhs);
             Py_DECREF(rhs);
-            #line 3236 "Python/bytecodes.c"
+            #line 3255 "Python/bytecodes.c"
             if (res == NULL) goto pop_2_error;
-            #line 4487 "Python/generated_cases.c.h"
+            #line 4506 "Python/generated_cases.c.h"
             STACK_SHRINK(1);
             stack_pointer[-1] = res;
             next_instr += 1;
@@ -4493,16 +4512,16 @@
         TARGET(SWAP) {
             PyObject *top = stack_pointer[-1];
             PyObject *bottom = stack_pointer[-(2 + (oparg-2))];
-            #line 3241 "Python/bytecodes.c"
+            #line 3260 "Python/bytecodes.c"
             assert(oparg >= 2);
-            #line 4499 "Python/generated_cases.c.h"
+            #line 4518 "Python/generated_cases.c.h"
             stack_pointer[-1] = bottom;
             stack_pointer[-(2 + (oparg-2))] = top;
             DISPATCH();
         }
 
         TARGET(INSTRUMENTED_LINE) {
-            #line 3245 "Python/bytecodes.c"
+            #line 3264 "Python/bytecodes.c"
             _Py_CODEUNIT *here = next_instr-1;
             _PyFrame_SetStackPointer(frame, stack_pointer);
             int original_opcode = _Py_call_instrumentation_line(
@@ -4522,11 +4541,11 @@
             }
             opcode = original_opcode;
             DISPATCH_GOTO();
-            #line 4526 "Python/generated_cases.c.h"
+            #line 4545 "Python/generated_cases.c.h"
         }
 
         TARGET(INSTRUMENTED_INSTRUCTION) {
-            #line 3267 "Python/bytecodes.c"
+            #line 3286 "Python/bytecodes.c"
             int next_opcode = _Py_call_instrumentation_instruction(
                 tstate, frame, next_instr-1);
             if (next_opcode < 0) goto error;
@@ -4538,26 +4557,26 @@
             assert(next_opcode > 0 && next_opcode < 256);
             opcode = next_opcode;
             DISPATCH_GOTO();
-            #line 4542 "Python/generated_cases.c.h"
+            #line 4561 "Python/generated_cases.c.h"
         }
 
         TARGET(INSTRUMENTED_JUMP_FORWARD) {
-            #line 3281 "Python/bytecodes.c"
+            #line 3300 "Python/bytecodes.c"
             INSTRUMENTED_JUMP(next_instr-1, next_instr+oparg, PY_MONITORING_EVENT_JUMP);
-            #line 4548 "Python/generated_cases.c.h"
+            #line 4567 "Python/generated_cases.c.h"
             DISPATCH();
         }
 
         TARGET(INSTRUMENTED_JUMP_BACKWARD) {
-            #line 3285 "Python/bytecodes.c"
+            #line 3304 "Python/bytecodes.c"
             INSTRUMENTED_JUMP(next_instr-1, next_instr-oparg, PY_MONITORING_EVENT_JUMP);
-            #line 4555 "Python/generated_cases.c.h"
+            #line 4574 "Python/generated_cases.c.h"
             CHECK_EVAL_BREAKER();
             DISPATCH();
         }
 
         TARGET(INSTRUMENTED_POP_JUMP_IF_TRUE) {
-            #line 3290 "Python/bytecodes.c"
+            #line 3309 "Python/bytecodes.c"
             PyObject *cond = POP();
             int err = PyObject_IsTrue(cond);
             Py_DECREF(cond);
@@ -4566,12 +4585,12 @@
             assert(err == 0 || err == 1);
             int offset = err*oparg;
             INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
-            #line 4570 "Python/generated_cases.c.h"
+            #line 4589 "Python/generated_cases.c.h"
             DISPATCH();
         }
 
         TARGET(INSTRUMENTED_POP_JUMP_IF_FALSE) {
-            #line 3301 "Python/bytecodes.c"
+            #line 3320 "Python/bytecodes.c"
             PyObject *cond = POP();
             int err = PyObject_IsTrue(cond);
             Py_DECREF(cond);
@@ -4580,12 +4599,12 @@
             assert(err == 0 || err == 1);
             int offset = (1-err)*oparg;
             INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
-            #line 4584 "Python/generated_cases.c.h"
+            #line 4603 "Python/generated_cases.c.h"
             DISPATCH();
         }
 
         TARGET(INSTRUMENTED_POP_JUMP_IF_NONE) {
-            #line 3312 "Python/bytecodes.c"
+            #line 3331 "Python/bytecodes.c"
             PyObject *value = POP();
             _Py_CODEUNIT *here = next_instr-1;
             int offset;
@@ -4598,12 +4617,12 @@
                 offset = 0;
             }
             INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
-            #line 4602 "Python/generated_cases.c.h"
+            #line 4621 "Python/generated_cases.c.h"
             DISPATCH();
         }
 
         TARGET(INSTRUMENTED_POP_JUMP_IF_NOT_NONE) {
-            #line 3327 "Python/bytecodes.c"
+            #line 3346 "Python/bytecodes.c"
             PyObject *value = POP();
             _Py_CODEUNIT *here = next_instr-1;
             int offset;
@@ -4616,30 +4635,30 @@
                  offset = oparg;
             }
             INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
-            #line 4620 "Python/generated_cases.c.h"
+            #line 4639 "Python/generated_cases.c.h"
             DISPATCH();
         }
 
         TARGET(EXTENDED_ARG) {
-            #line 3342 "Python/bytecodes.c"
+            #line 3361 "Python/bytecodes.c"
             assert(oparg);
             opcode = next_instr->op.code;
             oparg = oparg << 8 | next_instr->op.arg;
             PRE_DISPATCH_GOTO();
             DISPATCH_GOTO();
-            #line 4631 "Python/generated_cases.c.h"
+            #line 4650 "Python/generated_cases.c.h"
         }
 
         TARGET(CACHE) {
-            #line 3350 "Python/bytecodes.c"
+            #line 3369 "Python/bytecodes.c"
             assert(0 && "Executing a cache.");
             Py_UNREACHABLE();
-            #line 4638 "Python/generated_cases.c.h"
+            #line 4657 "Python/generated_cases.c.h"
         }
 
         TARGET(RESERVED) {
-            #line 3355 "Python/bytecodes.c"
+            #line 3374 "Python/bytecodes.c"
             assert(0 && "Executing RESERVED instruction.");
             Py_UNREACHABLE();
-            #line 4645 "Python/generated_cases.c.h"
+            #line 4664 "Python/generated_cases.c.h"
         }



More information about the Python-checkins mailing list