[Python-checkins] bpo-44525: Specialize simple Python calls. (GH-29033)

markshannon webhook-mailer at python.org
Wed Oct 20 11:08:44 EDT 2021


https://github.com/python/cpython/commit/8863a0fcc5f04ab7c3428e713917831f9b1deb18
commit: 8863a0fcc5f04ab7c3428e713917831f9b1deb18
branch: main
author: Mark Shannon <mark at hotpy.org>
committer: markshannon <mark at hotpy.org>
date: 2021-10-20T16:08:28+01:00
summary:

bpo-44525: Specialize simple Python calls. (GH-29033)

files:
A Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst
M Include/internal/pycore_code.h
M Include/opcode.h
M Lib/opcode.py
M Python/ceval.c
M Python/opcode_targets.h
M Python/specialize.c

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index d464f3d2a8131..482bd7eb6ae70 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -35,6 +35,12 @@ typedef struct {
     PyObject *obj;
 } _PyObjectCache;
 
+typedef struct {
+    uint32_t func_version;
+    uint16_t defaults_start;
+    uint16_t defaults_len;
+} _PyCallCache;
+
 /* Add specialized versions of entries to this union.
  *
  * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
@@ -51,6 +57,7 @@ typedef union {
     _PyAttrCache attr;
     _PyLoadGlobalCache load_global;
     _PyObjectCache obj;
+    _PyCallCache call;
 } SpecializedCacheEntry;
 
 #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
diff --git a/Include/opcode.h b/Include/opcode.h
index 22d968ee0d4c7..f8c02b840e052 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -153,29 +153,30 @@ extern "C" {
 #define CALL_FUNCTION_BUILTIN_FAST  45
 #define CALL_FUNCTION_LEN        46
 #define CALL_FUNCTION_ISINSTANCE  47
-#define JUMP_ABSOLUTE_QUICK      48
-#define LOAD_ATTR_ADAPTIVE       58
-#define LOAD_ATTR_INSTANCE_VALUE  80
-#define LOAD_ATTR_WITH_HINT      81
-#define LOAD_ATTR_SLOT           87
-#define LOAD_ATTR_MODULE         88
-#define LOAD_GLOBAL_ADAPTIVE    120
-#define LOAD_GLOBAL_MODULE      122
-#define LOAD_GLOBAL_BUILTIN     123
-#define LOAD_METHOD_ADAPTIVE    127
-#define LOAD_METHOD_CACHED      128
-#define LOAD_METHOD_CLASS       134
-#define LOAD_METHOD_MODULE      140
-#define LOAD_METHOD_NO_DICT     143
-#define STORE_ATTR_ADAPTIVE     149
-#define STORE_ATTR_INSTANCE_VALUE 150
-#define STORE_ATTR_SLOT         151
-#define STORE_ATTR_WITH_HINT    153
-#define LOAD_FAST__LOAD_FAST    154
-#define STORE_FAST__LOAD_FAST   158
-#define LOAD_FAST__LOAD_CONST   159
-#define LOAD_CONST__LOAD_FAST   167
-#define STORE_FAST__STORE_FAST  168
+#define CALL_FUNCTION_PY_SIMPLE  48
+#define JUMP_ABSOLUTE_QUICK      58
+#define LOAD_ATTR_ADAPTIVE       80
+#define LOAD_ATTR_INSTANCE_VALUE  81
+#define LOAD_ATTR_WITH_HINT      87
+#define LOAD_ATTR_SLOT           88
+#define LOAD_ATTR_MODULE        120
+#define LOAD_GLOBAL_ADAPTIVE    122
+#define LOAD_GLOBAL_MODULE      123
+#define LOAD_GLOBAL_BUILTIN     127
+#define LOAD_METHOD_ADAPTIVE    128
+#define LOAD_METHOD_CACHED      134
+#define LOAD_METHOD_CLASS       140
+#define LOAD_METHOD_MODULE      143
+#define LOAD_METHOD_NO_DICT     149
+#define STORE_ATTR_ADAPTIVE     150
+#define STORE_ATTR_INSTANCE_VALUE 151
+#define STORE_ATTR_SLOT         153
+#define STORE_ATTR_WITH_HINT    154
+#define LOAD_FAST__LOAD_FAST    158
+#define STORE_FAST__LOAD_FAST   159
+#define LOAD_FAST__LOAD_CONST   167
+#define LOAD_CONST__LOAD_FAST   168
+#define STORE_FAST__STORE_FAST  169
 #define DO_TRACING              255
 #ifdef NEED_OPCODE_JUMP_TABLES
 static uint32_t _PyOpcode_RelativeJump[8] = {
diff --git a/Lib/opcode.py b/Lib/opcode.py
index fe6066fc517a6..5377ec32bf153 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -237,6 +237,7 @@ def jabs_op(name, op):
     "CALL_FUNCTION_BUILTIN_FAST",
     "CALL_FUNCTION_LEN",
     "CALL_FUNCTION_ISINSTANCE",
+    "CALL_FUNCTION_PY_SIMPLE",
     "JUMP_ABSOLUTE_QUICK",
     "LOAD_ATTR_ADAPTIVE",
     "LOAD_ATTR_INSTANCE_VALUE",
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst
new file mode 100644
index 0000000000000..6ab1d05603db8
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst	
@@ -0,0 +1 @@
+Specialize simple calls to Python functions (no starargs, keyword dict, or closure)
diff --git a/Python/ceval.c b/Python/ceval.c
index 76325903149cd..f4186dae8a448 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -4720,9 +4720,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
 
         TARGET(CALL_FUNCTION_ADAPTIVE) {
             SpecializedCacheEntry *cache = GET_CACHE();
+            nargs = cache->adaptive.original_oparg;
             if (cache->adaptive.counter == 0) {
                 next_instr--;
-                int nargs = cache->adaptive.original_oparg;
                 if (_Py_Specialize_CallFunction(
                     PEEK(nargs + 1), next_instr, nargs, cache, BUILTINS()) < 0) {
                     goto error;
@@ -4732,9 +4732,48 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
             else {
                 STAT_INC(CALL_FUNCTION, deferred);
                 cache->adaptive.counter--;
-                oparg = cache->adaptive.original_oparg;
-                JUMP_TO_INSTRUCTION(CALL_FUNCTION);
+                oparg = nargs;
+                kwnames = NULL;
+                postcall_shrink = 1;
+                goto call_function;
+            }
+        }
+
+        TARGET(CALL_FUNCTION_PY_SIMPLE) {
+            SpecializedCacheEntry *caches = GET_CACHE();
+            _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
+            int argcount = cache0->original_oparg;
+            _PyCallCache *cache1 = &caches[-1].call;
+            PyObject *callable = PEEK(argcount+1);
+            DEOPT_IF(!PyFunction_Check(callable), CALL_FUNCTION);
+            PyFunctionObject *func = (PyFunctionObject *)callable;
+            DEOPT_IF(func->func_version != cache1->func_version, CALL_FUNCTION);
+            /* PEP 523 */
+            DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION);
+            STAT_INC(CALL_FUNCTION, hit);
+            record_cache_hit(cache0);
+            InterpreterFrame *new_frame = _PyThreadState_PushFrame(
+                tstate, PyFunction_AS_FRAME_CONSTRUCTOR(func), NULL);
+            if (new_frame == NULL) {
+                goto error;
+            }
+            STACK_SHRINK(argcount);
+            for (int i = 0; i < argcount; i++) {
+                new_frame->localsplus[i] = stack_pointer[i];
+            }
+            int deflen = cache1->defaults_len;
+            for (int i = 0; i < deflen; i++) {
+                PyObject *def = PyTuple_GET_ITEM(func->func_defaults, cache1->defaults_start+i);
+                Py_INCREF(def);
+                new_frame->localsplus[argcount+i] = def;
             }
+            STACK_SHRINK(1);
+            Py_DECREF(func);
+            _PyFrame_SetStackPointer(frame, stack_pointer);
+            new_frame->previous = tstate->frame;
+            new_frame->depth = frame->depth + 1;
+            tstate->frame = frame = new_frame;
+            goto start_frame;
         }
 
         TARGET(CALL_FUNCTION_BUILTIN_O) {
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
index 4179689e8a866..5c7d3ad544e56 100644
--- a/Python/opcode_targets.h
+++ b/Python/opcode_targets.h
@@ -47,7 +47,7 @@ static void *opcode_targets[256] = {
     &&TARGET_CALL_FUNCTION_BUILTIN_FAST,
     &&TARGET_CALL_FUNCTION_LEN,
     &&TARGET_CALL_FUNCTION_ISINSTANCE,
-    &&TARGET_JUMP_ABSOLUTE_QUICK,
+    &&TARGET_CALL_FUNCTION_PY_SIMPLE,
     &&TARGET_WITH_EXCEPT_START,
     &&TARGET_GET_AITER,
     &&TARGET_GET_ANEXT,
@@ -57,7 +57,7 @@ static void *opcode_targets[256] = {
     &&TARGET_INPLACE_ADD,
     &&TARGET_INPLACE_SUBTRACT,
     &&TARGET_INPLACE_MULTIPLY,
-    &&TARGET_LOAD_ATTR_ADAPTIVE,
+    &&TARGET_JUMP_ABSOLUTE_QUICK,
     &&TARGET_INPLACE_MODULO,
     &&TARGET_STORE_SUBSCR,
     &&TARGET_DELETE_SUBSCR,
@@ -79,15 +79,15 @@ static void *opcode_targets[256] = {
     &&TARGET_INPLACE_AND,
     &&TARGET_INPLACE_XOR,
     &&TARGET_INPLACE_OR,
+    &&TARGET_LOAD_ATTR_ADAPTIVE,
     &&TARGET_LOAD_ATTR_INSTANCE_VALUE,
-    &&TARGET_LOAD_ATTR_WITH_HINT,
     &&TARGET_LIST_TO_TUPLE,
     &&TARGET_RETURN_VALUE,
     &&TARGET_IMPORT_STAR,
     &&TARGET_SETUP_ANNOTATIONS,
     &&TARGET_YIELD_VALUE,
+    &&TARGET_LOAD_ATTR_WITH_HINT,
     &&TARGET_LOAD_ATTR_SLOT,
-    &&TARGET_LOAD_ATTR_MODULE,
     &&TARGET_POP_EXCEPT,
     &&TARGET_STORE_NAME,
     &&TARGET_DELETE_NAME,
@@ -119,46 +119,46 @@ static void *opcode_targets[256] = {
     &&TARGET_IS_OP,
     &&TARGET_CONTAINS_OP,
     &&TARGET_RERAISE,
-    &&TARGET_LOAD_GLOBAL_ADAPTIVE,
+    &&TARGET_LOAD_ATTR_MODULE,
     &&TARGET_JUMP_IF_NOT_EXC_MATCH,
+    &&TARGET_LOAD_GLOBAL_ADAPTIVE,
     &&TARGET_LOAD_GLOBAL_MODULE,
-    &&TARGET_LOAD_GLOBAL_BUILTIN,
     &&TARGET_LOAD_FAST,
     &&TARGET_STORE_FAST,
     &&TARGET_DELETE_FAST,
+    &&TARGET_LOAD_GLOBAL_BUILTIN,
     &&TARGET_LOAD_METHOD_ADAPTIVE,
-    &&TARGET_LOAD_METHOD_CACHED,
     &&TARGET_GEN_START,
     &&TARGET_RAISE_VARARGS,
     &&TARGET_CALL_FUNCTION,
     &&TARGET_MAKE_FUNCTION,
     &&TARGET_BUILD_SLICE,
-    &&TARGET_LOAD_METHOD_CLASS,
+    &&TARGET_LOAD_METHOD_CACHED,
     &&TARGET_MAKE_CELL,
     &&TARGET_LOAD_CLOSURE,
     &&TARGET_LOAD_DEREF,
     &&TARGET_STORE_DEREF,
     &&TARGET_DELETE_DEREF,
-    &&TARGET_LOAD_METHOD_MODULE,
+    &&TARGET_LOAD_METHOD_CLASS,
     &&TARGET_CALL_FUNCTION_KW,
     &&TARGET_CALL_FUNCTION_EX,
-    &&TARGET_LOAD_METHOD_NO_DICT,
+    &&TARGET_LOAD_METHOD_MODULE,
     &&TARGET_EXTENDED_ARG,
     &&TARGET_LIST_APPEND,
     &&TARGET_SET_ADD,
     &&TARGET_MAP_ADD,
     &&TARGET_LOAD_CLASSDEREF,
+    &&TARGET_LOAD_METHOD_NO_DICT,
     &&TARGET_STORE_ATTR_ADAPTIVE,
     &&TARGET_STORE_ATTR_INSTANCE_VALUE,
-    &&TARGET_STORE_ATTR_SLOT,
     &&TARGET_MATCH_CLASS,
+    &&TARGET_STORE_ATTR_SLOT,
     &&TARGET_STORE_ATTR_WITH_HINT,
-    &&TARGET_LOAD_FAST__LOAD_FAST,
     &&TARGET_FORMAT_VALUE,
     &&TARGET_BUILD_CONST_KEY_MAP,
     &&TARGET_BUILD_STRING,
+    &&TARGET_LOAD_FAST__LOAD_FAST,
     &&TARGET_STORE_FAST__LOAD_FAST,
-    &&TARGET_LOAD_FAST__LOAD_CONST,
     &&TARGET_LOAD_METHOD,
     &&TARGET_CALL_METHOD,
     &&TARGET_LIST_EXTEND,
@@ -166,6 +166,7 @@ static void *opcode_targets[256] = {
     &&TARGET_DICT_MERGE,
     &&TARGET_DICT_UPDATE,
     &&TARGET_CALL_METHOD_KW,
+    &&TARGET_LOAD_FAST__LOAD_CONST,
     &&TARGET_LOAD_CONST__LOAD_FAST,
     &&TARGET_STORE_FAST__STORE_FAST,
     &&_unknown_opcode,
@@ -253,6 +254,5 @@ static void *opcode_targets[256] = {
     &&_unknown_opcode,
     &&_unknown_opcode,
     &&_unknown_opcode,
-    &&_unknown_opcode,
     &&TARGET_DO_TRACING
 };
diff --git a/Python/specialize.c b/Python/specialize.c
index ee573d29a474e..5cc7082a35a21 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -249,7 +249,7 @@ static uint8_t cache_requirements[256] = {
     [BINARY_ADD] = 0,
     [BINARY_MULTIPLY] = 0,
     [BINARY_SUBSCR] = 0,
-    [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache */
+    [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
     [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
 };
 
@@ -461,15 +461,20 @@ _Py_Quicken(PyCodeObject *code) {
 #define SPEC_FAIL_NON_FUNCTION_SCOPE 11
 #define SPEC_FAIL_DIFFERENT_TYPES 12
 
-/* Call function */
+/* Calls */
+#define SPEC_FAIL_GENERATOR 7
+#define SPEC_FAIL_COMPLEX_PARAMETERS 8
+#define SPEC_FAIL_WRONG_NUMBER_ARGUMENTS 9
+#define SPEC_FAIL_CO_NOT_OPTIMIZED 10
+/* SPEC_FAIL_METHOD  defined as 11 above */
+#define SPEC_FAIL_FREE_VARS 12
+#define SPEC_FAIL_PYCFUNCTION 13
+#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 14
+#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 15
+#define SPEC_FAIL_PYCFUNCTION_NOARGS 16
+#define SPEC_FAIL_BAD_CALL_FLAGS 17
+#define SPEC_FAIL_CLASS 18
 
-#define SPEC_FAIL_PYCFUNCTION 10
-#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 13
-#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 14
-#define SPEC_FAIL_PYCFUNCTION_NOARGS 15
-#define SPEC_FAIL_BAD_CALL_FLAGS 16
-#define SPEC_FAIL_PYTHON_FUNCTION 17
-#define SPEC_FAIL_IMMUTABLE_CLASS 18
 
 static int
 specialize_module_load_attr(
@@ -1236,6 +1241,69 @@ _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *ins
     return 0;
 }
 
+static int
+specialize_class_call(
+    PyObject *callable, _Py_CODEUNIT *instr,
+    int nargs, SpecializedCacheEntry *cache)
+{
+    SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CLASS);
+    return -1;
+}
+
+static int
+specialize_py_call(
+    PyFunctionObject *func, _Py_CODEUNIT *instr,
+    int nargs, SpecializedCacheEntry *cache)
+{
+    _PyCallCache *cache1 = &cache[-1].call;
+    /* Exclude generator or coroutines for now */
+    PyCodeObject *code = (PyCodeObject *)func->func_code;
+    int flags = code->co_flags;
+    if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
+        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_GENERATOR);
+        return -1;
+    }
+    if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) {
+        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS);
+        return -1;
+    }
+    if ((flags & CO_OPTIMIZED) == 0) {
+        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED);
+        return -1;
+    }
+    if (code->co_nfreevars) {
+        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS);
+        return -1;
+    }
+    int argcount = code->co_argcount;
+    int defcount = func->func_defaults == NULL ? 0 : (int)PyTuple_GET_SIZE(func->func_defaults);
+    assert(defcount <= argcount);
+    int min_args = argcount-defcount;
+    if (nargs > argcount || nargs < min_args) {
+        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
+        return -1;
+    }
+    assert(nargs <= argcount && nargs >= min_args);
+    int defstart = nargs - min_args;
+    int deflen = argcount - nargs;
+    assert(defstart >= 0 && deflen >= 0);
+    assert(deflen == 0 || func->func_defaults != NULL);
+    if (defstart > 0xffff || deflen > 0xffff) {
+        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE);
+        return -1;
+    }
+    int version = _PyFunction_GetVersionForCurrentState(func);
+    if (version == 0) {
+        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_VERSIONS);
+        return -1;
+    }
+    cache1->func_version = version;
+    cache1->defaults_start = defstart;
+    cache1->defaults_len = deflen;
+    *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_PY_SIMPLE, _Py_OPARG(*instr));
+    return 0;
+}
+
 #if COLLECT_SPECIALIZATION_STATS_DETAILED
 static int
 builtin_call_fail_kind(int ml_flags)
@@ -1315,11 +1383,7 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
 static int
 call_fail_kind(PyObject *callable)
 {
-    if (PyFunction_Check(callable)) {
-        return SPEC_FAIL_PYTHON_FUNCTION;
-    }
-    // new-style bound methods
-    else if (PyInstanceMethod_Check(callable)) {
+    if (PyInstanceMethod_Check(callable)) {
         return SPEC_FAIL_METHOD;
     }
     else if (PyMethod_Check(callable)) {
@@ -1330,17 +1394,14 @@ call_fail_kind(PyObject *callable)
         return SPEC_FAIL_METHOD;
     }
     else if (PyType_Check(callable)) {
-        PyTypeObject *type = Py_TYPE(callable);
-        return PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ?
-            SPEC_FAIL_IMMUTABLE_CLASS : SPEC_FAIL_MUTABLE_CLASS;
+        return  SPEC_FAIL_CLASS;
     }
     return SPEC_FAIL_OTHER;
 }
 #endif
 
 /* TODO:
-    - Specialize calling types.
-    - Specialize python function calls.
+    - Specialize calling classes.
 */
 int
 _Py_Specialize_CallFunction(
@@ -1352,9 +1413,15 @@ _Py_Specialize_CallFunction(
     if (PyCFunction_CheckExact(callable)) {
         fail = specialize_c_call(callable, instr, nargs, cache, builtins);
     }
+    else if (PyFunction_Check(callable)) {
+        fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, cache);
+    }
+    else if (PyType_Check(callable)) {
+        fail = specialize_class_call(callable, instr, nargs, cache);
+    }
     else {
         SPECIALIZATION_FAIL(CALL_FUNCTION, call_fail_kind(callable));
-        fail = 1;
+        fail = -1;
     }
     _PyAdaptiveEntry *cache0 = &cache->adaptive;
     if (fail) {



More information about the Python-checkins mailing list