[Python-checkins] gh-106529: Implement POP_JUMP_IF_XXX uops (#106551)

gvanrossum webhook-mailer at python.org
Mon Jul 10 19:04:29 EDT 2023


https://github.com/python/cpython/commit/22988c323ad621b9f47b6cb640b80ac806e26368
commit: 22988c323ad621b9f47b6cb640b80ac806e26368
branch: main
author: Guido van Rossum <guido at python.org>
committer: gvanrossum <gvanrossum at gmail.com>
date: 2023-07-10T16:04:26-07:00
summary:

gh-106529: Implement POP_JUMP_IF_XXX uops (#106551)

- Hand-written uops JUMP_IF_{TRUE,FALSE}.
  These peek at the top of the stack.
  The jump target (in superblock space) is absolute.

- Hand-written translation for POP_JUMP_IF_{TRUE,FALSE},
  assuming the jump is unlikely.
  Once we implement jump-likelihood profiling,
  we can implement the jump-unlikely case (in another PR).

- Tests (including some test cleanup).

- Improvements to len(ex) and ex[i] to expose the whole trace.

files:
M Lib/test/test_capi/test_misc.py
M Python/ceval.c
M Python/opcode_metadata.h
M Python/optimizer.c
M Tools/cases_generator/generate_cases.py

diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py
index 5f39f23401c3f..fd27bd06097f6 100644
--- a/Lib/test/test_capi/test_misc.py
+++ b/Lib/test/test_capi/test_misc.py
@@ -2347,11 +2347,12 @@ def func():
 
 @contextlib.contextmanager
 def temporary_optimizer(opt):
+    old_opt = _testinternalcapi.get_optimizer()
     _testinternalcapi.set_optimizer(opt)
     try:
         yield
     finally:
-        _testinternalcapi.set_optimizer(None)
+        _testinternalcapi.set_optimizer(old_opt)
 
 
 @contextlib.contextmanager
@@ -2420,8 +2421,8 @@ def long_loop():
             self.assertEqual(opt.get_count(), 10)
 
 
-
-def get_first_executor(code):
+def get_first_executor(func):
+    code = func.__code__
     co_code = code.co_code
     JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"]
     for i in range(0, len(co_code), 2):
@@ -2446,13 +2447,7 @@ def testfunc(x):
         with temporary_optimizer(opt):
             testfunc(1000)
 
-        ex = None
-        for offset in range(0, len(testfunc.__code__.co_code), 2):
-            try:
-                ex = _testinternalcapi.get_executor(testfunc.__code__, offset)
-                break
-            except ValueError:
-                pass
+        ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
         uops = {opname for opname, _ in ex}
         self.assertIn("SAVE_IP", uops)
@@ -2493,11 +2488,13 @@ def many_vars():
 
         opt = _testinternalcapi.get_uop_optimizer()
         with temporary_optimizer(opt):
-            ex = get_first_executor(many_vars.__code__)
+            ex = get_first_executor(many_vars)
             self.assertIsNone(ex)
             many_vars()
-            ex = get_first_executor(many_vars.__code__)
-            self.assertIn(("LOAD_FAST", 259), list(ex))
+
+        ex = get_first_executor(many_vars)
+        self.assertIsNotNone(ex)
+        self.assertIn(("LOAD_FAST", 259), list(ex))
 
     def test_unspecialized_unpack(self):
         # An example of an unspecialized opcode
@@ -2516,17 +2513,43 @@ def testfunc(x):
         with temporary_optimizer(opt):
             testfunc(10)
 
-        ex = None
-        for offset in range(0, len(testfunc.__code__.co_code), 2):
-            try:
-                ex = _testinternalcapi.get_executor(testfunc.__code__, offset)
-                break
-            except ValueError:
-                pass
+        ex = get_first_executor(testfunc)
         self.assertIsNotNone(ex)
         uops = {opname for opname, _ in ex}
         self.assertIn("UNPACK_SEQUENCE", uops)
 
+    def test_pop_jump_if_false(self):
+        def testfunc(n):
+            i = 0
+            while i < n:
+                i += 1
+
+        opt = _testinternalcapi.get_uop_optimizer()
+
+        with temporary_optimizer(opt):
+            testfunc(10)
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        uops = {opname for opname, _ in ex}
+        self.assertIn("_POP_JUMP_IF_FALSE", uops)
+
+    def test_pop_jump_if_true(self):
+        def testfunc(n):
+            i = 0
+            while not i >= n:
+                i += 1
+
+        opt = _testinternalcapi.get_uop_optimizer()
+
+        with temporary_optimizer(opt):
+            testfunc(10)
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        uops = {opname for opname, _ in ex}
+        self.assertIn("_POP_JUMP_IF_TRUE", uops)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Python/ceval.c b/Python/ceval.c
index da1135549a255..866acd2dd69c7 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -2751,7 +2751,8 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
         operand = self->trace[pc].operand;
         oparg = (int)operand;
         DPRINTF(3,
-                "  uop %s, operand %" PRIu64 ", stack_level %d\n",
+                "%4d: uop %s, operand %" PRIu64 ", stack_level %d\n",
+                pc,
                 opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode],
                 operand,
                 (int)(stack_pointer - _PyFrame_Stackbase(frame)));
@@ -2763,6 +2764,25 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
 #define ENABLE_SPECIALIZATION 0
 #include "executor_cases.c.h"
 
+            // NOTE: These pop-jumps move the uop pc, not the bytecode ip
+            case _POP_JUMP_IF_FALSE:
+            {
+                if (Py_IsFalse(stack_pointer[-1])) {
+                    pc = oparg;
+                }
+                stack_pointer--;
+                break;
+            }
+
+            case _POP_JUMP_IF_TRUE:
+            {
+                if (Py_IsTrue(stack_pointer[-1])) {
+                    pc = oparg;
+                }
+                stack_pointer--;
+                break;
+            }
+
             case SAVE_IP:
             {
                 frame->prev_instr = ip_offset + oparg;
diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h
index 92768e60f62ab..64923e61fa459 100644
--- a/Python/opcode_metadata.h
+++ b/Python/opcode_metadata.h
@@ -21,18 +21,20 @@
 
 #define EXIT_TRACE 300
 #define SAVE_IP 301
-#define _GUARD_BOTH_INT 302
-#define _BINARY_OP_MULTIPLY_INT 303
-#define _BINARY_OP_ADD_INT 304
-#define _BINARY_OP_SUBTRACT_INT 305
-#define _GUARD_BOTH_FLOAT 306
-#define _BINARY_OP_MULTIPLY_FLOAT 307
-#define _BINARY_OP_ADD_FLOAT 308
-#define _BINARY_OP_SUBTRACT_FLOAT 309
-#define _GUARD_BOTH_UNICODE 310
-#define _BINARY_OP_ADD_UNICODE 311
-#define _LOAD_LOCALS 312
-#define _LOAD_FROM_DICT_OR_GLOBALS 313
+#define _POP_JUMP_IF_FALSE 302
+#define _POP_JUMP_IF_TRUE 303
+#define _GUARD_BOTH_INT 304
+#define _BINARY_OP_MULTIPLY_INT 305
+#define _BINARY_OP_ADD_INT 306
+#define _BINARY_OP_SUBTRACT_INT 307
+#define _GUARD_BOTH_FLOAT 308
+#define _BINARY_OP_MULTIPLY_FLOAT 309
+#define _BINARY_OP_ADD_FLOAT 310
+#define _BINARY_OP_SUBTRACT_FLOAT 311
+#define _GUARD_BOTH_UNICODE 312
+#define _BINARY_OP_ADD_UNICODE 313
+#define _LOAD_LOCALS 314
+#define _LOAD_FROM_DICT_OR_GLOBALS 315
 
 #ifndef NEED_OPCODE_METADATA
 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
@@ -1294,18 +1296,20 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
 const char * const _PyOpcode_uop_name[512] = {
     [300] = "EXIT_TRACE",
     [301] = "SAVE_IP",
-    [302] = "_GUARD_BOTH_INT",
-    [303] = "_BINARY_OP_MULTIPLY_INT",
-    [304] = "_BINARY_OP_ADD_INT",
-    [305] = "_BINARY_OP_SUBTRACT_INT",
-    [306] = "_GUARD_BOTH_FLOAT",
-    [307] = "_BINARY_OP_MULTIPLY_FLOAT",
-    [308] = "_BINARY_OP_ADD_FLOAT",
-    [309] = "_BINARY_OP_SUBTRACT_FLOAT",
-    [310] = "_GUARD_BOTH_UNICODE",
-    [311] = "_BINARY_OP_ADD_UNICODE",
-    [312] = "_LOAD_LOCALS",
-    [313] = "_LOAD_FROM_DICT_OR_GLOBALS",
+    [302] = "_POP_JUMP_IF_FALSE",
+    [303] = "_POP_JUMP_IF_TRUE",
+    [304] = "_GUARD_BOTH_INT",
+    [305] = "_BINARY_OP_MULTIPLY_INT",
+    [306] = "_BINARY_OP_ADD_INT",
+    [307] = "_BINARY_OP_SUBTRACT_INT",
+    [308] = "_GUARD_BOTH_FLOAT",
+    [309] = "_BINARY_OP_MULTIPLY_FLOAT",
+    [310] = "_BINARY_OP_ADD_FLOAT",
+    [311] = "_BINARY_OP_SUBTRACT_FLOAT",
+    [312] = "_GUARD_BOTH_UNICODE",
+    [313] = "_BINARY_OP_ADD_UNICODE",
+    [314] = "_LOAD_LOCALS",
+    [315] = "_LOAD_FROM_DICT_OR_GLOBALS",
 };
 #endif // NEED_OPCODE_METADATA
 #endif
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 1d731ed2309c3..48c29f55bee46 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -307,7 +307,7 @@ uop_dealloc(_PyUOpExecutorObject *self) {
 
 static const char *
 uop_name(int index) {
-    if (index < EXIT_TRACE) {
+    if (index < 256) {
         return _PyOpcode_OpName[index];
     }
     return _PyOpcode_uop_name[index];
@@ -316,9 +316,9 @@ uop_name(int index) {
 static Py_ssize_t
 uop_len(_PyUOpExecutorObject *self)
 {
-    int count = 1;
+    int count = 0;
     for (; count < _Py_UOP_MAX_TRACE_LENGTH; count++) {
-        if (self->trace[count-1].opcode == EXIT_TRACE) {
+        if (self->trace[count].opcode == 0) {
             break;
         }
     }
@@ -328,28 +328,26 @@ uop_len(_PyUOpExecutorObject *self)
 static PyObject *
 uop_item(_PyUOpExecutorObject *self, Py_ssize_t index)
 {
-    for (int i = 0; i < _Py_UOP_MAX_TRACE_LENGTH; i++) {
-        if (self->trace[i].opcode == EXIT_TRACE) {
-            break;
-        }
-        if (i != index) {
-            continue;
-        }
-        const char *name = uop_name(self->trace[i].opcode);
-        PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
-        if (oname == NULL) {
-            return NULL;
-        }
-        PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[i].operand);
-        if (operand == NULL) {
-            Py_DECREF(oname);
-            return NULL;
-        }
-        PyObject *args[2] = { oname, operand };
-        return _PyTuple_FromArraySteal(args, 2);
+    Py_ssize_t len = uop_len(self);
+    if (index < 0 || index >= len) {
+        PyErr_SetNone(PyExc_IndexError);
+        return NULL;
     }
-    PyErr_SetNone(PyExc_IndexError);
-    return NULL;
+    const char *name = uop_name(self->trace[index].opcode);
+    if (name == NULL) {
+        name = "<nil>";
+    }
+    PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
+    if (oname == NULL) {
+        return NULL;
+    }
+    PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand);
+    if (operand == NULL) {
+        Py_DECREF(oname);
+        return NULL;
+    }
+    PyObject *args[2] = { oname, operand };
+    return _PyTuple_FromArraySteal(args, 2);
 }
 
 PySequenceMethods uop_as_sequence = {
@@ -372,12 +370,13 @@ translate_bytecode_to_trace(
     PyCodeObject *code,
     _Py_CODEUNIT *instr,
     _PyUOpInstruction *trace,
-    int max_length)
+    int buffer_size)
 {
 #ifdef Py_DEBUG
     _Py_CODEUNIT *initial_instr = instr;
 #endif
     int trace_length = 0;
+    int max_length = buffer_size;
 
 #ifdef Py_DEBUG
     char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
@@ -401,6 +400,14 @@ translate_bytecode_to_trace(
     trace[trace_length].operand = (OPERAND); \
     trace_length++;
 
+#define ADD_TO_STUB(INDEX, OPCODE, OPERAND) \
+    DPRINTF(2, "    ADD_TO_STUB(%d, %s, %" PRIu64 ")\n", \
+            (INDEX), \
+            (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)], \
+            (uint64_t)(OPERAND)); \
+    trace[(INDEX)].opcode = (OPCODE); \
+    trace[(INDEX)].operand = (OPERAND);
+
     DPRINTF(4,
             "Optimizing %s (%s:%d) at byte offset %ld\n",
             PyUnicode_AsUTF8(code->co_qualname),
@@ -409,7 +416,7 @@ translate_bytecode_to_trace(
             2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive));
 
     for (;;) {
-        ADD_TO_TRACE(SAVE_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
+        ADD_TO_TRACE(SAVE_IP, instr - (_Py_CODEUNIT *)code->co_code_adaptive);
         int opcode = instr->op.code;
         int oparg = instr->op.arg;
         int extras = 0;
@@ -420,12 +427,35 @@ translate_bytecode_to_trace(
             oparg = (oparg << 8) | instr->op.arg;
         }
         if (opcode == ENTER_EXECUTOR) {
-            _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255];
+            _PyExecutorObject *executor =
+                (_PyExecutorObject *)code->co_executors->executors[oparg&255];
             opcode = executor->vm_data.opcode;
             DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n",  _PyOpcode_OpName[opcode]);
             oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
         }
         switch (opcode) {
+
+            case POP_JUMP_IF_FALSE:
+            case POP_JUMP_IF_TRUE:
+            {
+                // Assume jump unlikely (TODO: handle jump likely case)
+                // Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE)
+                if (trace_length + 5 > max_length) {
+                    DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n");
+                    goto done;
+                }
+                _Py_CODEUNIT *target_instr =
+                    instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg;
+                max_length -= 2;  // Really the start of the stubs
+                int uopcode = opcode == POP_JUMP_IF_TRUE ?
+                    _POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE;
+                ADD_TO_TRACE(uopcode, max_length);
+                ADD_TO_STUB(max_length, SAVE_IP,
+                            target_instr - (_Py_CODEUNIT *)code->co_code_adaptive);
+                ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0);
+                break;
+            }
+
             default:
             {
                 const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@@ -503,6 +533,30 @@ translate_bytecode_to_trace(
                 code->co_firstlineno,
                 2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive),
                 trace_length);
+        if (max_length < buffer_size && trace_length < max_length) {
+            // Move the stubs back to be immediately after the main trace
+            // (which ends at trace_length)
+            DPRINTF(2,
+                    "Moving %d stub uops back by %d\n",
+                    buffer_size - max_length,
+                    max_length - trace_length);
+            memmove(trace + trace_length,
+                    trace + max_length,
+                    (buffer_size - max_length) * sizeof(_PyUOpInstruction));
+            // Patch up the jump targets
+            for (int i = 0; i < trace_length; i++) {
+                if (trace[i].opcode == _POP_JUMP_IF_FALSE ||
+                    trace[i].opcode == _POP_JUMP_IF_TRUE)
+                {
+                    int target = trace[i].operand;
+                    if (target >= max_length) {
+                        target += trace_length - max_length;
+                        trace[i].operand = target;
+                    }
+                }
+            }
+            trace_length += buffer_size - max_length;
+        }
         return trace_length;
     }
     else {
@@ -539,6 +593,9 @@ uop_optimize(
     }
     executor->base.execute = _PyUopExecute;
     memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
+        if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) {
+            executor->trace[trace_length].opcode = 0;  // Sentinel
+        }
     *exec_ptr = (_PyExecutorObject *)executor;
     return 1;
 }
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index 14269ca8cbe75..932d0c14d398a 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -1338,12 +1338,17 @@ def write_pseudo_instrs(self) -> None:
     def write_uop_items(self, make_text: typing.Callable[[str, int], str]) -> None:
         """Write '#define XXX NNN' for each uop"""
         counter = 300  # TODO: Avoid collision with pseudo instructions
+
         def add(name: str) -> None:
             nonlocal counter
             self.out.emit(make_text(name, counter))
             counter += 1
+
         add("EXIT_TRACE")
         add("SAVE_IP")
+        add("_POP_JUMP_IF_FALSE")
+        add("_POP_JUMP_IF_TRUE")
+
         for instr in self.instrs.values():
             if instr.kind == "op" and instr.is_viable_uop():
                 add(instr.name)



More information about the Python-checkins mailing list