[Python-checkins] gh-106529: Implement POP_JUMP_IF_XXX uops (#106551)
gvanrossum
webhook-mailer at python.org
Mon Jul 10 19:04:29 EDT 2023
https://github.com/python/cpython/commit/22988c323ad621b9f47b6cb640b80ac806e26368
commit: 22988c323ad621b9f47b6cb640b80ac806e26368
branch: main
author: Guido van Rossum <guido at python.org>
committer: gvanrossum <gvanrossum at gmail.com>
date: 2023-07-10T16:04:26-07:00
summary:
gh-106529: Implement POP_JUMP_IF_XXX uops (#106551)
- Hand-written uops _POP_JUMP_IF_{TRUE,FALSE}.
These test the top of the stack and then pop it.
The jump target (in superblock space) is absolute.
- Hand-written translation for POP_JUMP_IF_{TRUE,FALSE},
assuming the jump is unlikely.
Once we implement jump-likelihood profiling,
we can implement the jump-likely case (in another PR).
- Tests (including some test cleanup).
- Improvements to len(ex) and ex[i] to expose the whole trace.
files:
M Lib/test/test_capi/test_misc.py
M Python/ceval.c
M Python/opcode_metadata.h
M Python/optimizer.c
M Tools/cases_generator/generate_cases.py
diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py
index 5f39f23401c3f..fd27bd06097f6 100644
--- a/Lib/test/test_capi/test_misc.py
+++ b/Lib/test/test_capi/test_misc.py
@@ -2347,11 +2347,12 @@ def func():
@contextlib.contextmanager
def temporary_optimizer(opt):
+ old_opt = _testinternalcapi.get_optimizer()
_testinternalcapi.set_optimizer(opt)
try:
yield
finally:
- _testinternalcapi.set_optimizer(None)
+ _testinternalcapi.set_optimizer(old_opt)
@contextlib.contextmanager
@@ -2420,8 +2421,8 @@ def long_loop():
self.assertEqual(opt.get_count(), 10)
-
-def get_first_executor(code):
+def get_first_executor(func):
+ code = func.__code__
co_code = code.co_code
JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"]
for i in range(0, len(co_code), 2):
@@ -2446,13 +2447,7 @@ def testfunc(x):
with temporary_optimizer(opt):
testfunc(1000)
- ex = None
- for offset in range(0, len(testfunc.__code__.co_code), 2):
- try:
- ex = _testinternalcapi.get_executor(testfunc.__code__, offset)
- break
- except ValueError:
- pass
+ ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _ in ex}
self.assertIn("SAVE_IP", uops)
@@ -2493,11 +2488,13 @@ def many_vars():
opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
- ex = get_first_executor(many_vars.__code__)
+ ex = get_first_executor(many_vars)
self.assertIsNone(ex)
many_vars()
- ex = get_first_executor(many_vars.__code__)
- self.assertIn(("LOAD_FAST", 259), list(ex))
+
+ ex = get_first_executor(many_vars)
+ self.assertIsNotNone(ex)
+ self.assertIn(("LOAD_FAST", 259), list(ex))
def test_unspecialized_unpack(self):
# An example of an unspecialized opcode
@@ -2516,17 +2513,43 @@ def testfunc(x):
with temporary_optimizer(opt):
testfunc(10)
- ex = None
- for offset in range(0, len(testfunc.__code__.co_code), 2):
- try:
- ex = _testinternalcapi.get_executor(testfunc.__code__, offset)
- break
- except ValueError:
- pass
+ ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _ in ex}
self.assertIn("UNPACK_SEQUENCE", uops)
+ def test_pop_jump_if_false(self):
+ def testfunc(n):
+ i = 0
+ while i < n:
+ i += 1
+
+ opt = _testinternalcapi.get_uop_optimizer()
+
+ with temporary_optimizer(opt):
+ testfunc(10)
+
+ ex = get_first_executor(testfunc)
+ self.assertIsNotNone(ex)
+ uops = {opname for opname, _ in ex}
+ self.assertIn("_POP_JUMP_IF_FALSE", uops)
+
+ def test_pop_jump_if_true(self):
+ def testfunc(n):
+ i = 0
+ while not i >= n:
+ i += 1
+
+ opt = _testinternalcapi.get_uop_optimizer()
+
+ with temporary_optimizer(opt):
+ testfunc(10)
+
+ ex = get_first_executor(testfunc)
+ self.assertIsNotNone(ex)
+ uops = {opname for opname, _ in ex}
+ self.assertIn("_POP_JUMP_IF_TRUE", uops)
+
if __name__ == "__main__":
unittest.main()
diff --git a/Python/ceval.c b/Python/ceval.c
index da1135549a255..866acd2dd69c7 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -2751,7 +2751,8 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
operand = self->trace[pc].operand;
oparg = (int)operand;
DPRINTF(3,
- " uop %s, operand %" PRIu64 ", stack_level %d\n",
+ "%4d: uop %s, operand %" PRIu64 ", stack_level %d\n",
+ pc,
opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode],
operand,
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
@@ -2763,6 +2764,25 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
#define ENABLE_SPECIALIZATION 0
#include "executor_cases.c.h"
+ // NOTE: These pop-jumps move the uop pc, not the bytecode ip
+ case _POP_JUMP_IF_FALSE:
+ {
+ if (Py_IsFalse(stack_pointer[-1])) {
+ pc = oparg;
+ }
+ stack_pointer--;
+ break;
+ }
+
+ case _POP_JUMP_IF_TRUE:
+ {
+ if (Py_IsTrue(stack_pointer[-1])) {
+ pc = oparg;
+ }
+ stack_pointer--;
+ break;
+ }
+
case SAVE_IP:
{
frame->prev_instr = ip_offset + oparg;
diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h
index 92768e60f62ab..64923e61fa459 100644
--- a/Python/opcode_metadata.h
+++ b/Python/opcode_metadata.h
@@ -21,18 +21,20 @@
#define EXIT_TRACE 300
#define SAVE_IP 301
-#define _GUARD_BOTH_INT 302
-#define _BINARY_OP_MULTIPLY_INT 303
-#define _BINARY_OP_ADD_INT 304
-#define _BINARY_OP_SUBTRACT_INT 305
-#define _GUARD_BOTH_FLOAT 306
-#define _BINARY_OP_MULTIPLY_FLOAT 307
-#define _BINARY_OP_ADD_FLOAT 308
-#define _BINARY_OP_SUBTRACT_FLOAT 309
-#define _GUARD_BOTH_UNICODE 310
-#define _BINARY_OP_ADD_UNICODE 311
-#define _LOAD_LOCALS 312
-#define _LOAD_FROM_DICT_OR_GLOBALS 313
+#define _POP_JUMP_IF_FALSE 302
+#define _POP_JUMP_IF_TRUE 303
+#define _GUARD_BOTH_INT 304
+#define _BINARY_OP_MULTIPLY_INT 305
+#define _BINARY_OP_ADD_INT 306
+#define _BINARY_OP_SUBTRACT_INT 307
+#define _GUARD_BOTH_FLOAT 308
+#define _BINARY_OP_MULTIPLY_FLOAT 309
+#define _BINARY_OP_ADD_FLOAT 310
+#define _BINARY_OP_SUBTRACT_FLOAT 311
+#define _GUARD_BOTH_UNICODE 312
+#define _BINARY_OP_ADD_UNICODE 313
+#define _LOAD_LOCALS 314
+#define _LOAD_FROM_DICT_OR_GLOBALS 315
#ifndef NEED_OPCODE_METADATA
extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
@@ -1294,18 +1296,20 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
const char * const _PyOpcode_uop_name[512] = {
[300] = "EXIT_TRACE",
[301] = "SAVE_IP",
- [302] = "_GUARD_BOTH_INT",
- [303] = "_BINARY_OP_MULTIPLY_INT",
- [304] = "_BINARY_OP_ADD_INT",
- [305] = "_BINARY_OP_SUBTRACT_INT",
- [306] = "_GUARD_BOTH_FLOAT",
- [307] = "_BINARY_OP_MULTIPLY_FLOAT",
- [308] = "_BINARY_OP_ADD_FLOAT",
- [309] = "_BINARY_OP_SUBTRACT_FLOAT",
- [310] = "_GUARD_BOTH_UNICODE",
- [311] = "_BINARY_OP_ADD_UNICODE",
- [312] = "_LOAD_LOCALS",
- [313] = "_LOAD_FROM_DICT_OR_GLOBALS",
+ [302] = "_POP_JUMP_IF_FALSE",
+ [303] = "_POP_JUMP_IF_TRUE",
+ [304] = "_GUARD_BOTH_INT",
+ [305] = "_BINARY_OP_MULTIPLY_INT",
+ [306] = "_BINARY_OP_ADD_INT",
+ [307] = "_BINARY_OP_SUBTRACT_INT",
+ [308] = "_GUARD_BOTH_FLOAT",
+ [309] = "_BINARY_OP_MULTIPLY_FLOAT",
+ [310] = "_BINARY_OP_ADD_FLOAT",
+ [311] = "_BINARY_OP_SUBTRACT_FLOAT",
+ [312] = "_GUARD_BOTH_UNICODE",
+ [313] = "_BINARY_OP_ADD_UNICODE",
+ [314] = "_LOAD_LOCALS",
+ [315] = "_LOAD_FROM_DICT_OR_GLOBALS",
};
#endif // NEED_OPCODE_METADATA
#endif
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 1d731ed2309c3..48c29f55bee46 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -307,7 +307,7 @@ uop_dealloc(_PyUOpExecutorObject *self) {
static const char *
uop_name(int index) {
- if (index < EXIT_TRACE) {
+ if (index < 256) {
return _PyOpcode_OpName[index];
}
return _PyOpcode_uop_name[index];
@@ -316,9 +316,9 @@ uop_name(int index) {
static Py_ssize_t
uop_len(_PyUOpExecutorObject *self)
{
- int count = 1;
+ int count = 0;
for (; count < _Py_UOP_MAX_TRACE_LENGTH; count++) {
- if (self->trace[count-1].opcode == EXIT_TRACE) {
+ if (self->trace[count].opcode == 0) {
break;
}
}
@@ -328,28 +328,26 @@ uop_len(_PyUOpExecutorObject *self)
static PyObject *
uop_item(_PyUOpExecutorObject *self, Py_ssize_t index)
{
- for (int i = 0; i < _Py_UOP_MAX_TRACE_LENGTH; i++) {
- if (self->trace[i].opcode == EXIT_TRACE) {
- break;
- }
- if (i != index) {
- continue;
- }
- const char *name = uop_name(self->trace[i].opcode);
- PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
- if (oname == NULL) {
- return NULL;
- }
- PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[i].operand);
- if (operand == NULL) {
- Py_DECREF(oname);
- return NULL;
- }
- PyObject *args[2] = { oname, operand };
- return _PyTuple_FromArraySteal(args, 2);
+ Py_ssize_t len = uop_len(self);
+ if (index < 0 || index >= len) {
+ PyErr_SetNone(PyExc_IndexError);
+ return NULL;
}
- PyErr_SetNone(PyExc_IndexError);
- return NULL;
+ const char *name = uop_name(self->trace[index].opcode);
+ if (name == NULL) {
+ name = "<nil>";
+ }
+ PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
+ if (oname == NULL) {
+ return NULL;
+ }
+ PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand);
+ if (operand == NULL) {
+ Py_DECREF(oname);
+ return NULL;
+ }
+ PyObject *args[2] = { oname, operand };
+ return _PyTuple_FromArraySteal(args, 2);
}
PySequenceMethods uop_as_sequence = {
@@ -372,12 +370,13 @@ translate_bytecode_to_trace(
PyCodeObject *code,
_Py_CODEUNIT *instr,
_PyUOpInstruction *trace,
- int max_length)
+ int buffer_size)
{
#ifdef Py_DEBUG
_Py_CODEUNIT *initial_instr = instr;
#endif
int trace_length = 0;
+ int max_length = buffer_size;
#ifdef Py_DEBUG
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
@@ -401,6 +400,14 @@ translate_bytecode_to_trace(
trace[trace_length].operand = (OPERAND); \
trace_length++;
+#define ADD_TO_STUB(INDEX, OPCODE, OPERAND) \
+ DPRINTF(2, " ADD_TO_STUB(%d, %s, %" PRIu64 ")\n", \
+ (INDEX), \
+ (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)], \
+ (uint64_t)(OPERAND)); \
+ trace[(INDEX)].opcode = (OPCODE); \
+ trace[(INDEX)].operand = (OPERAND);
+
DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %ld\n",
PyUnicode_AsUTF8(code->co_qualname),
@@ -409,7 +416,7 @@ translate_bytecode_to_trace(
2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive));
for (;;) {
- ADD_TO_TRACE(SAVE_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
+ ADD_TO_TRACE(SAVE_IP, instr - (_Py_CODEUNIT *)code->co_code_adaptive);
int opcode = instr->op.code;
int oparg = instr->op.arg;
int extras = 0;
@@ -420,12 +427,35 @@ translate_bytecode_to_trace(
oparg = (oparg << 8) | instr->op.arg;
}
if (opcode == ENTER_EXECUTOR) {
- _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255];
+ _PyExecutorObject *executor =
+ (_PyExecutorObject *)code->co_executors->executors[oparg&255];
opcode = executor->vm_data.opcode;
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
}
switch (opcode) {
+
+ case POP_JUMP_IF_FALSE:
+ case POP_JUMP_IF_TRUE:
+ {
+ // Assume jump unlikely (TODO: handle jump likely case)
+ // Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE)
+ if (trace_length + 5 > max_length) {
+ DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n");
+ goto done;
+ }
+ _Py_CODEUNIT *target_instr =
+ instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg;
+ max_length -= 2; // Really the start of the stubs
+ int uopcode = opcode == POP_JUMP_IF_TRUE ?
+ _POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE;
+ ADD_TO_TRACE(uopcode, max_length);
+ ADD_TO_STUB(max_length, SAVE_IP,
+ target_instr - (_Py_CODEUNIT *)code->co_code_adaptive);
+ ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0);
+ break;
+ }
+
default:
{
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@@ -503,6 +533,30 @@ translate_bytecode_to_trace(
code->co_firstlineno,
2 * (long)(initial_instr - (_Py_CODEUNIT *)code->co_code_adaptive),
trace_length);
+ if (max_length < buffer_size && trace_length < max_length) {
+ // Move the stubs back to be immediately after the main trace
+ // (which ends at trace_length)
+ DPRINTF(2,
+ "Moving %d stub uops back by %d\n",
+ buffer_size - max_length,
+ max_length - trace_length);
+ memmove(trace + trace_length,
+ trace + max_length,
+ (buffer_size - max_length) * sizeof(_PyUOpInstruction));
+ // Patch up the jump targets
+ for (int i = 0; i < trace_length; i++) {
+ if (trace[i].opcode == _POP_JUMP_IF_FALSE ||
+ trace[i].opcode == _POP_JUMP_IF_TRUE)
+ {
+ int target = trace[i].operand;
+ if (target >= max_length) {
+ target += trace_length - max_length;
+ trace[i].operand = target;
+ }
+ }
+ }
+ trace_length += buffer_size - max_length;
+ }
return trace_length;
}
else {
@@ -539,6 +593,9 @@ uop_optimize(
}
executor->base.execute = _PyUopExecute;
memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
+ if (trace_length < _Py_UOP_MAX_TRACE_LENGTH) {
+ executor->trace[trace_length].opcode = 0; // Sentinel
+ }
*exec_ptr = (_PyExecutorObject *)executor;
return 1;
}
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index 14269ca8cbe75..932d0c14d398a 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -1338,12 +1338,17 @@ def write_pseudo_instrs(self) -> None:
def write_uop_items(self, make_text: typing.Callable[[str, int], str]) -> None:
"""Write '#define XXX NNN' for each uop"""
counter = 300 # TODO: Avoid collision with pseudo instructions
+
def add(name: str) -> None:
nonlocal counter
self.out.emit(make_text(name, counter))
counter += 1
+
add("EXIT_TRACE")
add("SAVE_IP")
+ add("_POP_JUMP_IF_FALSE")
+ add("_POP_JUMP_IF_TRUE")
+
for instr in self.instrs.values():
if instr.kind == "op" and instr.is_viable_uop():
add(instr.name)
More information about the Python-checkins
mailing list