[Python-checkins] GH-98831: Update generate_cases.py: register inst, opcode_metadata.h (#100735)

gvanrossum webhook-mailer at python.org
Thu Jan 5 16:01:14 EST 2023


https://github.com/python/cpython/commit/14b7f00fdf9890739b43a3e198e4ce93f54c0552
commit: 14b7f00fdf9890739b43a3e198e4ce93f54c0552
branch: main
author: Guido van Rossum <guido at python.org>
committer: gvanrossum <gvanrossum at gmail.com>
date: 2023-01-05T13:01:07-08:00
summary:

GH-98831: Update generate_cases.py: register inst, opcode_metadata.h (#100735)

(These aren't used yet, but may be coming soon,
and it's easier to keep this tool the same between branches.)

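Added a sanity check to compile.c: assemble() now asserts that, for every
instruction with known push/pop counts in the generated metadata,
stack_effect() equals n_pushed - n_popped.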

Co-authored-by: Irit Katriel <iritkatriel at yahoo.com>

files:
A Python/opcode_metadata.h
M Makefile.pre.in
M Python/bytecodes.c
M Python/compile.c
M Tools/cases_generator/generate_cases.py
M Tools/cases_generator/parser.py

diff --git a/Makefile.pre.in b/Makefile.pre.in
index 397fc996192d..a6b5f212160f 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -1456,6 +1456,15 @@ regen-cases:
 		-i $(srcdir)/Python/bytecodes.c \
 		-o $(srcdir)/Python/generated_cases.c.h.new
 	$(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new
+	# Regenerate Python/opcode_metadata.h from Python/bytecodes.c
+	# using Tools/cases_generator/generate_cases.py --metadata
+	PYTHONPATH=$(srcdir)/Tools/cases_generator \
+	$(PYTHON_FOR_REGEN) \
+	    $(srcdir)/Tools/cases_generator/generate_cases.py \
+		--metadata \
+		-i $(srcdir)/Python/bytecodes.c \
+		-o $(srcdir)/Python/opcode_metadata.h.new
+	$(UPDATE_FILE) $(srcdir)/Python/opcode_metadata.h $(srcdir)/Python/opcode_metadata.h.new
 
 Python/ceval.o: $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/condvar.h $(srcdir)/Python/generated_cases.c.h
 
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 9283f590582a..04ba33ebef80 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -764,7 +764,7 @@ dummy_func(
             ERROR_IF(w == NULL, error);
         }
 
-        inst(YIELD_VALUE, (retval --)) {
+        inst(YIELD_VALUE, (retval -- unused)) {
             // NOTE: It's important that YIELD_VALUE never raises an exception!
             // The compiler treats any exception raised here as a failed close()
             // or throw() call.
diff --git a/Python/compile.c b/Python/compile.c
index ff29fb42c5d1..e7804469fec6 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -36,6 +36,8 @@
 #include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
 #include "pycore_symtable.h"      // PySTEntryObject
 
+#include "opcode_metadata.h"      // _PyOpcode_opcode_metadata
+
 
 #define DEFAULT_BLOCK_SIZE 16
 #define DEFAULT_CODE_SIZE 128
@@ -8664,6 +8666,31 @@ no_redundant_jumps(cfg_builder *g) {
     return true;
 }
 
+static bool
+opcode_metadata_is_sane(cfg_builder *g) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+        for (int i = 0; i < b->b_iused; i++) {
+            struct instr *instr = &b->b_instr[i];
+            int opcode = instr->i_opcode;
+            assert(opcode <= MAX_REAL_OPCODE); 
+            int pushed = _PyOpcode_opcode_metadata[opcode].n_pushed;
+            int popped = _PyOpcode_opcode_metadata[opcode].n_popped;
+            assert((pushed < 0) == (popped < 0));
+            if (pushed >= 0) {
+                assert(_PyOpcode_opcode_metadata[opcode].valid_entry);
+                int effect = stack_effect(opcode, instr->i_oparg, -1);
+                if (effect != pushed - popped) {
+                   fprintf(stderr,
+                           "op=%d: stack_effect (%d) != pushed (%d) - popped (%d)\n",
+                           opcode, effect, pushed, popped);
+                   return false;
+                }
+            }
+        }
+    }
+    return true;
+}
+
 static bool
 no_empty_basic_blocks(cfg_builder *g) {
     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
@@ -8847,6 +8874,7 @@ assemble(struct compiler *c, int addNone)
     }
 
     assert(no_redundant_jumps(g));
+    assert(opcode_metadata_is_sane(g));
 
     /* Can't modify the bytecode after computing jump offsets. */
     assemble_jump_offsets(g->g_entryblock);
diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h
new file mode 100644
index 000000000000..2d539896844d
--- /dev/null
+++ b/Python/opcode_metadata.h
@@ -0,0 +1,187 @@
+// This file is generated by Tools/cases_generator/generate_cases.py --metadata
+// from Python/bytecodes.c
+// Do not edit!
+enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };
+static const struct {
+    short n_popped;
+    short n_pushed;
+    enum Direction dir_op1;
+    enum Direction dir_op2;
+    enum Direction dir_op3;
+    bool valid_entry;
+    char instr_format[10];
+} _PyOpcode_opcode_metadata[256] = {
+    [NOP] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [RESUME] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_CLOSURE] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_FAST_CHECK] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_FAST] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_CONST] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [STORE_FAST] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_FAST__LOAD_FAST] = { 0, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" },
+    [LOAD_FAST__LOAD_CONST] = { 0, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" },
+    [STORE_FAST__LOAD_FAST] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" },
+    [STORE_FAST__STORE_FAST] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" },
+    [LOAD_CONST__LOAD_FAST] = { 0, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" },
+    [POP_TOP] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [PUSH_NULL] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [END_FOR] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [UNARY_POSITIVE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [UNARY_NEGATIVE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [UNARY_NOT] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [UNARY_INVERT] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BINARY_OP_MULTIPLY_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [BINARY_OP_MULTIPLY_FLOAT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [BINARY_OP_SUBTRACT_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [BINARY_OP_SUBTRACT_FLOAT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [BINARY_OP_ADD_UNICODE] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [BINARY_OP_INPLACE_ADD_UNICODE] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BINARY_OP_ADD_FLOAT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [BINARY_OP_ADD_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [BINARY_SUBSCR] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
+    [BINARY_SLICE] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [STORE_SLICE] = { 4, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BINARY_SUBSCR_LIST_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
+    [BINARY_SUBSCR_TUPLE_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
+    [BINARY_SUBSCR_DICT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
+    [BINARY_SUBSCR_GETITEM] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
+    [LIST_APPEND] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [SET_ADD] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [STORE_SUBSCR] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [STORE_SUBSCR_LIST_INT] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [STORE_SUBSCR_DICT] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [DELETE_SUBSCR] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [PRINT_EXPR] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [RAISE_VARARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [INTERPRETER_EXIT] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [RETURN_VALUE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [GET_AITER] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [GET_ANEXT] = { 1, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [GET_AWAITABLE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [SEND] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [ASYNC_GEN_WRAP] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [YIELD_VALUE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [POP_EXCEPT] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [RERAISE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [PREP_RERAISE_STAR] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [END_ASYNC_FOR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CLEANUP_THROW] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [STOPITERATION_ERROR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ASSERTION_ERROR] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_BUILD_CLASS] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [STORE_NAME] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [DELETE_NAME] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [UNPACK_SEQUENCE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [UNPACK_SEQUENCE_TWO_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [UNPACK_SEQUENCE_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [UNPACK_SEQUENCE_LIST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [UNPACK_EX] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [STORE_ATTR] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
+    [DELETE_ATTR] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [STORE_GLOBAL] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [DELETE_GLOBAL] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_NAME] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_GLOBAL] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_GLOBAL_MODULE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_GLOBAL_BUILTIN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [DELETE_FAST] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [MAKE_CELL] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [DELETE_DEREF] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_CLASSDEREF] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_DEREF] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [STORE_DEREF] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [COPY_FREE_VARS] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BUILD_STRING] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BUILD_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BUILD_LIST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LIST_TO_TUPLE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LIST_EXTEND] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [SET_UPDATE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BUILD_SET] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BUILD_MAP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [SETUP_ANNOTATIONS] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BUILD_CONST_KEY_MAP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [DICT_UPDATE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [DICT_MERGE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [MAP_ADD] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_INSTANCE_VALUE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_MODULE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_WITH_HINT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_SLOT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_CLASS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_PROPERTY] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [STORE_ATTR_INSTANCE_VALUE] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
+    [STORE_ATTR_WITH_HINT] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
+    [STORE_ATTR_SLOT] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
+    [COMPARE_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0" },
+    [COMPARE_OP_FLOAT_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0IB" },
+    [COMPARE_OP_INT_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0IB" },
+    [COMPARE_OP_STR_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0IB" },
+    [IS_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CONTAINS_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CHECK_EG_MATCH] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CHECK_EXC_MATCH] = { 2, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [IMPORT_NAME] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [IMPORT_STAR] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [IMPORT_FROM] = { 1, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [JUMP_FORWARD] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [JUMP_BACKWARD] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [POP_JUMP_IF_FALSE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [POP_JUMP_IF_TRUE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [POP_JUMP_IF_NOT_NONE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [POP_JUMP_IF_NONE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [JUMP_IF_FALSE_OR_POP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [JUMP_IF_TRUE_OR_POP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [JUMP_BACKWARD_NO_INTERRUPT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [GET_LEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [MATCH_CLASS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [MATCH_MAPPING] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [MATCH_SEQUENCE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [MATCH_KEYS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [GET_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [GET_YIELD_FROM_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [FOR_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [FOR_ITER_LIST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [FOR_ITER_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [FOR_ITER_RANGE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [FOR_ITER_GEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BEFORE_ASYNC_WITH] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BEFORE_WITH] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [WITH_EXCEPT_START] = { 4, 5, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [PUSH_EXC_INFO] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_METHOD_WITH_VALUES] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_METHOD_WITH_DICT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_METHOD_NO_DICT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [LOAD_ATTR_METHOD_LAZY_DICT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_BOUND_METHOD_EXACT_ARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [KW_NAMES] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_PY_EXACT_ARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_PY_WITH_DEFAULTS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_TYPE_1] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_STR_1] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_TUPLE_1] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_BUILTIN_CLASS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_BUILTIN_O] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_BUILTIN_FAST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_LEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_ISINSTANCE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_LIST_APPEND] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_METHOD_DESCRIPTOR_O] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_NO_KW_METHOD_DESCRIPTOR_FAST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CALL_FUNCTION_EX] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [MAKE_FUNCTION] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [RETURN_GENERATOR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BUILD_SLICE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [FORMAT_VALUE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [COPY] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [BINARY_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
+    [SWAP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [EXTENDED_ARG] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+    [CACHE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
+};
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index 5eed74c5e147..7452b2ced052 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -21,6 +21,9 @@
 DEFAULT_OUTPUT = os.path.relpath(
     os.path.join(os.path.dirname(__file__), "../../Python/generated_cases.c.h")
 )
+DEFAULT_METADATA_OUTPUT = os.path.relpath(
+    os.path.join(os.path.dirname(__file__), "../../Python/opcode_metadata.h")
+)
 BEGIN_MARKER = "// BEGIN BYTECODES //"
 END_MARKER = "// END BYTECODES //"
 RE_PREDICTED = r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*$"
@@ -37,6 +40,12 @@
 arg_parser.add_argument(
     "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
 )
+arg_parser.add_argument(
+    "-m",
+    "--metadata",
+    action="store_true",
+    help=f"Generate metadata instead, changes output default to {DEFAULT_METADATA_OUTPUT}",
+)
 
 
 class Formatter:
@@ -96,6 +105,8 @@ def assign(self, dst: StackEffect, src: StackEffect):
         cast = self.cast(dst, src)
         if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
             self.emit(f"POKE({m.group(1)}, {cast}{src.name});")
+        elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name):
+            self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
         else:
             self.emit(f"{dst.name} = {cast}{src.name};")
 
@@ -109,7 +120,8 @@ class Instruction:
 
     # Parts of the underlying instruction definition
     inst: parser.InstDef
-    kind: typing.Literal["inst", "op"]
+    register: bool
+    kind: typing.Literal["inst", "op", "legacy"]  # Legacy means no (input -- output)
     name: str
     block: parser.Block
     block_text: list[str]  # Block.text, less curlies, less PREDICT() calls
@@ -121,6 +133,9 @@ class Instruction:
     cache_effects: list[parser.CacheEffect]
     input_effects: list[StackEffect]
     output_effects: list[StackEffect]
+    # Parallel to input_effects
+    input_registers: list[str] = dataclasses.field(repr=False)
+    output_registers: list[str] = dataclasses.field(repr=False)
 
     # Set later
     family: parser.Family | None = None
@@ -129,6 +144,7 @@ class Instruction:
 
     def __init__(self, inst: parser.InstDef):
         self.inst = inst
+        self.register = inst.register
         self.kind = inst.kind
         self.name = inst.name
         self.block = inst.block
@@ -150,9 +166,24 @@ def __init__(self, inst: parser.InstDef):
                 break
         self.unmoved_names = frozenset(unmoved_names)
 
+    def analyze_registers(self, a: "Analyzer") -> None:
+        regs = iter(("REG(oparg1)", "REG(oparg2)", "REG(oparg3)"))
+        try:
+            self.input_registers = [
+                next(regs) for ieff in self.input_effects if ieff.name != UNUSED
+            ]
+            self.output_registers = [
+                next(regs) for oeff in self.output_effects if oeff.name != UNUSED
+            ]
+        except StopIteration:  # Running out of registers
+            a.error(
+                f"Instruction {self.name} has too many register effects", node=self.inst
+            )
+
     def write(self, out: Formatter) -> None:
         """Write one instruction, sans prologue and epilogue."""
         # Write a static assertion that a family's cache size is correct
+
         if family := self.family:
             if self.name == family.members[0]:
                 if cache_size := family.size:
@@ -161,10 +192,16 @@ def write(self, out: Formatter) -> None:
                         f'{self.cache_offset}, "incorrect cache size");'
                     )
 
-        # Write input stack effect variable declarations and initializations
-        for i, ieffect in enumerate(reversed(self.input_effects), 1):
-            src = StackEffect(f"PEEK({i})", "")
-            out.declare(ieffect, src)
+        if not self.register:
+            # Write input stack effect variable declarations and initializations
+            for i, ieffect in enumerate(reversed(self.input_effects), 1):
+                src = StackEffect(f"PEEK({i})", "")
+                out.declare(ieffect, src)
+        else:
+            # Write input register variable declarations and initializations
+            for ieffect, reg in zip(self.input_effects, self.input_registers):
+                src = StackEffect(reg, "")
+                out.declare(ieffect, src)
 
         # Write output stack effect variable declarations
         input_names = {ieffect.name for ieffect in self.input_effects}
@@ -172,20 +209,28 @@ def write(self, out: Formatter) -> None:
             if oeffect.name not in input_names:
                 out.declare(oeffect, None)
 
+        # out.emit(f"JUMPBY(OPSIZE({self.inst.name}) - 1);")
+
         self.write_body(out, 0)
 
         # Skip the rest if the block always exits
         if self.always_exits:
             return
 
-        # Write net stack growth/shrinkage
-        diff = len(self.output_effects) - len(self.input_effects)
-        out.stack_adjust(diff)
+        if not self.register:
+            # Write net stack growth/shrinkage
+            diff = len(self.output_effects) - len(self.input_effects)
+            out.stack_adjust(diff)
 
-        # Write output stack effect assignments
-        for i, oeffect in enumerate(reversed(self.output_effects), 1):
-            if oeffect.name not in self.unmoved_names:
-                dst = StackEffect(f"PEEK({i})", "")
+            # Write output stack effect assignments
+            for i, oeffect in enumerate(reversed(self.output_effects), 1):
+                if oeffect.name not in self.unmoved_names:
+                    dst = StackEffect(f"PEEK({i})", "")
+                    out.assign(dst, oeffect)
+        else:
+            # Write output register assignments
+            for oeffect, reg in zip(self.output_effects, self.output_registers):
+                dst = StackEffect(reg, "")
                 out.assign(dst, oeffect)
 
         # Write cache effect
@@ -209,7 +254,9 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None
                 else:
                     typ = f"uint{bits}_t "
                     func = f"read_u{bits}"
-                out.emit(f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);")
+                out.emit(
+                    f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);"
+                )
             cache_offset += ceffect.size
         assert cache_offset == self.cache_offset + cache_adjust
 
@@ -222,14 +269,17 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None
                 # ERROR_IF() must pop the inputs from the stack.
                 # The code block is responsible for DECREF()ing them.
                 # NOTE: If the label doesn't exist, just add it to ceval.c.
-                ninputs = len(self.input_effects)
-                # Don't pop common input/output effects at the bottom!
-                # These aren't DECREF'ed so they can stay.
-                for ieff, oeff in zip(self.input_effects, self.output_effects):
-                    if ieff.name == oeff.name:
-                        ninputs -= 1
-                    else:
-                        break
+                if not self.register:
+                    ninputs = len(self.input_effects)
+                    # Don't pop common input/output effects at the bottom!
+                    # These aren't DECREF'ed so they can stay.
+                    for ieff, oeff in zip(self.input_effects, self.output_effects):
+                        if ieff.name == oeff.name:
+                            ninputs -= 1
+                        else:
+                            break
+                else:
+                    ninputs = 0
                 if ninputs:
                     out.write_raw(
                         f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n"
@@ -237,10 +287,11 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None
                 else:
                     out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
             elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*$", line):
-                space = m.group(1)
-                for ieff in self.input_effects:
-                    if ieff.name not in self.unmoved_names:
-                        out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n")
+                if not self.register:
+                    space = m.group(1)
+                    for ieff in self.input_effects:
+                        if ieff.name not in self.unmoved_names:
+                            out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n")
             else:
                 out.write_raw(extra + line)
 
@@ -392,6 +443,7 @@ def analyze(self) -> None:
         self.find_predictions()
         self.map_families()
         self.check_families()
+        self.analyze_register_instrs()
         self.analyze_supers_and_macros()
 
     def find_predictions(self) -> None:
@@ -458,6 +510,11 @@ def check_families(self) -> None:
                         family,
                     )
 
+    def analyze_register_instrs(self) -> None:
+        for instr in self.instrs.values():
+            if instr.register:
+                instr.analyze_registers(self)
+
     def analyze_supers_and_macros(self) -> None:
         """Analyze each super- and macro instruction."""
         self.super_instrs = {}
@@ -563,6 +620,129 @@ def stack_analysis(
         ]
         return stack, -lowest
 
+    def write_metadata(self) -> None:
+        """Write instruction metadata to output file."""
+        with open(self.output_filename, "w") as f:
+            # Write provenance header
+            f.write(
+                f"// This file is generated by {os.path.relpath(__file__)} --metadata\n"
+            )
+            f.write(f"// from {os.path.relpath(self.filename)}\n")
+            f.write(f"// Do not edit!\n")
+
+            # Create formatter; the rest of the code uses this
+            self.out = Formatter(f, 0)
+
+            # Write variable definition
+            self.out.emit("enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };")
+            self.out.emit("static const struct {")
+            with self.out.indent():
+                self.out.emit("short n_popped;")
+                self.out.emit("short n_pushed;")
+                self.out.emit("enum Direction dir_op1;")
+                self.out.emit("enum Direction dir_op2;")
+                self.out.emit("enum Direction dir_op3;")
+                self.out.emit("bool valid_entry;")
+                self.out.emit("char instr_format[10];")
+            self.out.emit("} _PyOpcode_opcode_metadata[256] = {")
+
+            # Write metadata for each instruction
+            for thing in self.everything:
+                match thing:
+                    case parser.InstDef():
+                        if thing.kind != "op":
+                            self.write_metadata_for_inst(self.instrs[thing.name])
+                    case parser.Super():
+                        self.write_metadata_for_super(self.super_instrs[thing.name])
+                    case parser.Macro():
+                        self.write_metadata_for_macro(self.macro_instrs[thing.name])
+                    case _:
+                        typing.assert_never(thing)
+
+            # Write end of array
+            self.out.emit("};")
+
+    def get_format(self, thing: Instruction | SuperInstruction | MacroInstruction) -> str:
+        """Get the format string for a single instruction."""
+        def instr_format(instr: Instruction) -> str:
+            if instr.register:
+                fmt = "IBBB"
+            else:
+                fmt = "IB"
+            cache = "C"
+            for ce in instr.cache_effects:
+                for _ in range(ce.size):
+                    fmt += cache
+                    cache = "0"
+            return fmt
+        match thing:
+            case Instruction():
+                format = instr_format(thing)
+            case SuperInstruction():
+                format = ""
+                for part in thing.parts:
+                    format += instr_format(part.instr)
+            case MacroInstruction():
+                # Macros don't support register instructions yet
+                format = "IB"
+                cache = "C"
+                for part in thing.parts:
+                    if isinstance(part, parser.CacheEffect):
+                        for _ in range(part.size):
+                            format += cache
+                            cache = "0"
+                    else:
+                        assert isinstance(part, Component)
+                        for ce in part.instr.cache_effects:
+                            for _ in range(ce.size):
+                                format += cache
+                                cache = "0"
+            case _:
+                typing.assert_never(thing)
+        assert len(format) < 10  # Else update the size of instr_format above
+        return format
+
+    def write_metadata_for_inst(self, instr: Instruction) -> None:
+        """Write metadata for a single instruction."""
+        dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
+        if instr.kind == "legacy":
+            n_popped = n_pushed = -1
+            assert not instr.register
+        else:
+            n_popped = len(instr.input_effects)
+            n_pushed = len(instr.output_effects)
+            if instr.register:
+                directions: list[str] = []
+                directions.extend("DIR_READ" for _ in instr.input_effects)
+                directions.extend("DIR_WRITE" for _ in instr.output_effects)
+                directions.extend("DIR_NONE" for _ in range(3))
+                dir_op1, dir_op2, dir_op3 = directions[:3]
+        format = self.get_format(instr)
+        self.out.emit(
+            f'    [{instr.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
+        )
+
+    def write_metadata_for_super(self, sup: SuperInstruction) -> None:
+        """Write metadata for a super-instruction."""
+        n_popped = sum(len(comp.instr.input_effects) for comp in sup.parts)
+        n_pushed = sum(len(comp.instr.output_effects) for comp in sup.parts)
+        dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
+        format = self.get_format(sup)
+        self.out.emit(
+            f'    [{sup.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
+        )
+
+    def write_metadata_for_macro(self, mac: MacroInstruction) -> None:
+        """Write metadata for a macro-instruction."""
+        parts = [comp for comp in mac.parts if isinstance(comp, Component)]
+        n_popped = sum(len(comp.instr.input_effects) for comp in parts)
+        n_pushed = sum(len(comp.instr.output_effects) for comp in parts)
+        dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
+        format = self.get_format(mac)
+        self.out.emit(
+            f'    [{mac.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
+        )
+
     def write_instructions(self) -> None:
         """Write instructions to output file."""
         with open(self.output_filename, "w") as f:
@@ -571,7 +751,7 @@ def write_instructions(self) -> None:
             f.write(f"// from {os.path.relpath(self.filename)}\n")
             f.write(f"// Do not edit!\n")
 
-            # Create formatter; the rest of the code uses this.
+            # Create formatter; the rest of the code uses this
             self.out = Formatter(f, 8)
 
             # Write and count instructions of all kinds
@@ -581,7 +761,7 @@ def write_instructions(self) -> None:
             for thing in self.everything:
                 match thing:
                     case parser.InstDef():
-                        if thing.kind == "inst":
+                        if thing.kind != "op":
                             n_instrs += 1
                             self.write_instr(self.instrs[thing.name])
                     case parser.Super():
@@ -616,9 +796,13 @@ def write_super(self, sup: SuperInstruction) -> None:
         with self.wrap_super_or_macro(sup):
             first = True
             for comp in sup.parts:
-                if not first:
+                if first:
+                    pass
+                    # self.out.emit("JUMPBY(OPSIZE(opcode) - 1);")
+                else:
                     self.out.emit("NEXTOPARG();")
                     self.out.emit("JUMPBY(1);")
+                    # self.out.emit("JUMPBY(OPSIZE(opcode));")
                 first = False
                 comp.write_body(self.out, 0)
                 if comp.instr.cache_offset:
@@ -711,12 +895,18 @@ def always_exits(lines: list[str]) -> bool:
 def main():
     """Parse command line, parse input, analyze, write output."""
     args = arg_parser.parse_args()  # Prints message and sys.exit(2) on error
+    if args.metadata:
+        if args.output == DEFAULT_OUTPUT:
+            args.output = DEFAULT_METADATA_OUTPUT
     a = Analyzer(args.input, args.output)  # Raises OSError if input unreadable
     a.parse()  # Raises SyntaxError on failure
     a.analyze()  # Prints messages and sets a.errors on failure
     if a.errors:
         sys.exit(f"Found {a.errors} errors")
-    a.write_instructions()  # Raises OSError if output can't be written
+    if args.metadata:
+        a.write_metadata()
+    else:
+        a.write_instructions()  # Raises OSError if output can't be written
 
 
 if __name__ == "__main__":
diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py
index d802c733dfd1..4885394bf6b1 100644
--- a/Tools/cases_generator/parser.py
+++ b/Tools/cases_generator/parser.py
@@ -84,7 +84,8 @@ class OpName(Node):
 
 @dataclass
 class InstHeader(Node):
-    kind: Literal["inst", "op"]
+    register: bool
+    kind: Literal["inst", "op", "legacy"]  # Legacy means no (inputs -- outputs)
     name: str
     inputs: list[InputEffect]
     outputs: list[OutputEffect]
@@ -92,7 +93,8 @@ class InstHeader(Node):
 
 @dataclass
 class InstDef(Node):
-    kind: Literal["inst", "op"]
+    register: bool
+    kind: Literal["inst", "op", "legacy"]
     name: str
     inputs: list[InputEffect]
     outputs: list[OutputEffect]
@@ -134,16 +136,19 @@ def definition(self) -> InstDef | Super | Macro | Family | None:
     def inst_def(self) -> InstDef | None:
         if hdr := self.inst_header():
             if block := self.block():
-                return InstDef(hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block)
+                return InstDef(
+                    hdr.register, hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block
+                )
             raise self.make_syntax_error("Expected block")
         return None
 
     @contextual
     def inst_header(self) -> InstHeader | None:
         # inst(NAME)
-        #   | inst(NAME, (inputs -- outputs))
-        #   | op(NAME, (inputs -- outputs))
+        #   | [register] inst(NAME, (inputs -- outputs))
+        #   | [register] op(NAME, (inputs -- outputs))
         # TODO: Make INST a keyword in the lexer.
+        register = bool(self.expect(lx.REGISTER))
         if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("inst", "op"):
             if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)):
                 name = tkn.text
@@ -151,10 +156,10 @@ def inst_header(self) -> InstHeader | None:
                     inp, outp = self.io_effect()
                     if self.expect(lx.RPAREN):
                         if (tkn := self.peek()) and tkn.kind == lx.LBRACE:
-                            return InstHeader(kind, name, inp, outp)
+                            return InstHeader(register, kind, name, inp, outp)
                 elif self.expect(lx.RPAREN) and kind == "inst":
                     # No legacy stack effect if kind is "op".
-                    return InstHeader(kind, name, [], [])
+                    return InstHeader(register, "legacy", name, [], [])
         return None
 
     def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:



More information about the Python-checkins mailing list