[Python-checkins] GH-98831: Implement array support in cases generator (#100912)

gvanrossum webhook-mailer at python.org
Tue Jan 17 18:59:34 EST 2023


https://github.com/python/cpython/commit/80e3e3423cb456ba341f95d6a07e94cf3b16c0be
commit: 80e3e3423cb456ba341f95d6a07e94cf3b16c0be
branch: main
author: Guido van Rossum <guido at python.org>
committer: gvanrossum <gvanrossum at gmail.com>
date: 2023-01-17T15:59:19-08:00
summary:

GH-98831: Implement array support in cases generator (#100912)

You can now write things like this:
```
inst(BUILD_STRING, (pieces[oparg] -- str)) { ... }
inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) { ... }
```
Note that array output effects are only partially supported (they must be named `unused` or correspond to an input effect).

files:
M Python/bytecodes.c
M Python/generated_cases.c.h
M Python/opcode_metadata.h
M Tools/cases_generator/generate_cases.py
M Tools/cases_generator/parser.py
M Tools/cases_generator/test_generator.py

diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 18f9eabd842c..344e9bb80aae 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -451,16 +451,12 @@ dummy_func(
             DISPATCH_INLINED(new_frame);
         }
 
-        // Alternative: (list, unused[oparg], v -- list, unused[oparg])
-        inst(LIST_APPEND, (v --)) {
-            PyObject *list = PEEK(oparg + 1);  // +1 to account for v staying on stack
+        inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) {
             ERROR_IF(_PyList_AppendTakeRef((PyListObject *)list, v) < 0, error);
             PREDICT(JUMP_BACKWARD);
         }
 
-        // Alternative: (set, unused[oparg], v -- set, unused[oparg])
-        inst(SET_ADD, (v --)) {
-            PyObject *set = PEEK(oparg + 1);  // +1 to account for v staying on stack
+        inst(SET_ADD,  (set, unused[oparg-1], v -- set, unused[oparg-1])) {
             int err = PySet_Add(set, v);
             Py_DECREF(v);
             ERROR_IF(err, error);
@@ -532,7 +528,7 @@ dummy_func(
             ERROR_IF(res == NULL, error);
         }
 
-        // stack effect: (__array[oparg] -- )
+        // This should remain a legacy instruction.
         inst(RAISE_VARARGS) {
             PyObject *cause = NULL, *exc = NULL;
             switch (oparg) {
@@ -1295,40 +1291,25 @@ dummy_func(
             }
         }
 
-        // stack effect: (__array[oparg] -- __0)
-        inst(BUILD_STRING) {
-            PyObject *str;
-            str = _PyUnicode_JoinArray(&_Py_STR(empty),
-                                       stack_pointer - oparg, oparg);
-            if (str == NULL)
-                goto error;
-            while (--oparg >= 0) {
-                PyObject *item = POP();
-                Py_DECREF(item);
+        inst(BUILD_STRING, (pieces[oparg] -- str)) {
+            str = _PyUnicode_JoinArray(&_Py_STR(empty), pieces, oparg);
+            for (int i = 0; i < oparg; i++) {
+                Py_DECREF(pieces[i]);
             }
-            PUSH(str);
+            ERROR_IF(str == NULL, error);
         }
 
-        // stack effect: (__array[oparg] -- __0)
-        inst(BUILD_TUPLE) {
-            STACK_SHRINK(oparg);
-            PyObject *tup = _PyTuple_FromArraySteal(stack_pointer, oparg);
-            if (tup == NULL)
-                goto error;
-            PUSH(tup);
+        inst(BUILD_TUPLE, (values[oparg] -- tup)) {
+            tup = _PyTuple_FromArraySteal(values, oparg);
+            ERROR_IF(tup == NULL, error);
         }
 
-        // stack effect: (__array[oparg] -- __0)
-        inst(BUILD_LIST) {
-            STACK_SHRINK(oparg);
-            PyObject *list = _PyList_FromArraySteal(stack_pointer, oparg);
-            if (list == NULL)
-                goto error;
-            PUSH(list);
+        inst(BUILD_LIST, (values[oparg] -- list)) {
+            list = _PyList_FromArraySteal(values, oparg);
+            ERROR_IF(list == NULL, error);
         }
 
-        inst(LIST_EXTEND, (iterable -- )) {
-            PyObject *list = PEEK(oparg + 1);  // iterable is still on the stack
+        inst(LIST_EXTEND, (list, unused[oparg-1], iterable -- list, unused[oparg-1])) {
             PyObject *none_val = _PyList_Extend((PyListObject *)list, iterable);
             if (none_val == NULL) {
                 if (_PyErr_ExceptionMatches(tstate, PyExc_TypeError) &&
@@ -1346,48 +1327,40 @@ dummy_func(
             DECREF_INPUTS();
         }
 
-        inst(SET_UPDATE, (iterable --)) {
-            PyObject *set = PEEK(oparg + 1);  // iterable is still on the stack
+        inst(SET_UPDATE, (set, unused[oparg-1], iterable -- set, unused[oparg-1])) {
             int err = _PySet_Update(set, iterable);
             DECREF_INPUTS();
             ERROR_IF(err < 0, error);
         }
 
-        // stack effect: (__array[oparg] -- __0)
-        inst(BUILD_SET) {
-            PyObject *set = PySet_New(NULL);
+        inst(BUILD_SET, (values[oparg] -- set)) {
+            set = PySet_New(NULL);
             int err = 0;
-            int i;
-            if (set == NULL)
-                goto error;
-            for (i = oparg; i > 0; i--) {
-                PyObject *item = PEEK(i);
+            for (int i = 0; i < oparg; i++) {
+                PyObject *item = values[i];
                 if (err == 0)
                     err = PySet_Add(set, item);
                 Py_DECREF(item);
             }
-            STACK_SHRINK(oparg);
             if (err != 0) {
                 Py_DECREF(set);
-                goto error;
+                ERROR_IF(true, error);
             }
-            PUSH(set);
         }
 
-        // stack effect: (__array[oparg*2] -- __0)
-        inst(BUILD_MAP) {
-            PyObject *map = _PyDict_FromItems(
-                    &PEEK(2*oparg), 2,
-                    &PEEK(2*oparg - 1), 2,
+        inst(BUILD_MAP, (values[oparg*2] -- map)) {
+            map = _PyDict_FromItems(
+                    values, 2,
+                    values+1, 2,
                     oparg);
             if (map == NULL)
                 goto error;
 
-            while (oparg--) {
-                Py_DECREF(POP());
-                Py_DECREF(POP());
+            for (int i = 0; i < oparg; i++) {
+                Py_DECREF(values[i*2]);
+                Py_DECREF(values[i*2+1]);
             }
-            PUSH(map);
+            ERROR_IF(map == NULL, error);
         }
 
         inst(SETUP_ANNOTATIONS, (--)) {
@@ -1432,28 +1405,21 @@ dummy_func(
             }
         }
 
-        // stack effect: (__array[oparg] -- )
-        inst(BUILD_CONST_KEY_MAP) {
-            PyObject *map;
-            PyObject *keys = TOP();
+        inst(BUILD_CONST_KEY_MAP, (values[oparg], keys -- map)) {
             if (!PyTuple_CheckExact(keys) ||
                 PyTuple_GET_SIZE(keys) != (Py_ssize_t)oparg) {
                 _PyErr_SetString(tstate, PyExc_SystemError,
                                  "bad BUILD_CONST_KEY_MAP keys argument");
-                goto error;
+                goto error;  // Pop the keys and values.
             }
             map = _PyDict_FromItems(
                     &PyTuple_GET_ITEM(keys, 0), 1,
-                    &PEEK(oparg + 1), 1, oparg);
-            if (map == NULL) {
-                goto error;
-            }
-
-            Py_DECREF(POP());
-            while (oparg--) {
-                Py_DECREF(POP());
+                    values, 1, oparg);
+            Py_DECREF(keys);
+            for (int i = 0; i < oparg; i++) {
+                Py_DECREF(values[i]);
             }
-            PUSH(map);
+            ERROR_IF(map == NULL, error);
         }
 
         inst(DICT_UPDATE, (update --)) {
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index e9819dfda997..445511df2a3a 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -574,7 +574,7 @@
 
         TARGET(LIST_APPEND) {
             PyObject *v = PEEK(1);
-            PyObject *list = PEEK(oparg + 1);  // +1 to account for v staying on stack
+            PyObject *list = PEEK(2 + (oparg-1));
             if (_PyList_AppendTakeRef((PyListObject *)list, v) < 0) goto pop_1_error;
             STACK_SHRINK(1);
             PREDICT(JUMP_BACKWARD);
@@ -583,7 +583,7 @@
 
         TARGET(SET_ADD) {
             PyObject *v = PEEK(1);
-            PyObject *set = PEEK(oparg + 1);  // +1 to account for v staying on stack
+            PyObject *set = PEEK(2 + (oparg-1));
             int err = PySet_Add(set, v);
             Py_DECREF(v);
             if (err) goto pop_1_error;
@@ -1511,40 +1511,44 @@
         }
 
         TARGET(BUILD_STRING) {
+            PyObject **pieces = &PEEK(oparg);
             PyObject *str;
-            str = _PyUnicode_JoinArray(&_Py_STR(empty),
-                                       stack_pointer - oparg, oparg);
-            if (str == NULL)
-                goto error;
-            while (--oparg >= 0) {
-                PyObject *item = POP();
-                Py_DECREF(item);
+            str = _PyUnicode_JoinArray(&_Py_STR(empty), pieces, oparg);
+            for (int i = 0; i < oparg; i++) {
+                Py_DECREF(pieces[i]);
             }
-            PUSH(str);
+            if (str == NULL) { STACK_SHRINK(oparg); goto error; }
+            STACK_SHRINK(oparg);
+            STACK_GROW(1);
+            POKE(1, str);
             DISPATCH();
         }
 
         TARGET(BUILD_TUPLE) {
+            PyObject **values = &PEEK(oparg);
+            PyObject *tup;
+            tup = _PyTuple_FromArraySteal(values, oparg);
+            if (tup == NULL) { STACK_SHRINK(oparg); goto error; }
             STACK_SHRINK(oparg);
-            PyObject *tup = _PyTuple_FromArraySteal(stack_pointer, oparg);
-            if (tup == NULL)
-                goto error;
-            PUSH(tup);
+            STACK_GROW(1);
+            POKE(1, tup);
             DISPATCH();
         }
 
         TARGET(BUILD_LIST) {
+            PyObject **values = &PEEK(oparg);
+            PyObject *list;
+            list = _PyList_FromArraySteal(values, oparg);
+            if (list == NULL) { STACK_SHRINK(oparg); goto error; }
             STACK_SHRINK(oparg);
-            PyObject *list = _PyList_FromArraySteal(stack_pointer, oparg);
-            if (list == NULL)
-                goto error;
-            PUSH(list);
+            STACK_GROW(1);
+            POKE(1, list);
             DISPATCH();
         }
 
         TARGET(LIST_EXTEND) {
             PyObject *iterable = PEEK(1);
-            PyObject *list = PEEK(oparg + 1);  // iterable is still on the stack
+            PyObject *list = PEEK(2 + (oparg-1));
             PyObject *none_val = _PyList_Extend((PyListObject *)list, iterable);
             if (none_val == NULL) {
                 if (_PyErr_ExceptionMatches(tstate, PyExc_TypeError) &&
@@ -1566,7 +1570,7 @@
 
         TARGET(SET_UPDATE) {
             PyObject *iterable = PEEK(1);
-            PyObject *set = PEEK(oparg + 1);  // iterable is still on the stack
+            PyObject *set = PEEK(2 + (oparg-1));
             int err = _PySet_Update(set, iterable);
             Py_DECREF(iterable);
             if (err < 0) goto pop_1_error;
@@ -1575,39 +1579,44 @@
         }
 
         TARGET(BUILD_SET) {
-            PyObject *set = PySet_New(NULL);
+            PyObject **values = &PEEK(oparg);
+            PyObject *set;
+            set = PySet_New(NULL);
             int err = 0;
-            int i;
-            if (set == NULL)
-                goto error;
-            for (i = oparg; i > 0; i--) {
-                PyObject *item = PEEK(i);
+            for (int i = 0; i < oparg; i++) {
+                PyObject *item = values[i];
                 if (err == 0)
                     err = PySet_Add(set, item);
                 Py_DECREF(item);
             }
-            STACK_SHRINK(oparg);
             if (err != 0) {
                 Py_DECREF(set);
-                goto error;
+                if (true) { STACK_SHRINK(oparg); goto error; }
             }
-            PUSH(set);
+            STACK_SHRINK(oparg);
+            STACK_GROW(1);
+            POKE(1, set);
             DISPATCH();
         }
 
         TARGET(BUILD_MAP) {
-            PyObject *map = _PyDict_FromItems(
-                    &PEEK(2*oparg), 2,
-                    &PEEK(2*oparg - 1), 2,
+            PyObject **values = &PEEK(oparg*2);
+            PyObject *map;
+            map = _PyDict_FromItems(
+                    values, 2,
+                    values+1, 2,
                     oparg);
             if (map == NULL)
                 goto error;
 
-            while (oparg--) {
-                Py_DECREF(POP());
-                Py_DECREF(POP());
+            for (int i = 0; i < oparg; i++) {
+                Py_DECREF(values[i*2]);
+                Py_DECREF(values[i*2+1]);
             }
-            PUSH(map);
+            if (map == NULL) { STACK_SHRINK(oparg*2); goto error; }
+            STACK_SHRINK(oparg*2);
+            STACK_GROW(1);
+            POKE(1, map);
             DISPATCH();
         }
 
@@ -1655,26 +1664,25 @@
         }
 
         TARGET(BUILD_CONST_KEY_MAP) {
+            PyObject *keys = PEEK(1);
+            PyObject **values = &PEEK(1 + oparg);
             PyObject *map;
-            PyObject *keys = TOP();
             if (!PyTuple_CheckExact(keys) ||
                 PyTuple_GET_SIZE(keys) != (Py_ssize_t)oparg) {
                 _PyErr_SetString(tstate, PyExc_SystemError,
                                  "bad BUILD_CONST_KEY_MAP keys argument");
-                goto error;
+                goto error;  // Pop the keys and values.
             }
             map = _PyDict_FromItems(
                     &PyTuple_GET_ITEM(keys, 0), 1,
-                    &PEEK(oparg + 1), 1, oparg);
-            if (map == NULL) {
-                goto error;
-            }
-
-            Py_DECREF(POP());
-            while (oparg--) {
-                Py_DECREF(POP());
+                    values, 1, oparg);
+            Py_DECREF(keys);
+            for (int i = 0; i < oparg; i++) {
+                Py_DECREF(values[i]);
             }
-            PUSH(map);
+            if (map == NULL) { STACK_SHRINK(oparg); goto pop_1_error; }
+            STACK_SHRINK(oparg);
+            POKE(1, map);
             DISPATCH();
         }
 
diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h
index d293adafeeec..e3d8ed340e44 100644
--- a/Python/opcode_metadata.h
+++ b/Python/opcode_metadata.h
@@ -45,8 +45,8 @@ static const struct {
     [BINARY_SUBSCR_TUPLE_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IXC000 },
     [BINARY_SUBSCR_DICT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IXC000 },
     [BINARY_SUBSCR_GETITEM] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IXC000 },
-    [LIST_APPEND] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
-    [SET_ADD] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
+    [LIST_APPEND] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
+    [SET_ADD] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
     [STORE_SUBSCR] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IXC },
     [STORE_SUBSCR_LIST_INT] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IXC },
     [STORE_SUBSCR_DICT] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IXC },
@@ -92,8 +92,8 @@ static const struct {
     [BUILD_STRING] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
     [BUILD_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
     [BUILD_LIST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
-    [LIST_EXTEND] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
-    [SET_UPDATE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
+    [LIST_EXTEND] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
+    [SET_UPDATE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
     [BUILD_SET] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
     [BUILD_MAP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
     [SETUP_ANNOTATIONS] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index 4bd99b78f811..59e249930b45 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -15,14 +15,14 @@
 import parser
 from parser import StackEffect
 
-DEFAULT_INPUT = os.path.relpath(
-    os.path.join(os.path.dirname(__file__), "../../Python/bytecodes.c")
-)
-DEFAULT_OUTPUT = os.path.relpath(
-    os.path.join(os.path.dirname(__file__), "../../Python/generated_cases.c.h")
-)
+HERE = os.path.dirname(__file__)
+ROOT = os.path.join(HERE, "../..")
+THIS = os.path.relpath(__file__, ROOT)
+
+DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
+DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
 DEFAULT_METADATA_OUTPUT = os.path.relpath(
-    os.path.join(os.path.dirname(__file__), "../../Python/opcode_metadata.h")
+    os.path.join(ROOT, "Python/opcode_metadata.h")
 )
 BEGIN_MARKER = "// BEGIN BYTECODES //"
 END_MARKER = "// END BYTECODES //"
@@ -48,6 +48,55 @@
 )
 
 
+def effect_size(effect: StackEffect) -> tuple[int, str]:
+    """Return the 'size' impact of a stack effect.
+
+    Returns a tuple (numeric, symbolic) where:
+
+    - numeric is an int giving the statically analyzable size of the effect
+    - symbolic is a string representing a variable effect (e.g. 'oparg*2')
+
+    At most one of these will be non-zero / non-empty.
+    """
+    if effect.size:
+        return 0, effect.size
+    else:
+        return 1, ""
+
+
+def maybe_parenthesize(sym: str) -> str:
+    """Add parentheses around a string if it contains an operator.
+
+    An exception is made for '*' which is common and harmless
+    in the context where the symbolic size is used.
+    """
+    if re.match(r"^[\s\w*]+$", sym):
+        return sym
+    else:
+        return f"({sym})"
+
+
+def list_effect_size(effects: list[StackEffect]) -> tuple[int, str]:
+    numeric = 0
+    symbolic: list[str] = []
+    for effect in effects:
+        diff, sym = effect_size(effect)
+        numeric += diff
+        if sym:
+            symbolic.append(maybe_parenthesize(sym))
+    return numeric, " + ".join(symbolic)
+
+
+def string_effect_size(arg: tuple[int, str]) -> str:
+    numeric, symbolic = arg
+    if numeric and symbolic:
+        return f"{numeric} + {symbolic}"
+    elif symbolic:
+        return symbolic
+    else:
+        return str(numeric)
+
+
 class Formatter:
     """Wraps an output stream with the ability to indent etc."""
 
@@ -83,28 +132,43 @@ def block(self, head: str):
             yield
         self.emit("}")
 
-    def stack_adjust(self, diff: int):
+    def stack_adjust(self, diff: int, input_effects: list[StackEffect], output_effects: list[StackEffect]):
+        # TODO: Get rid of 'diff' parameter
+        shrink, isym = list_effect_size(input_effects)
+        grow, osym = list_effect_size(output_effects)
+        diff += grow - shrink
+        if isym and isym != osym:
+            self.emit(f"STACK_SHRINK({isym});")
+        if diff < 0:
+            self.emit(f"STACK_SHRINK({-diff});")
         if diff > 0:
             self.emit(f"STACK_GROW({diff});")
-        elif diff < 0:
-            self.emit(f"STACK_SHRINK({-diff});")
+        if osym and osym != isym:
+            self.emit(f"STACK_GROW({osym});")
 
     def declare(self, dst: StackEffect, src: StackEffect | None):
         if dst.name == UNUSED:
             return
-        typ = f"{dst.type} " if dst.type else "PyObject *"
+        typ = f"{dst.type}" if dst.type else "PyObject *"
         init = ""
         if src:
             cast = self.cast(dst, src)
             init = f" = {cast}{src.name}"
-        self.emit(f"{typ}{dst.name}{init};")
+        sepa = "" if typ.endswith("*") else " "
+        self.emit(f"{typ}{sepa}{dst.name}{init};")
 
     def assign(self, dst: StackEffect, src: StackEffect):
         if src.name == UNUSED:
             return
         cast = self.cast(dst, src)
-        if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
+        if m := re.match(r"^PEEK\((.*)\)$", dst.name):
             self.emit(f"POKE({m.group(1)}, {cast}{src.name});")
+        elif m := re.match(r"^&PEEK\(.*\)$", dst.name):
+            # NOTE: MOVE_ITEMS() does not actually exist.
+            # The only supported output array forms are:
+            # - unused[...]
+            # - X[...] where X[...] matches an input array exactly
+            self.emit(f"MOVE_ITEMS({dst.name}, {src.name}, {src.size});")
         elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name):
             self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
         else:
@@ -172,7 +236,7 @@ def __init__(self, inst: parser.InstDef):
             num_dummies = (num_regs // 2) * 2 + 1 - num_regs
             fmt = "I" + "B"*num_regs + "X"*num_dummies
         else:
-            if variable_used(inst.block, "oparg"):
+            if variable_used(inst, "oparg"):
                 fmt = "IB"
             else:
                 fmt = "IX"
@@ -200,7 +264,6 @@ def analyze_registers(self, a: "Analyzer") -> None:
     def write(self, out: Formatter) -> None:
         """Write one instruction, sans prologue and epilogue."""
         # Write a static assertion that a family's cache size is correct
-
         if family := self.family:
             if self.name == family.members[0]:
                 if cache_size := family.size:
@@ -211,8 +274,13 @@ def write(self, out: Formatter) -> None:
 
         if not self.register:
             # Write input stack effect variable declarations and initializations
-            for i, ieffect in enumerate(reversed(self.input_effects), 1):
-                src = StackEffect(f"PEEK({i})", "")
+            ieffects = list(reversed(self.input_effects))
+            for i, ieffect in enumerate(ieffects):
+                isize = string_effect_size(list_effect_size(ieffects[:i+1]))
+                if ieffect.size:
+                    src = StackEffect(f"&PEEK({isize})", "PyObject **")
+                else:
+                    src = StackEffect(f"PEEK({isize})", "")
                 out.declare(ieffect, src)
         else:
             # Write input register variable declarations and initializations
@@ -236,14 +304,19 @@ def write(self, out: Formatter) -> None:
 
         if not self.register:
             # Write net stack growth/shrinkage
-            diff = len(self.output_effects) - len(self.input_effects)
-            out.stack_adjust(diff)
+            out.stack_adjust(0, self.input_effects, self.output_effects)
 
             # Write output stack effect assignments
-            for i, oeffect in enumerate(reversed(self.output_effects), 1):
-                if oeffect.name not in self.unmoved_names:
-                    dst = StackEffect(f"PEEK({i})", "")
-                    out.assign(dst, oeffect)
+            oeffects = list(reversed(self.output_effects))
+            for i, oeffect in enumerate(oeffects):
+                if oeffect.name in self.unmoved_names:
+                    continue
+                osize = string_effect_size(list_effect_size(oeffects[:i+1]))
+                if oeffect.size:
+                    dst = StackEffect(f"&PEEK({osize})", "PyObject **")
+                else:
+                    dst = StackEffect(f"PEEK({osize})", "")
+                out.assign(dst, oeffect)
         else:
             # Write output register assignments
             for oeffect, reg in zip(self.output_effects, self.output_registers):
@@ -287,19 +360,21 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None
                 # The code block is responsible for DECREF()ing them.
                 # NOTE: If the label doesn't exist, just add it to ceval.c.
                 if not self.register:
-                    ninputs = len(self.input_effects)
                     # Don't pop common input/output effects at the bottom!
                     # These aren't DECREF'ed so they can stay.
-                    for ieff, oeff in zip(self.input_effects, self.output_effects):
-                        if ieff.name == oeff.name:
-                            ninputs -= 1
-                        else:
-                            break
+                    ieffs = list(self.input_effects)
+                    oeffs = list(self.output_effects)
+                    while ieffs and oeffs and ieffs[0] == oeffs[0]:
+                        ieffs.pop(0)
+                        oeffs.pop(0)
+                    ninputs, symbolic = list_effect_size(ieffs)
+                    if ninputs:
+                        label = f"pop_{ninputs}_{label}"
                 else:
-                    ninputs = 0
-                if ninputs:
+                    symbolic = ""
+                if symbolic:
                     out.write_raw(
-                        f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n"
+                        f"{extra}{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n"
                     )
                 else:
                     out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
@@ -635,6 +710,13 @@ def stack_analysis(
         for thing in components:
             match thing:
                 case Instruction() as instr:
+                    if any(eff.size for eff in instr.input_effects + instr.output_effects):
+                        # TODO: Eventually this will be needed, at least for macros.
+                        self.error(
+                            f"Instruction {instr.name!r} has variable-sized stack effect, "
+                            "which are not supported in super- or macro instructions",
+                            instr.inst,  # TODO: Pass name+location of super/macro
+                        )
                     current -= len(instr.input_effects)
                     lowest = min(lowest, current)
                     current += len(instr.output_effects)
@@ -673,10 +755,8 @@ def write_metadata(self) -> None:
 
         with open(self.output_filename, "w") as f:
             # Write provenance header
-            f.write(
-                f"// This file is generated by {os.path.relpath(__file__)} --metadata\n"
-            )
-            f.write(f"// from {os.path.relpath(self.filename)}\n")
+            f.write(f"// This file is generated by {THIS} --metadata\n")
+            f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
             f.write(f"// Do not edit!\n")
 
             # Create formatter; the rest of the code uses this
@@ -719,8 +799,11 @@ def write_metadata_for_inst(self, instr: Instruction) -> None:
             n_popped = n_pushed = -1
             assert not instr.register
         else:
-            n_popped = len(instr.input_effects)
-            n_pushed = len(instr.output_effects)
+            n_popped, sym_popped = list_effect_size(instr.input_effects)
+            n_pushed, sym_pushed = list_effect_size(instr.output_effects)
+            if sym_popped or sym_pushed:
+                # TODO: Record symbolic effects (how?)
+                n_popped = n_pushed = -1
             if instr.register:
                 directions: list[str] = []
                 directions.extend("DIR_READ" for _ in instr.input_effects)
@@ -754,8 +837,8 @@ def write_instructions(self) -> None:
         """Write instructions to output file."""
         with open(self.output_filename, "w") as f:
             # Write provenance header
-            f.write(f"// This file is generated by {os.path.relpath(__file__)}\n")
-            f.write(f"// from {os.path.relpath(self.filename)}\n")
+            f.write(f"// This file is generated by {THIS}\n")
+            f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
             f.write(f"// Do not edit!\n")
 
             # Create formatter; the rest of the code uses this
@@ -848,7 +931,9 @@ def wrap_super_or_macro(self, up: SuperOrMacroInstruction):
 
             yield
 
-            self.out.stack_adjust(up.final_sp - up.initial_sp)
+            # TODO: Use slices of up.stack instead of numeric values
+            self.out.stack_adjust(up.final_sp - up.initial_sp, [], [])
+
             for i, var in enumerate(reversed(up.stack[: up.final_sp]), 1):
                 dst = StackEffect(f"PEEK({i})", "")
                 self.out.assign(dst, var)
@@ -899,9 +984,9 @@ def always_exits(lines: list[str]) -> bool:
     )
 
 
-def variable_used(block: parser.Block, name: str) -> bool:
-    """Determine whether a variable with a given name is used in a block."""
-    return any(token.kind == "IDENTIFIER" and token.text == name for token in block.tokens)
+def variable_used(node: parser.Node, name: str) -> bool:
+    """Determine whether a variable with a given name is used in a node."""
+    return any(token.kind == "IDENTIFIER" and token.text == name for token in node.tokens)
 
 
 def main():
diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py
index fad0cb90c69e..c2cebe96ccd6 100644
--- a/Tools/cases_generator/parser.py
+++ b/Tools/cases_generator/parser.py
@@ -38,7 +38,7 @@ def __repr__(self):
 
 @dataclass
 class Node:
-    context: Context | None = field(init=False, default=None)
+    context: Context | None = field(init=False, compare=False, default=None)
 
     @property
     def text(self) -> str:
@@ -51,19 +51,37 @@ def to_text(self, dedent: int = 0) -> str:
         tokens = context.owner.tokens
         begin = context.begin
         end = context.end
-        return lx.to_text(tokens[begin:end], dedent)
+        return lx.to_text(self.tokens, dedent)
+
+    @property
+    def tokens(self) -> list[lx.Token]:
+        context = self.context
+        if not context:
+            return []
+        tokens = context.owner.tokens
+        begin = context.begin
+        end = context.end
+        return tokens[begin:end]
 
 
 @dataclass
 class Block(Node):
-    tokens: list[lx.Token]
+    # This just holds a context which has the list of tokens.
+    pass
 
 
 @dataclass
 class StackEffect(Node):
     name: str
-    type: str = ""
-    # TODO: array, condition
+    type: str = ""  # Optional `:type`
+    size: str = ""  # Optional `[size]`
+    # Note: we can have type or size but not both
+    # TODO: condition (can be combined with type but not size)
+
+
+ at dataclass
+class Dimension(Node):
+    size: str
 
 
 @dataclass
@@ -221,13 +239,31 @@ def cache_effect(self) -> CacheEffect | None:
 
     @contextual
     def stack_effect(self) -> StackEffect | None:
-        # IDENTIFIER [':' IDENTIFIER]
-        # TODO: Arrays, conditions
+        # IDENTIFIER
+        #   | IDENTIFIER ':' IDENTIFIER
+        #   | IDENTIFIER '[' dimension ']'
+        # TODO: Conditions
         if tkn := self.expect(lx.IDENTIFIER):
-            type = ""
             if self.expect(lx.COLON):
-                type = self.require(lx.IDENTIFIER).text
-            return StackEffect(tkn.text, type)
+                typ = self.require(lx.IDENTIFIER)
+                return StackEffect(tkn.text, typ.text)
+            elif self.expect(lx.LBRACKET):
+                if not (dim := self.dimension()):
+                    raise self.make_syntax_error("Expected dimension")
+                self.require(lx.RBRACKET)
+                return StackEffect(tkn.text, "PyObject **", dim.text.strip())
+            else:
+                return StackEffect(tkn.text)
+
+    @contextual
+    def dimension(self) -> Dimension | None:
+        tokens: list[lx.Token] = []
+        while (tkn := self.peek()) and tkn.kind != lx.RBRACKET:
+            tokens.append(tkn)
+            self.next()
+        if not tokens:
+            return None
+        return Dimension(lx.to_text(tokens).strip())
 
     @contextual
     def super_def(self) -> Super | None:
@@ -331,8 +367,8 @@ def members(self) -> list[str] | None:
 
     @contextual
     def block(self) -> Block:
-        tokens = self.c_blob()
-        return Block(tokens)
+        if self.c_blob():
+            return Block()
 
     def c_blob(self) -> list[lx.Token]:
         tokens: list[lx.Token] = []
diff --git a/Tools/cases_generator/test_generator.py b/Tools/cases_generator/test_generator.py
index e707a5334268..ae0c1e2f97c3 100644
--- a/Tools/cases_generator/test_generator.py
+++ b/Tools/cases_generator/test_generator.py
@@ -4,6 +4,36 @@
 import tempfile
 
 import generate_cases
+from parser import StackEffect
+
+
+def test_effect_sizes():
+    input_effects = [
+        x := StackEffect("x", "", ""),
+        y := StackEffect("y", "", "oparg"),
+        z := StackEffect("z", "", "oparg*2"),
+    ]
+    output_effects = [
+        a := StackEffect("a", "", ""),
+        b := StackEffect("b", "", "oparg*4"),
+        c := StackEffect("c", "", ""),
+    ]
+    other_effects = [
+        p := StackEffect("p", "", "oparg<<1"),
+        q := StackEffect("q", "", ""),
+        r := StackEffect("r", "", ""),
+    ]
+    assert generate_cases.effect_size(x) == (1, "")
+    assert generate_cases.effect_size(y) == (0, "oparg")
+    assert generate_cases.effect_size(z) == (0, "oparg*2")
+
+    assert generate_cases.list_effect_size(input_effects) == (1, "oparg + oparg*2")
+    assert generate_cases.list_effect_size(output_effects) == (2, "oparg*4")
+    assert generate_cases.list_effect_size(other_effects) == (2, "(oparg<<1)")
+
+    assert generate_cases.string_effect_size(generate_cases.list_effect_size(input_effects)) == "1 + oparg + oparg*2"
+    assert generate_cases.string_effect_size(generate_cases.list_effect_size(output_effects)) == "2 + oparg*4"
+    assert generate_cases.string_effect_size(generate_cases.list_effect_size(other_effects)) == "2 + (oparg<<1)"
 
 
 def run_cases_test(input: str, expected: str):
@@ -123,6 +153,24 @@ def test_binary_op():
     """
     run_cases_test(input, output)
 
+def test_overlap():
+    input = """
+        inst(OP, (left, right -- left, result)) {
+            spam();
+        }
+    """
+    output = """
+        TARGET(OP) {
+            PyObject *right = PEEK(1);
+            PyObject *left = PEEK(2);
+            PyObject *result;
+            spam();
+            POKE(1, result);
+            DISPATCH();
+        }
+    """
+    run_cases_test(input, output)
+
 def test_predictions():
     input = """
         inst(OP1, (--)) {
@@ -308,3 +356,81 @@ def test_macro_instruction():
         }
     """
     run_cases_test(input, output)
+
+def test_array_input():
+    input = """
+        inst(OP, (below, values[oparg*2], above --)) {
+            spam();
+        }
+    """
+    output = """
+        TARGET(OP) {
+            PyObject *above = PEEK(1);
+            PyObject **values = &PEEK(1 + oparg*2);
+            PyObject *below = PEEK(2 + oparg*2);
+            spam();
+            STACK_SHRINK(oparg*2);
+            STACK_SHRINK(2);
+            DISPATCH();
+        }
+    """
+    run_cases_test(input, output)
+
+def test_array_output():
+    input = """
+        inst(OP, (-- below, values[oparg*3], above)) {
+            spam();
+        }
+    """
+    output = """
+        TARGET(OP) {
+            PyObject *below;
+            PyObject **values;
+            PyObject *above;
+            spam();
+            STACK_GROW(2);
+            STACK_GROW(oparg*3);
+            POKE(1, above);
+            MOVE_ITEMS(&PEEK(1 + oparg*3), values, oparg*3);
+            POKE(2 + oparg*3, below);
+            DISPATCH();
+        }
+    """
+    run_cases_test(input, output)
+
+def test_array_input_output():
+    input = """
+        inst(OP, (below, values[oparg] -- values[oparg], above)) {
+            spam();
+        }
+    """
+    output = """
+        TARGET(OP) {
+            PyObject **values = &PEEK(oparg);
+            PyObject *below = PEEK(1 + oparg);
+            PyObject *above;
+            spam();
+            POKE(1, above);
+            MOVE_ITEMS(&PEEK(1 + oparg), values, oparg);
+            DISPATCH();
+        }
+    """
+    run_cases_test(input, output)
+
+def test_array_error_if():
+    input = """
+        inst(OP, (extra, values[oparg] --)) {
+            ERROR_IF(oparg == 0, somewhere);
+        }
+    """
+    output = """
+        TARGET(OP) {
+            PyObject **values = &PEEK(oparg);
+            PyObject *extra = PEEK(1 + oparg);
+            if (oparg == 0) { STACK_SHRINK(oparg); goto pop_1_somewhere; }
+            STACK_SHRINK(oparg);
+            STACK_SHRINK(1);
+            DISPATCH();
+        }
+    """
+    run_cases_test(input, output)



More information about the Python-checkins mailing list