[Python-checkins] bpo-43693: Clean up the PyCodeObject fields. (GH-26364)

markshannon webhook-mailer at python.org
Wed May 26 15:15:53 EDT 2021


https://github.com/python/cpython/commit/6cc800d3634fdd002b986c3ffe6a3d5540f311a0
commit: 6cc800d3634fdd002b986c3ffe6a3d5540f311a0
branch: main
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: markshannon <mark at hotpy.org>
date: 2021-05-26T20:15:40+01:00
summary:

bpo-43693: Clean up the PyCodeObject fields. (GH-26364)

* Move up the comment about fields using in hashing/comparision.

* Group the fields more clearly.

* Add co_ncellvars and co_nfreevars.

* Raise ValueError if nlocals != len(varnames), rather than aborting.

files:
M Include/cpython/code.h
M Lib/test/test_code.py
M Objects/codeobject.c
M Objects/frameobject.c
M Objects/funcobject.c
M Objects/typeobject.c
M Python/ceval.c

diff --git a/Include/cpython/code.h b/Include/cpython/code.h
index 575a4b72b2e0b..990f36787c5dd 100644
--- a/Include/cpython/code.h
+++ b/Include/cpython/code.h
@@ -17,31 +17,62 @@ typedef struct _PyOpcache _PyOpcache;
 /* Bytecode object */
 struct PyCodeObject {
     PyObject_HEAD
+
+    /* Note only the following fields are used in hash and/or comparisons
+     *
+     * - co_name
+     * - co_argcount
+     * - co_posonlyargcount
+     * - co_kwonlyargcount
+     * - co_nlocals
+     * - co_stacksize
+     * - co_flags
+     * - co_firstlineno
+     * - co_code
+     * - co_consts
+     * - co_names
+     * - co_varnames
+     * - co_freevars
+     * - co_cellvars
+     *
+     * This is done to preserve the name and line number for tracebacks
+     * and debuggers; otherwise, constant de-duplication would collapse
+     * identical functions/lambdas defined on different lines.
+     */
+
+    /* These fields are set with provided values on new code objects. */
+
+    // The hottest fields (in the eval loop) are grouped here at the top.
+    PyObject *co_code;          /* instruction opcodes */
+    PyObject *co_consts;        /* list (constants used) */
+    PyObject *co_names;         /* list of strings (names used) */
+    int co_flags;               /* CO_..., see below */
+    // The rest are not so impactful on performance.
     int co_argcount;            /* #arguments, except *args */
     int co_posonlyargcount;     /* #positional only arguments */
     int co_kwonlyargcount;      /* #keyword only arguments */
-    int co_nlocals;             /* #local variables */
     int co_stacksize;           /* #entries needed for evaluation stack */
-    int co_flags;               /* CO_..., see below */
     int co_firstlineno;         /* first source line number */
-    PyObject *co_code;          /* instruction opcodes */
-    PyObject *co_consts;        /* list (constants used) */
-    PyObject *co_names;         /* list of strings (names used) */
     PyObject *co_varnames;      /* tuple of strings (local variable names) */
-    PyObject *co_freevars;      /* tuple of strings (free variable names) */
     PyObject *co_cellvars;      /* tuple of strings (cell variable names) */
-    /* The rest aren't used in either hash or comparisons, except for co_name,
-       used in both. This is done to preserve the name and line number
-       for tracebacks and debuggers; otherwise, constant de-duplication
-       would collapse identical functions/lambdas defined on different lines.
-    */
-    Py_ssize_t *co_cell2arg;    /* Maps cell vars which are arguments. */
+    PyObject *co_freevars;      /* tuple of strings (free variable names) */
     PyObject *co_filename;      /* unicode (where it was loaded from) */
     PyObject *co_name;          /* unicode (name, for reference) */
     PyObject *co_linetable;     /* string (encoding addr<->lineno mapping) See
                                    Objects/lnotab_notes.txt for details. */
-    int co_nlocalsplus;         /* Number of locals + free + cell variables */
     PyObject *co_exceptiontable; /* Byte string encoding exception handling table */
+
+    /* These fields are set with computed values on new code objects. */
+
+    Py_ssize_t *co_cell2arg;    /* Maps cell vars which are arguments. */
+    // These are redundant but offer some performance benefit.
+    int co_nlocalsplus;         /* number of local + cell + free variables */
+    int co_nlocals;             /* number of local variables */
+    int co_ncellvars;           /* number of cell variables */
+    int co_nfreevars;           /* number of free variables */
+
+    /* The remaining fields are zeroed out on new code objects. */
+
     PyObject *co_weakreflist;   /* to support weakrefs to code objects */
     /* Scratch space for extra data relating to the code object.
        Type is a void* to keep the format private in codeobject.c to force
diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py
index c3dd678cd13b4..244acab560a84 100644
--- a/Lib/test/test_code.py
+++ b/Lib/test/test_code.py
@@ -240,6 +240,7 @@ def func():
         # different co_name, co_varnames, co_consts
         def func2():
             y = 2
+            z = 3
             return y
         code2 = func2.__code__
 
@@ -247,14 +248,14 @@ def func2():
             ("co_argcount", 0),
             ("co_posonlyargcount", 0),
             ("co_kwonlyargcount", 0),
-            ("co_nlocals", 0),
+            ("co_nlocals", 1),
             ("co_stacksize", 0),
             ("co_flags", code.co_flags | inspect.CO_COROUTINE),
             ("co_firstlineno", 100),
             ("co_code", code2.co_code),
             ("co_consts", code2.co_consts),
             ("co_names", ("myname",)),
-            ("co_varnames", code2.co_varnames),
+            ("co_varnames", ('spam',)),
             ("co_freevars", ("freevar",)),
             ("co_cellvars", ("cellvar",)),
             ("co_filename", "newfilename"),
@@ -265,6 +266,47 @@ def func2():
                 new_code = code.replace(**{attr: value})
                 self.assertEqual(getattr(new_code, attr), value)
 
+        new_code = code.replace(co_varnames=code2.co_varnames,
+                                co_nlocals=code2.co_nlocals)
+        self.assertEqual(new_code.co_varnames, code2.co_varnames)
+        self.assertEqual(new_code.co_nlocals, code2.co_nlocals)
+
+    def test_nlocals_mismatch(self):
+        def func():
+            x = 1
+            return x
+        co = func.__code__
+        assert co.co_nlocals > 0;
+
+        # First we try the constructor.
+        CodeType = type(co)
+        for diff in (-1, 1):
+            with self.assertRaises(ValueError):
+                CodeType(co.co_argcount,
+                         co.co_posonlyargcount,
+                         co.co_kwonlyargcount,
+                         # This is the only change.
+                         co.co_nlocals + diff,
+                         co.co_stacksize,
+                         co.co_flags,
+                         co.co_code,
+                         co.co_consts,
+                         co.co_names,
+                         co.co_varnames,
+                         co.co_filename,
+                         co.co_name,
+                         co.co_firstlineno,
+                         co.co_lnotab,
+                         co.co_exceptiontable,
+                         co.co_freevars,
+                         co.co_cellvars,
+                         )
+        # Then we try the replace method.
+        with self.assertRaises(ValueError):
+            co.replace(co_nlocals=co.co_nlocals - 1)
+        with self.assertRaises(ValueError):
+            co.replace(co_nlocals=co.co_nlocals + 1)
+
     def test_empty_linetable(self):
         def func():
             pass
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 8b2cee2ea4abe..65df3e326334f 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -164,7 +164,8 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
 {
     PyCodeObject *co;
     Py_ssize_t *cell2arg = NULL;
-    Py_ssize_t i, n_cellvars, n_varnames, total_args;
+    Py_ssize_t i, total_args;
+    int ncellvars, nfreevars;
 
     /* Check argument types */
     if (argcount < posonlyargcount || posonlyargcount < 0 ||
@@ -184,6 +185,19 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
         return NULL;
     }
 
+    /* Make sure that code is indexable with an int, this is
+       a long running assumption in ceval.c and many parts of
+       the interpreter. */
+    if (PyBytes_GET_SIZE(code) > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError, "co_code larger than INT_MAX");
+        return NULL;
+    }
+
+    if (nlocals != PyTuple_GET_SIZE(varnames)) {
+        PyErr_SetString(PyExc_ValueError, "co_nlocals != len(co_varnames)");
+        return NULL;
+    }
+
     /* Ensure that strings are ready Unicode string */
     if (PyUnicode_READY(name) < 0) {
         return NULL;
@@ -208,46 +222,40 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
         return NULL;
     }
 
-    /* Make sure that code is indexable with an int, this is
-       a long running assumption in ceval.c and many parts of
-       the interpreter. */
-    if (PyBytes_GET_SIZE(code) > INT_MAX) {
-        PyErr_SetString(PyExc_OverflowError, "co_code larger than INT_MAX");
-        return NULL;
-    }
-
     /* Check for any inner or outer closure references */
-    n_cellvars = PyTuple_GET_SIZE(cellvars);
-    if (!n_cellvars && !PyTuple_GET_SIZE(freevars)) {
+    assert(PyTuple_GET_SIZE(cellvars) < INT_MAX);
+    ncellvars = (int)PyTuple_GET_SIZE(cellvars);
+    assert(PyTuple_GET_SIZE(freevars) < INT_MAX);
+    nfreevars = (int)PyTuple_GET_SIZE(freevars);
+    if (!ncellvars && !nfreevars) {
         flags |= CO_NOFREE;
     } else {
         flags &= ~CO_NOFREE;
     }
 
-    n_varnames = PyTuple_GET_SIZE(varnames);
-    if (argcount <= n_varnames && kwonlyargcount <= n_varnames) {
+    if (argcount <= nlocals && kwonlyargcount <= nlocals) {
         /* Never overflows. */
         total_args = (Py_ssize_t)argcount + (Py_ssize_t)kwonlyargcount +
                       ((flags & CO_VARARGS) != 0) + ((flags & CO_VARKEYWORDS) != 0);
     }
     else {
-        total_args = n_varnames + 1;
+        total_args = nlocals + 1;
     }
-    if (total_args > n_varnames) {
+    if (total_args > nlocals) {
         PyErr_SetString(PyExc_ValueError, "code: varnames is too small");
         return NULL;
     }
 
     /* Create mapping between cells and arguments if needed. */
-    if (n_cellvars) {
+    if (ncellvars) {
         bool used_cell2arg = false;
-        cell2arg = PyMem_NEW(Py_ssize_t, n_cellvars);
+        cell2arg = PyMem_NEW(Py_ssize_t, ncellvars);
         if (cell2arg == NULL) {
             PyErr_NoMemory();
             return NULL;
         }
         /* Find cells which are also arguments. */
-        for (i = 0; i < n_cellvars; i++) {
+        for (i = 0; i < ncellvars; i++) {
             Py_ssize_t j;
             PyObject *cell = PyTuple_GET_ITEM(cellvars, i);
             cell2arg[i] = CO_CELL_NOT_AN_ARG;
@@ -279,9 +287,10 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
     co->co_argcount = argcount;
     co->co_posonlyargcount = posonlyargcount;
     co->co_kwonlyargcount = kwonlyargcount;
+    co->co_nlocalsplus = nlocals + ncellvars + nfreevars;
     co->co_nlocals = nlocals;
-    co->co_nlocalsplus = nlocals +
-        (int)PyTuple_GET_SIZE(freevars) + (int)PyTuple_GET_SIZE(cellvars);
+    co->co_ncellvars = ncellvars;
+    co->co_nfreevars = nfreevars;
     co->co_stacksize = stacksize;
     co->co_flags = flags;
     Py_INCREF(code);
@@ -1139,7 +1148,7 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
     _PyCodeObjectExtra *co_extra = (_PyCodeObjectExtra*) co->co_extra;
 
     if (co->co_cell2arg != NULL && co->co_cellvars != NULL) {
-        res += PyTuple_GET_SIZE(co->co_cellvars) * sizeof(Py_ssize_t);
+        res += co->co_ncellvars * sizeof(Py_ssize_t);
     }
     if (co_extra != NULL) {
         res += sizeof(_PyCodeObjectExtra) +
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index 87c485202c24e..1bfae90b9b2c8 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -1023,7 +1023,6 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
     PyObject **fast;
     PyCodeObject *co;
     Py_ssize_t j;
-    Py_ssize_t ncells, nfreevars;
 
     if (f == NULL) {
         PyErr_BadInternalCall();
@@ -1051,10 +1050,8 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
         if (map_to_dict(map, j, locals, fast, 0) < 0)
             return -1;
     }
-    ncells = PyTuple_GET_SIZE(co->co_cellvars);
-    nfreevars = PyTuple_GET_SIZE(co->co_freevars);
-    if (ncells || nfreevars) {
-        if (map_to_dict(co->co_cellvars, ncells,
+    if (co->co_ncellvars || co->co_nfreevars) {
+        if (map_to_dict(co->co_cellvars, co->co_ncellvars,
                         locals, fast + co->co_nlocals, 1))
             return -1;
 
@@ -1067,8 +1064,8 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
            into the locals dict used by the class.
         */
         if (co->co_flags & CO_OPTIMIZED) {
-            if (map_to_dict(co->co_freevars, nfreevars,
-                            locals, fast + co->co_nlocals + ncells, 1) < 0)
+            if (map_to_dict(co->co_freevars, co->co_nfreevars, locals,
+                            fast + co->co_nlocals + co->co_ncellvars, 1) < 0)
                 return -1;
         }
     }
@@ -1096,7 +1093,6 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
     PyObject *error_type, *error_value, *error_traceback;
     PyCodeObject *co;
     Py_ssize_t j;
-    Py_ssize_t ncells, nfreevars;
     if (f == NULL)
         return;
     locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET];
@@ -1113,16 +1109,14 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
         j = co->co_nlocals;
     if (co->co_nlocals)
         dict_to_map(co->co_varnames, j, locals, fast, 0, clear);
-    ncells = PyTuple_GET_SIZE(co->co_cellvars);
-    nfreevars = PyTuple_GET_SIZE(co->co_freevars);
-    if (ncells || nfreevars) {
-        dict_to_map(co->co_cellvars, ncells,
+    if (co->co_ncellvars || co->co_nfreevars) {
+        dict_to_map(co->co_cellvars, co->co_ncellvars,
                     locals, fast + co->co_nlocals, 1, clear);
         /* Same test as in PyFrame_FastToLocals() above. */
         if (co->co_flags & CO_OPTIMIZED) {
-            dict_to_map(co->co_freevars, nfreevars,
-                locals, fast + co->co_nlocals + ncells, 1,
-                clear);
+            dict_to_map(co->co_freevars, co->co_nfreevars, locals,
+                        fast + co->co_nlocals + co->co_ncellvars, 1,
+                        clear);
         }
     }
     PyErr_Restore(error_type, error_value, error_traceback);
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index f0b0b673d4fa4..dc2721593b321 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -279,7 +279,8 @@ func_get_code(PyFunctionObject *op, void *Py_UNUSED(ignored))
 static int
 func_set_code(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignored))
 {
-    Py_ssize_t nfree, nclosure;
+    Py_ssize_t nclosure;
+    int nfree;
 
     /* Not legal to del f.func_code or to set it to anything
      * other than a code object. */
@@ -294,7 +295,7 @@ func_set_code(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignored))
         return -1;
     }
 
-    nfree = PyCode_GetNumFree((PyCodeObject *)value);
+    nfree = ((PyCodeObject *)value)->co_nfreevars;
     nclosure = (op->func_closure == NULL ? 0 :
             PyTuple_GET_SIZE(op->func_closure));
     if (nclosure != nfree) {
@@ -538,7 +539,7 @@ func_new_impl(PyTypeObject *type, PyCodeObject *code, PyObject *globals,
 /*[clinic end generated code: output=99c6d9da3a24e3be input=93611752fc2daf11]*/
 {
     PyFunctionObject *newfunc;
-    Py_ssize_t nfree, nclosure;
+    Py_ssize_t nclosure;
 
     if (name != Py_None && !PyUnicode_Check(name)) {
         PyErr_SetString(PyExc_TypeError,
@@ -550,9 +551,8 @@ func_new_impl(PyTypeObject *type, PyCodeObject *code, PyObject *globals,
                         "arg 4 (defaults) must be None or tuple");
         return NULL;
     }
-    nfree = PyTuple_GET_SIZE(code->co_freevars);
     if (!PyTuple_Check(closure)) {
-        if (nfree && closure == Py_None) {
+        if (code->co_nfreevars && closure == Py_None) {
             PyErr_SetString(PyExc_TypeError,
                             "arg 5 (closure) must be tuple");
             return NULL;
@@ -566,10 +566,10 @@ func_new_impl(PyTypeObject *type, PyCodeObject *code, PyObject *globals,
 
     /* check that the closure is well-formed */
     nclosure = closure == Py_None ? 0 : PyTuple_GET_SIZE(closure);
-    if (nfree != nclosure)
+    if (code->co_nfreevars != nclosure)
         return PyErr_Format(PyExc_ValueError,
                             "%U requires closure of length %zd, not %zd",
-                            code->co_name, nfree, nclosure);
+                            code->co_name, code->co_nfreevars, nclosure);
     if (nclosure) {
         Py_ssize_t i;
         for (i = 0; i < nclosure; i++) {
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 14600855de8bb..feb25aa0b78e4 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -8848,11 +8848,10 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co,
     }
 
     PyObject *obj = f->f_localsptr[0];
-    Py_ssize_t i, n;
+    Py_ssize_t i;
     if (obj == NULL && co->co_cell2arg) {
         /* The first argument might be a cell. */
-        n = PyTuple_GET_SIZE(co->co_cellvars);
-        for (i = 0; i < n; i++) {
+        for (i = 0; i < co->co_ncellvars; i++) {
             if (co->co_cell2arg[i] == 0) {
                 PyObject *cell = f->f_localsptr[co->co_nlocals + i];
                 assert(PyCell_Check(cell));
@@ -8867,21 +8866,12 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co,
         return -1;
     }
 
-    if (co->co_freevars == NULL) {
-        n = 0;
-    }
-    else {
-        assert(PyTuple_Check(co->co_freevars));
-        n = PyTuple_GET_SIZE(co->co_freevars);
-    }
-
     PyTypeObject *type = NULL;
-    for (i = 0; i < n; i++) {
+    for (i = 0; i < co->co_nfreevars; i++) {
         PyObject *name = PyTuple_GET_ITEM(co->co_freevars, i);
         assert(PyUnicode_Check(name));
         if (_PyUnicode_EqualToASCIIId(name, &PyId___class__)) {
-            Py_ssize_t index = co->co_nlocals +
-                PyTuple_GET_SIZE(co->co_cellvars) + i;
+            Py_ssize_t index = co->co_nlocals + co->co_ncellvars + i;
             PyObject *cell = f->f_localsptr[index];
             if (cell == NULL || !PyCell_Check(cell)) {
                 PyErr_SetString(PyExc_RuntimeError,
diff --git a/Python/ceval.c b/Python/ceval.c
index e4a6a65fac43f..3bbcfe9c237d8 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -3081,9 +3081,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
             PyObject *name, *value, *locals = LOCALS();
             Py_ssize_t idx;
             assert(locals);
-            assert(oparg >= PyTuple_GET_SIZE(co->co_cellvars));
-            idx = oparg - PyTuple_GET_SIZE(co->co_cellvars);
-            assert(idx >= 0 && idx < PyTuple_GET_SIZE(co->co_freevars));
+            assert(oparg >= co->co_ncellvars);
+            idx = oparg - co->co_ncellvars;
+            assert(idx >= 0 && idx < co->co_nfreevars);
             name = PyTuple_GET_ITEM(co->co_freevars, idx);
             if (PyDict_CheckExact(locals)) {
                 value = PyDict_GetItemWithError(locals, name);
@@ -5062,7 +5062,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
 
     /* Allocate and initialize storage for cell vars, and copy free
        vars into frame. */
-    for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) {
+    for (i = 0; i < co->co_ncellvars; ++i) {
         PyObject *c;
         Py_ssize_t arg;
         /* Possibly account for the cell variable being an argument. */
@@ -5081,10 +5081,10 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
     }
 
     /* Copy closure variables to free variables */
-    for (i = 0; i < PyTuple_GET_SIZE(co->co_freevars); ++i) {
+    for (i = 0; i < co->co_nfreevars; ++i) {
         PyObject *o = PyTuple_GET_ITEM(con->fc_closure, i);
         Py_INCREF(o);
-        freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o;
+        freevars[co->co_ncellvars + i] = o;
     }
 
     return 0;
@@ -6417,7 +6417,7 @@ format_exc_unbound(PyThreadState *tstate, PyCodeObject *co, int oparg)
     /* Don't stomp existing exception */
     if (_PyErr_Occurred(tstate))
         return;
-    if (oparg < PyTuple_GET_SIZE(co->co_cellvars)) {
+    if (oparg < co->co_ncellvars) {
         name = PyTuple_GET_ITEM(co->co_cellvars,
                                 oparg);
         format_exc_check_arg(tstate,
@@ -6425,8 +6425,7 @@ format_exc_unbound(PyThreadState *tstate, PyCodeObject *co, int oparg)
             UNBOUNDLOCAL_ERROR_MSG,
             name);
     } else {
-        name = PyTuple_GET_ITEM(co->co_freevars, oparg -
-                                PyTuple_GET_SIZE(co->co_cellvars));
+        name = PyTuple_GET_ITEM(co->co_freevars, oparg - co->co_ncellvars);
         format_exc_check_arg(tstate, PyExc_NameError,
                              UNBOUNDFREE_ERROR_MSG, name);
     }



More information about the Python-checkins mailing list