[Python-checkins] bpo-45395: Make custom frozen modules additions instead of replacements. (gh-28778)

ericsnowcurrently webhook-mailer at python.org
Thu Oct 28 17:04:43 EDT 2021


https://github.com/python/cpython/commit/074fa5750640a067d9894c69378a00ceecc3b948
commit: 074fa5750640a067d9894c69378a00ceecc3b948
branch: main
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: ericsnowcurrently <ericsnowcurrently at gmail.com>
date: 2021-10-28T15:04:33-06:00
summary:

bpo-45395: Make custom frozen modules additions instead of replacements. (gh-28778)

Currently, custom frozen modules (the array that PyImport_FrozenModules points to) replace all the frozen stdlib modules. That can be problematic and is unlikely to be what the user wants. This change treats the custom frozen modules as additions instead. They take precedence over all other frozen modules except for those needed to bootstrap the import system. If the "code" field of an entry in the custom array is NULL, then that frozen module is treated as disabled, which allows a custom entry to disable a frozen stdlib module.

This change allows us to get rid of is_essential_frozen_module() and simplifies the logic for which frozen modules should be ignored.

https://bugs.python.org/issue45395

files:
A Misc/NEWS.d/next/C API/2021-10-06-15-54-40.bpo-45395.yVhdAl.rst
M Doc/library/ctypes.rst
M Include/internal/pycore_import.h
M Lib/ctypes/test/test_values.py
M Programs/_freeze_module.c
M Programs/_testembed.c
M Python/frozen.c
M Python/import.c
M Tools/freeze/freeze.py
M Tools/scripts/freeze_modules.py

diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst
index 87c9c66edd67c..74611144123b0 100644
--- a/Doc/library/ctypes.rst
+++ b/Doc/library/ctypes.rst
@@ -1095,7 +1095,7 @@ We have defined the :c:type:`struct _frozen` data type, so we can get the pointe
 to the table::
 
    >>> FrozenTable = POINTER(struct_frozen)
-   >>> table = FrozenTable.in_dll(pythonapi, "PyImport_FrozenModules")
+   >>> table = FrozenTable.in_dll(pythonapi, "_PyImport_FrozenBootstrap")
    >>>
 
 Since ``table`` is a ``pointer`` to the array of ``struct_frozen`` records, we
@@ -1111,9 +1111,7 @@ hit the ``NULL`` entry::
    ...
    _frozen_importlib 31764
    _frozen_importlib_external 41499
-   __hello__ 161
-   __phello__ -161
-   __phello__.spam 161
+   zipimport 12345
    >>>
 
 The fact that standard Python has a frozen module and a frozen package
diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h
index 6439b7369fb59..aee1f66a3ea17 100644
--- a/Include/internal/pycore_import.h
+++ b/Include/internal/pycore_import.h
@@ -15,6 +15,9 @@ struct _module_alias {
     const char *orig;                 /* ASCII encoded string */
 };
 
+PyAPI_DATA(const struct _frozen *) _PyImport_FrozenBootstrap;
+PyAPI_DATA(const struct _frozen *) _PyImport_FrozenStdlib;
+PyAPI_DATA(const struct _frozen *) _PyImport_FrozenTest;
 extern const struct _module_alias * _PyImport_FrozenAliases;
 
 #ifdef __cplusplus
diff --git a/Lib/ctypes/test/test_values.py b/Lib/ctypes/test/test_values.py
index 96521fd3abce9..5f9fa066c4a41 100644
--- a/Lib/ctypes/test/test_values.py
+++ b/Lib/ctypes/test/test_values.py
@@ -56,35 +56,37 @@ class struct_frozen(Structure):
                         ("size", c_int)]
         FrozenTable = POINTER(struct_frozen)
 
-        ft = FrozenTable.in_dll(pythonapi, "PyImport_FrozenModules")
-        # ft is a pointer to the struct_frozen entries:
         modules = []
-        for entry in ft:
-            # This is dangerous. We *can* iterate over a pointer, but
-            # the loop will not terminate (maybe with an access
-            # violation;-) because the pointer instance has no size.
-            if entry.name is None:
-                break
-            modname = entry.name.decode("ascii")
-            modules.append(modname)
-            with self.subTest(modname):
-                # Do a sanity check on entry.size and entry.code.
-                self.assertGreater(abs(entry.size), 10)
-                self.assertTrue([entry.code[i] for i in range(abs(entry.size))])
-                # Check the module's package-ness.
-                with import_helper.frozen_modules():
-                    spec = importlib.util.find_spec(modname)
-                if entry.size < 0:
-                    # It's a package.
-                    self.assertIsNotNone(spec.submodule_search_locations)
-                else:
-                    self.assertIsNone(spec.submodule_search_locations)
+        for group in ["Bootstrap", "Stdlib", "Test"]:
+            ft = FrozenTable.in_dll(pythonapi, f"_PyImport_Frozen{group}")
+            # ft is a pointer to the struct_frozen entries:
+            for entry in ft:
+                # This is dangerous. We *can* iterate over a pointer, but
+                # the loop will not terminate (maybe with an access
+                # violation;-) because the pointer instance has no size.
+                if entry.name is None:
+                    break
+                modname = entry.name.decode("ascii")
+                modules.append(modname)
+                with self.subTest(modname):
+                    # Do a sanity check on entry.size and entry.code.
+                    self.assertGreater(abs(entry.size), 10)
+                    self.assertTrue([entry.code[i] for i in range(abs(entry.size))])
+                    # Check the module's package-ness.
+                    with import_helper.frozen_modules():
+                        spec = importlib.util.find_spec(modname)
+                    if entry.size < 0:
+                        # It's a package.
+                        self.assertIsNotNone(spec.submodule_search_locations)
+                    else:
+                        self.assertIsNone(spec.submodule_search_locations)
 
         with import_helper.frozen_modules():
             expected = _imp._frozen_module_names()
         self.maxDiff = None
-        self.assertEqual(modules, expected, "PyImport_FrozenModules example "
-            "in Doc/library/ctypes.rst may be out of date")
+        self.assertEqual(modules, expected,
+                         "_PyImport_FrozenBootstrap example "
+                         "in Doc/library/ctypes.rst may be out of date")
 
         from ctypes import _pointer_type_cache
         del _pointer_type_cache[struct_frozen]
diff --git a/Misc/NEWS.d/next/C API/2021-10-06-15-54-40.bpo-45395.yVhdAl.rst b/Misc/NEWS.d/next/C API/2021-10-06-15-54-40.bpo-45395.yVhdAl.rst
new file mode 100644
index 0000000000000..8996513dc9fb0
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2021-10-06-15-54-40.bpo-45395.yVhdAl.rst	
@@ -0,0 +1,4 @@
+Custom frozen modules (the array set to ``PyImport_FrozenModules``) are now
+treated as additions, rather than replacing all the default frozen modules.
+Frozen stdlib modules can still be disabled by setting the "code" field of
+the custom array entry to NULL.
diff --git a/Programs/_freeze_module.c b/Programs/_freeze_module.c
index 316c70d2c7824..e3f6c11c8b073 100644
--- a/Programs/_freeze_module.c
+++ b/Programs/_freeze_module.c
@@ -23,13 +23,16 @@
    of frozen modules instead, left deliberately blank so as to avoid
    unintentional import of a stale version of _frozen_importlib. */
 
-static const struct _frozen _PyImport_FrozenModules[] = {
+static const struct _frozen no_modules[] = {
     {0, 0, 0} /* sentinel */
 };
 static const struct _module_alias aliases[] = {
     {0, 0} /* sentinel */
 };
 
+const struct _frozen *_PyImport_FrozenBootstrap;
+const struct _frozen *_PyImport_FrozenStdlib;
+const struct _frozen *_PyImport_FrozenTest;
 const struct _frozen *PyImport_FrozenModules;
 const struct _module_alias *_PyImport_FrozenAliases;
 
@@ -188,7 +191,10 @@ main(int argc, char *argv[])
 {
     const char *name, *inpath, *outpath;
 
-    PyImport_FrozenModules = _PyImport_FrozenModules;
+    _PyImport_FrozenBootstrap = no_modules;
+    _PyImport_FrozenStdlib = no_modules;
+    _PyImport_FrozenTest = no_modules;
+    PyImport_FrozenModules = NULL;
     _PyImport_FrozenAliases = aliases;
 
     if (argc != 4) {
diff --git a/Programs/_testembed.c b/Programs/_testembed.c
index 773c6c3e9900a..6fe18d93a73ae 100644
--- a/Programs/_testembed.c
+++ b/Programs/_testembed.c
@@ -8,6 +8,7 @@
 #include <Python.h>
 #include "pycore_initconfig.h"    // _PyConfig_InitCompatConfig()
 #include "pycore_runtime.h"       // _PyRuntime
+#include "pycore_import.h"        // _PyImport_FrozenBootstrap
 #include <Python.h>
 #include <inttypes.h>
 #include <stdio.h>
@@ -1804,30 +1805,10 @@ static int test_unicode_id_init(void)
 
 static int test_frozenmain(void)
 {
-    // Get "_frozen_importlib" and "_frozen_importlib_external"
-    // from PyImport_FrozenModules
-    const struct _frozen *importlib = NULL, *importlib_external = NULL;
-    for (const struct _frozen *mod = PyImport_FrozenModules; mod->name != NULL; mod++) {
-        if (strcmp(mod->name, "_frozen_importlib") == 0) {
-            importlib = mod;
-        }
-        else if (strcmp(mod->name, "_frozen_importlib_external") == 0) {
-            importlib_external = mod;
-        }
-    }
-    if (importlib == NULL || importlib_external == NULL) {
-        error("cannot find frozen importlib and importlib_external");
-        return 1;
-    }
-
     static struct _frozen frozen_modules[4] = {
-        {0, 0, 0},  // importlib
-        {0, 0, 0},  // importlib_external
         {"__main__", M_test_frozenmain, sizeof(M_test_frozenmain)},
         {0, 0, 0}   // sentinel
     };
-    frozen_modules[0] = *importlib;
-    frozen_modules[1] = *importlib_external;
 
     char* argv[] = {
         "./argv0",
@@ -1846,7 +1827,12 @@ static int test_frozenmain(void)
 static int list_frozen(void)
 {
     const struct _frozen *p;
-    for (p = PyImport_FrozenModules; ; p++) {
+    for (p = _PyImport_FrozenBootstrap; ; p++) {
+        if (p->name == NULL)
+            break;
+        printf("%s\n", p->name);
+    }
+    for (p = _PyImport_FrozenStdlib; ; p++) {
         if (p->name == NULL)
             break;
         printf("%s\n", p->name);
diff --git a/Python/frozen.c b/Python/frozen.c
index 499b3b9957057..15baa97b9d055 100644
--- a/Python/frozen.c
+++ b/Python/frozen.c
@@ -63,14 +63,15 @@
 
 /* Note that a negative size indicates a package. */
 
-static const struct _frozen _PyImport_FrozenModules[] = {
-    /* import system */
+static const struct _frozen bootstrap_modules[] = {
     {"_frozen_importlib", _Py_M__importlib__bootstrap,
         (int)sizeof(_Py_M__importlib__bootstrap)},
     {"_frozen_importlib_external", _Py_M__importlib__bootstrap_external,
         (int)sizeof(_Py_M__importlib__bootstrap_external)},
     {"zipimport", _Py_M__zipimport, (int)sizeof(_Py_M__zipimport)},
-
+    {0, 0, 0} /* bootstrap sentinel */
+};
+static const struct _frozen stdlib_modules[] = {
     /* stdlib - startup, without site (python -S) */
     {"abc", _Py_M__abc, (int)sizeof(_Py_M__abc)},
     {"codecs", _Py_M__codecs, (int)sizeof(_Py_M__codecs)},
@@ -87,8 +88,9 @@ static const struct _frozen _PyImport_FrozenModules[] = {
     {"os", _Py_M__os, (int)sizeof(_Py_M__os)},
     {"site", _Py_M__site, (int)sizeof(_Py_M__site)},
     {"stat", _Py_M__stat, (int)sizeof(_Py_M__stat)},
-
-    /* Test module */
+    {0, 0, 0} /* stdlib sentinel */
+};
+static const struct _frozen test_modules[] = {
     {"__hello__", _Py_M____hello__, (int)sizeof(_Py_M____hello__)},
     {"__hello_alias__", _Py_M____hello__, (int)sizeof(_Py_M____hello__)},
     {"__phello_alias__", _Py_M____hello__, -(int)sizeof(_Py_M____hello__)},
@@ -103,8 +105,11 @@ static const struct _frozen _PyImport_FrozenModules[] = {
     {"__phello__.spam", _Py_M____phello___spam,
         (int)sizeof(_Py_M____phello___spam)},
     {"__hello_only__", _Py_M__frozen_only, (int)sizeof(_Py_M__frozen_only)},
-    {0, 0, 0} /* modules sentinel */
+    {0, 0, 0} /* test sentinel */
 };
+const struct _frozen *_PyImport_FrozenBootstrap = bootstrap_modules;
+const struct _frozen *_PyImport_FrozenStdlib = stdlib_modules;
+const struct _frozen *_PyImport_FrozenTest = test_modules;
 
 static const struct _module_alias aliases[] = {
     {"_frozen_importlib", "importlib._bootstrap"},
@@ -124,4 +129,4 @@ const struct _module_alias *_PyImport_FrozenAliases = aliases;
 /* Embedding apps may change this pointer to point to their favorite
    collection of frozen modules: */
 
-const struct _frozen *PyImport_FrozenModules = _PyImport_FrozenModules;
+const struct _frozen *PyImport_FrozenModules = NULL;
diff --git a/Python/import.c b/Python/import.c
index 15b1956c102df..48ea9129163c8 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -1070,27 +1070,6 @@ resolve_module_alias(const char *name, const struct _module_alias *aliases,
 
 /* Frozen modules */
 
-static bool
-is_essential_frozen_module(const char *name)
-{
-    /* These modules are necessary to bootstrap the import system. */
-    if (strcmp(name, "_frozen_importlib") == 0) {
-        return true;
-    }
-    if (strcmp(name, "_frozen_importlib_external") == 0) {
-        return true;
-    }
-    if (strcmp(name, "zipimport") == 0) {
-        return true;
-    }
-    /* This doesn't otherwise have anywhere to find the module.
-       See frozenmain.c. */
-    if (strcmp(name, "__main__") == 0) {
-        return true;
-    }
-    return false;
-}
-
 static bool
 use_frozen(void)
 {
@@ -1115,26 +1094,76 @@ list_frozen_module_names()
         return NULL;
     }
     bool enabled = use_frozen();
-    for (const struct _frozen *p = PyImport_FrozenModules; ; p++) {
+    const struct _frozen *p;
+#define ADD_MODULE(name) \
+    do { \
+        PyObject *nameobj = PyUnicode_FromString(name); \
+        if (nameobj == NULL) { \
+            goto error; \
+        } \
+        int res = PyList_Append(names, nameobj); \
+        Py_DECREF(nameobj); \
+        if (res != 0) { \
+            goto error; \
+        } \
+    } while(0)
+    // We always use the bootstrap modules.
+    for (p = _PyImport_FrozenBootstrap; ; p++) {
         if (p->name == NULL) {
             break;
         }
-        if (!enabled && !is_essential_frozen_module(p->name)) {
-            continue;
+        ADD_MODULE(p->name);
+    }
+    // Frozen stdlib modules may be disabled.
+    for (p = _PyImport_FrozenStdlib; ; p++) {
+        if (p->name == NULL) {
+            break;
         }
-        PyObject *name = PyUnicode_FromString(p->name);
-        if (name == NULL) {
-            Py_DECREF(names);
-            return NULL;
+        if (enabled) {
+            ADD_MODULE(p->name);
         }
-        int res = PyList_Append(names, name);
-        Py_DECREF(name);
-        if (res != 0) {
-            Py_DECREF(names);
-            return NULL;
+    }
+    for (p = _PyImport_FrozenTest; ; p++) {
+        if (p->name == NULL) {
+            break;
+        }
+        if (enabled) {
+            ADD_MODULE(p->name);
+        }
+    }
+#undef ADD_MODULE
+    // Add any custom modules.
+    if (PyImport_FrozenModules != NULL) {
+        for (p = PyImport_FrozenModules; ; p++) {
+            if (p->name == NULL) {
+                break;
+            }
+            PyObject *nameobj = PyUnicode_FromString(p->name);
+            if (nameobj == NULL) {
+                goto error;
+            }
+            int found = PySequence_Contains(names, nameobj);
+            if (found < 0) {
+                Py_DECREF(nameobj);
+                goto error;
+            }
+            else if (found) {
+                Py_DECREF(nameobj);
+            }
+            else {
+                int res = PyList_Append(names, nameobj);
+                Py_DECREF(nameobj);
+                if (res != 0) {
+                    goto error;
+                }
+            }
         }
     }
     return names;
+
+error:
+    Py_DECREF(names);
+    return NULL;
 }
 
 typedef enum {
@@ -1180,6 +1209,54 @@ set_frozen_error(frozen_status status, PyObject *modname)
     }
 }
 
+static const struct _frozen *
+look_up_frozen(const char *name)
+{
+    const struct _frozen *p;
+    // We always use the bootstrap modules.
+    for (p = _PyImport_FrozenBootstrap; ; p++) {
+        if (p->name == NULL) {
+            // We hit the end-of-list sentinel value.
+            break;
+        }
+        if (strcmp(name, p->name) == 0) {
+            return p;
+        }
+    }
+    // Prefer custom modules, if any.  Frozen stdlib modules can be
+    // disabled here by setting "code" to NULL in the array entry.
+    if (PyImport_FrozenModules != NULL) {
+        for (p = PyImport_FrozenModules; ; p++) {
+            if (p->name == NULL) {
+                break;
+            }
+            if (strcmp(name, p->name) == 0) {
+                return p;
+            }
+        }
+    }
+    // Frozen stdlib modules may be disabled.
+    if (use_frozen()) {
+        for (p = _PyImport_FrozenStdlib; ; p++) {
+            if (p->name == NULL) {
+                break;
+            }
+            if (strcmp(name, p->name) == 0) {
+                return p;
+            }
+        }
+        for (p = _PyImport_FrozenTest; ; p++) {
+            if (p->name == NULL) {
+                break;
+            }
+            if (strcmp(name, p->name) == 0) {
+                return p;
+            }
+        }
+    }
+    return NULL;
+}
+
 struct frozen_info {
     PyObject *nameobj;
     const char *data;
@@ -1209,19 +1286,9 @@ find_frozen(PyObject *nameobj, struct frozen_info *info)
         return FROZEN_BAD_NAME;
     }
 
-    if (!use_frozen() && !is_essential_frozen_module(name)) {
-        return FROZEN_DISABLED;
-    }
-
-    const struct _frozen *p;
-    for (p = PyImport_FrozenModules; ; p++) {
-        if (p->name == NULL) {
-            // We hit the end-of-list sentinel value.
-            return FROZEN_NOT_FOUND;
-        }
-        if (strcmp(name, p->name) == 0) {
-            break;
-        }
+    const struct _frozen *p = look_up_frozen(name);
+    if (p == NULL) {
+        return FROZEN_NOT_FOUND;
     }
     if (info != NULL) {
         info->nameobj = nameobj;  // borrowed
diff --git a/Tools/freeze/freeze.py b/Tools/freeze/freeze.py
index d66e1e2708e75..bc5e43f4853de 100755
--- a/Tools/freeze/freeze.py
+++ b/Tools/freeze/freeze.py
@@ -367,12 +367,6 @@ def main():
         else:
             mf.load_file(mod)
 
-    # Alias "importlib._bootstrap" to "_frozen_importlib" so that the
-    # import machinery can bootstrap.  Do the same for
-    # importlib._bootstrap_external.
-    mf.modules["_frozen_importlib"] = mf.modules["importlib._bootstrap"]
-    mf.modules["_frozen_importlib_external"] = mf.modules["importlib._bootstrap_external"]
-
     # Add the main script as either __main__, or the actual module name.
     if python_entry_is_main:
         mf.run_script(scriptfile)
diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py
index 5c7eee4295289..36142625ca609 100644
--- a/Tools/scripts/freeze_modules.py
+++ b/Tools/scripts/freeze_modules.py
@@ -60,6 +60,7 @@ def find_tool():
 OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath'
 
 # These are modules that get frozen.
+TESTS_SECTION = 'Test module'
 FROZEN = [
     # See parse_frozen_spec() for the format.
     # In cases where the frozenid is duplicated, the first one is re-used.
@@ -94,7 +95,7 @@ def find_tool():
         'site',
         'stat',
         ]),
-    ('Test module', [
+    (TESTS_SECTION, [
         '__hello__',
         '__hello__ : __hello_alias__',
         '__hello__ : <__phello_alias__>',
@@ -103,7 +104,7 @@ def find_tool():
         f'frozen_only : __hello_only__ = {FROZEN_ONLY}',
         ]),
 ]
-ESSENTIAL = {
+BOOTSTRAP = {
     'importlib._bootstrap',
     'importlib._bootstrap_external',
     'zipimport',
@@ -527,16 +528,24 @@ def regen_frozen(modules):
         header = relpath_for_posix_display(src.frozenfile, parentdir)
         headerlines.append(f'#include "{header}"')
 
-    deflines = []
+    bootstraplines = []
+    stdliblines = []
+    testlines = []
     aliaslines = []
     indent = '    '
     lastsection = None
     for mod in modules:
-        if mod.section != lastsection:
-            if lastsection is not None:
-                deflines.append('')
-            deflines.append(f'/* {mod.section} */')
-        lastsection = mod.section
+        if mod.frozenid in BOOTSTRAP:
+            lines = bootstraplines
+        elif mod.section == TESTS_SECTION:
+            lines = testlines
+        else:
+            lines = stdliblines
+            if mod.section != lastsection:
+                if lastsection is not None:
+                    lines.append('')
+                lines.append(f'/* {mod.section} */')
+            lastsection = mod.section
 
         symbol = mod.symbol
         pkg = '-' if mod.ispkg else ''
@@ -544,11 +553,11 @@ def regen_frozen(modules):
                 ) % (mod.name, symbol, pkg, symbol)
         # TODO: Consider not folding lines
         if len(line) < 80:
-            deflines.append(line)
+            lines.append(line)
         else:
             line1, _, line2 = line.rpartition(' ')
-            deflines.append(line1)
-            deflines.append(indent + line2)
+            lines.append(line1)
+            lines.append(indent + line2)
 
         if mod.isalias:
             if not mod.orig:
@@ -559,11 +568,13 @@ def regen_frozen(modules):
                 entry = '{"%s", "%s"},' % (mod.name, mod.orig)
             aliaslines.append(indent + entry)
 
-    if not deflines[0]:
-        del deflines[0]
-    for i, line in enumerate(deflines):
-        if line:
-            deflines[i] = indent + line
+    for lines in (bootstraplines, stdliblines, testlines):
+        # TODO: Is this necessary any more?
+        if not lines[0]:
+            del lines[0]
+        for i, line in enumerate(lines):
+            if line:
+                lines[i] = indent + line
 
     print(f'# Updating {os.path.relpath(FROZEN_FILE)}')
     with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile):
@@ -579,9 +590,23 @@ def regen_frozen(modules):
         )
         lines = replace_block(
             lines,
-            "static const struct _frozen _PyImport_FrozenModules[] =",
-            "/* modules sentinel */",
-            deflines,
+            "static const struct _frozen bootstrap_modules[] =",
+            "/* bootstrap sentinel */",
+            bootstraplines,
+            FROZEN_FILE,
+        )
+        lines = replace_block(
+            lines,
+            "static const struct _frozen stdlib_modules[] =",
+            "/* stdlib sentinel */",
+            stdliblines,
+            FROZEN_FILE,
+        )
+        lines = replace_block(
+            lines,
+            "static const struct _frozen test_modules[] =",
+            "/* test sentinel */",
+            testlines,
             FROZEN_FILE,
         )
         lines = replace_block(



More information about the Python-checkins mailing list