[Python-checkins] bpo-45019: Do some cleanup related to frozen modules. (gh-28319)

ericsnowcurrently webhook-mailer at python.org
Mon Sep 13 18:18:46 EDT 2021


https://github.com/python/cpython/commit/a2d8c4b81b8e68e2ffe10945f7ca69174c14e52a
commit: a2d8c4b81b8e68e2ffe10945f7ca69174c14e52a
branch: main
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: ericsnowcurrently <ericsnowcurrently at gmail.com>
date: 2021-09-13T16:18:37-06:00
summary:

bpo-45019: Do some cleanup related to frozen modules. (gh-28319)

There are a few things I missed in gh-27980. This is a follow-up that will make subsequent PRs cleaner. It includes fixes to tests and tools that reference the frozen modules.

https://bugs.python.org/issue45019

files:
A Python/frozen_modules/MANIFEST
A Python/frozen_modules/README.txt
M .gitattributes
M .gitignore
M Lib/ctypes/test/test_values.py
M Lib/imp.py
M Makefile.pre.in
M Python/clinic/import.c.h
M Python/frozen.c
M Python/import.c
M Tools/scripts/freeze_modules.py
M Tools/scripts/generate_stdlib_module_names.py

diff --git a/.gitattributes b/.gitattributes
index b9c08cdd7d65a..cf8d7822e522c 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -47,6 +47,7 @@ Objects/clinic/*.h          linguist-generated=true
 PC/clinic/*.h               linguist-generated=true
 Python/clinic/*.h           linguist-generated=true
 Python/frozen_modules/*.h   linguist-generated=true
+Python/frozen_modules/MANIFEST  linguist-generated=true
 Include/internal/pycore_ast.h   linguist-generated=true
 Python/Python-ast.c         linguist-generated=true
 Include/opcode.h            linguist-generated=true
diff --git a/.gitignore b/.gitignore
index 0ed4c8bdd0ccf..2182ac1fe0c75 100644
--- a/.gitignore
+++ b/.gitignore
@@ -121,6 +121,13 @@ Tools/msi/obj
 Tools/ssl/amd64
 Tools/ssl/win32
 
+# TODO: Once we auto-regen frozen modules for Windows builds
+# we can drop the .h files from the repo and ignore them here.
+# At that point we will rely on the frozen manifest file to identify
+# changed generated files.  We'll drop the entry for it then.
+# See: Tools/scripts/freeze_modules.py.
+#Python/frozen_modules/*.h
+
 # Two-trick pony for OSX and other case insensitive file systems:
 # Ignore ./python binary on Unix but still look into ./Python/ directory.
 /python
diff --git a/Lib/ctypes/test/test_values.py b/Lib/ctypes/test/test_values.py
index 96a5f7cc1c530..aa31d44184145 100644
--- a/Lib/ctypes/test/test_values.py
+++ b/Lib/ctypes/test/test_values.py
@@ -2,9 +2,12 @@
 A testcase which accesses *values* in a dll.
 """
 
+import imp
+import importlib.util
 import unittest
 import sys
 from ctypes import *
+from test.support import import_helper, captured_stdout
 
 import _ctypes_test
 
@@ -55,41 +58,32 @@ class struct_frozen(Structure):
 
         ft = FrozenTable.in_dll(pythonapi, "PyImport_FrozenModules")
         # ft is a pointer to the struct_frozen entries:
-        items = []
-        # _frozen_importlib changes size whenever importlib._bootstrap
-        # changes, so it gets a special case.  We should make sure it's
-        # found, but don't worry about its size too much.  The same
-        # applies to _frozen_importlib_external.
-        bootstrap_seen = []
-        bootstrap_expected = [
-                b'_frozen_importlib',
-                b'_frozen_importlib_external',
-                b'zipimport',
-                ]
+        modules = []
         for entry in ft:
             # This is dangerous. We *can* iterate over a pointer, but
             # the loop will not terminate (maybe with an access
             # violation;-) because the pointer instance has no size.
             if entry.name is None:
                 break
-
-            if entry.name in bootstrap_expected:
-                bootstrap_seen.append(entry.name)
-                self.assertTrue(entry.size,
-                    "{!r} was reported as having no size".format(entry.name))
-                continue
-            items.append((entry.name.decode("ascii"), entry.size))
-
-        expected = [("__hello__", 164),
-                    ("__phello__", -164),
-                    ("__phello__.spam", 164),
-                    ]
-        self.assertEqual(items, expected, "PyImport_FrozenModules example "
+            modname = entry.name.decode("ascii")
+            modules.append(modname)
+            with self.subTest(modname):
+                # Do a sanity check on entry.size and entry.code.
+                self.assertGreater(abs(entry.size), 10)
+                self.assertTrue([entry.code[i] for i in range(abs(entry.size))])
+                # Check the module's package-ness.
+                spec = importlib.util.find_spec(modname)
+                if entry.size < 0:
+                    # It's a package.
+                    self.assertIsNotNone(spec.submodule_search_locations)
+                else:
+                    self.assertIsNone(spec.submodule_search_locations)
+
+        expected = imp._frozen_module_names()
+        self.maxDiff = None
+        self.assertEqual(modules, expected, "PyImport_FrozenModules example "
             "in Doc/library/ctypes.rst may be out of date")
 
-        self.assertEqual(sorted(bootstrap_seen), bootstrap_expected,
-            "frozen bootstrap modules did not match PyImport_FrozenModules")
-
         from ctypes import _pointer_type_cache
         del _pointer_type_cache[struct_frozen]
 
diff --git a/Lib/imp.py b/Lib/imp.py
index 71c5c8fc6a510..fc42c15765852 100644
--- a/Lib/imp.py
+++ b/Lib/imp.py
@@ -9,7 +9,7 @@
 from _imp import (lock_held, acquire_lock, release_lock,
                   get_frozen_object, is_frozen_package,
                   init_frozen, is_builtin, is_frozen,
-                  _fix_co_filename)
+                  _fix_co_filename, _frozen_module_names)
 try:
     from _imp import create_dynamic
 except ImportError:
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 804d0192bc5fd..e7005befcd3cc 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -750,22 +750,22 @@ regen-frozen: Tools/scripts/freeze_modules.py $(FROZEN_FILES)
 
 # BEGIN: freezing modules
 
-Python/frozen_modules/importlib__bootstrap.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Lib/importlib/_bootstrap.py
+Python/frozen_modules/importlib__bootstrap.h: Programs/_freeze_module Lib/importlib/_bootstrap.py
 	$(srcdir)/Programs/_freeze_module importlib._bootstrap \
 		$(srcdir)/Lib/importlib/_bootstrap.py \
 		$(srcdir)/Python/frozen_modules/importlib__bootstrap.h
 
-Python/frozen_modules/importlib__bootstrap_external.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Lib/importlib/_bootstrap_external.py
+Python/frozen_modules/importlib__bootstrap_external.h: Programs/_freeze_module Lib/importlib/_bootstrap_external.py
 	$(srcdir)/Programs/_freeze_module importlib._bootstrap_external \
 		$(srcdir)/Lib/importlib/_bootstrap_external.py \
 		$(srcdir)/Python/frozen_modules/importlib__bootstrap_external.h
 
-Python/frozen_modules/zipimport.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Lib/zipimport.py
+Python/frozen_modules/zipimport.h: Programs/_freeze_module Lib/zipimport.py
 	$(srcdir)/Programs/_freeze_module zipimport \
 		$(srcdir)/Lib/zipimport.py \
 		$(srcdir)/Python/frozen_modules/zipimport.h
 
-Python/frozen_modules/hello.h: $(srcdir)/Programs/_freeze_module $(srcdir)/Tools/freeze/flag.py
+Python/frozen_modules/hello.h: Programs/_freeze_module Tools/freeze/flag.py
 	$(srcdir)/Programs/_freeze_module hello \
 		$(srcdir)/Tools/freeze/flag.py \
 		$(srcdir)/Python/frozen_modules/hello.h
diff --git a/Python/clinic/import.c.h b/Python/clinic/import.c.h
index 4e013cc97d6b9..ec4ebca36d943 100644
--- a/Python/clinic/import.c.h
+++ b/Python/clinic/import.c.h
@@ -297,6 +297,24 @@ _imp_is_frozen(PyObject *module, PyObject *arg)
     return return_value;
 }
 
+PyDoc_STRVAR(_imp__frozen_module_names__doc__,
+"_frozen_module_names($module, /)\n"
+"--\n"
+"\n"
+"Returns the list of available frozen modules.");
+
+#define _IMP__FROZEN_MODULE_NAMES_METHODDEF    \
+    {"_frozen_module_names", (PyCFunction)_imp__frozen_module_names, METH_NOARGS, _imp__frozen_module_names__doc__},
+
+static PyObject *
+_imp__frozen_module_names_impl(PyObject *module);
+
+static PyObject *
+_imp__frozen_module_names(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+    return _imp__frozen_module_names_impl(module);
+}
+
 #if defined(HAVE_DYNAMIC_LOADING)
 
 PyDoc_STRVAR(_imp_create_dynamic__doc__,
@@ -449,4 +467,4 @@ _imp_source_hash(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb
 #ifndef _IMP_EXEC_DYNAMIC_METHODDEF
     #define _IMP_EXEC_DYNAMIC_METHODDEF
 #endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */
-/*[clinic end generated code: output=7c31c433af88af6b input=a9049054013a1b77]*/
+/*[clinic end generated code: output=0ab3fa7c5808bba4 input=a9049054013a1b77]*/
diff --git a/Python/frozen.c b/Python/frozen.c
index 67aff2ed2eba1..2975b1fcbe7e2 100644
--- a/Python/frozen.c
+++ b/Python/frozen.c
@@ -47,7 +47,7 @@
 /* Note that a negative size indicates a package. */
 
 static const struct _frozen _PyImport_FrozenModules[] = {
-    /* importlib */
+    /* import system */
     {"_frozen_importlib", _Py_M__importlib__bootstrap,
         (int)sizeof(_Py_M__importlib__bootstrap)},
     {"_frozen_importlib_external", _Py_M__importlib__bootstrap_external,
diff --git a/Python/frozen_modules/MANIFEST b/Python/frozen_modules/MANIFEST
new file mode 100644
index 0000000000000..42c72b984327f
--- /dev/null
+++ b/Python/frozen_modules/MANIFEST
@@ -0,0 +1,12 @@
+# The list of frozen modules with key information.
+# Note that the "check_generated_files" CI job will identify
+# when source files were changed but regen-frozen wasn't run.
+# This file is auto-generated by Tools/scripts/freeze_modules.py.
+          module           ispkg              source                          frozen               checksum
+-------------------------- ----- ------------------------------- ------------------------------- ------------
+_frozen_importlib            no  <importlib._bootstrap>          importlib__bootstrap.h          749d553f858d
+_frozen_importlib_external   no  <importlib._bootstrap_external> importlib__bootstrap_external.h e4539e6347d7
+zipimport                    no  <zipimport>                     zipimport.h                     374879e5d43d
+__hello__                    no  Tools/freeze/flag.py            hello.h                         af6fb665713f
+__phello__                  YES  Tools/freeze/flag.py            hello.h                         af6fb665713f
+__phello__.spam              no  Tools/freeze/flag.py            hello.h                         af6fb665713f
diff --git a/Python/frozen_modules/README.txt b/Python/frozen_modules/README.txt
new file mode 100644
index 0000000000000..444167cc496af
--- /dev/null
+++ b/Python/frozen_modules/README.txt
@@ -0,0 +1,7 @@
+This directory contains the generated .h files for all the frozen
+modules.  Python/frozen.c depends on these files.
+
+Note that, other than the required frozen modules, none of these files
+are committed into the repo.
+
+See Tools/scripts/freeze_modules.py for more info.
diff --git a/Python/import.c b/Python/import.c
index 7301fccb9fac0..d896ff476e179 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -16,6 +16,7 @@
 #include "code.h"
 #include "importdl.h"
 #include "pydtrace.h"
+#include <stdbool.h>
 
 #ifdef HAVE_FCNTL_H
 #include <fcntl.h>
@@ -1049,6 +1050,32 @@ _imp_create_builtin(PyObject *module, PyObject *spec)
 
 /* Frozen modules */
 
+static PyObject *
+list_frozen_module_names(bool force)
+{
+    PyObject *names = PyList_New(0);
+    if (names == NULL) {
+        return NULL;
+    }
+    for (const struct _frozen *p = PyImport_FrozenModules; ; p++) {
+        if (p->name == NULL) {
+            break;
+        }
+        PyObject *name = PyUnicode_FromString(p->name);
+        if (name == NULL) {
+            Py_DECREF(names);
+            return NULL;
+        }
+        int res = PyList_Append(names, name);
+        Py_DECREF(name);
+        if (res != 0) {
+            Py_DECREF(names);
+            return NULL;
+        }
+    }
+    return names;
+}
+
 static const struct _frozen *
 find_frozen(PyObject *name)
 {
@@ -1954,6 +1981,19 @@ _imp_is_frozen_impl(PyObject *module, PyObject *name)
     return PyBool_FromLong((long) (p == NULL ? 0 : p->size));
 }
 
+/*[clinic input]
+_imp._frozen_module_names
+
+Returns the list of available frozen modules.
+[clinic start generated code]*/
+
+static PyObject *
+_imp__frozen_module_names_impl(PyObject *module)
+/*[clinic end generated code: output=80609ef6256310a8 input=76237fbfa94460d2]*/
+{
+    return list_frozen_module_names(true);
+}
+
 /* Common implementation for _imp.exec_dynamic and _imp.exec_builtin */
 static int
 exec_builtin_or_dynamic(PyObject *mod) {
@@ -2114,6 +2154,7 @@ static PyMethodDef imp_methods[] = {
     _IMP_INIT_FROZEN_METHODDEF
     _IMP_IS_BUILTIN_METHODDEF
     _IMP_IS_FROZEN_METHODDEF
+    _IMP__FROZEN_MODULE_NAMES_METHODDEF
     _IMP_CREATE_DYNAMIC_METHODDEF
     _IMP_EXEC_DYNAMIC_METHODDEF
     _IMP_EXEC_BUILTIN_METHODDEF
diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py
index 4f60e1b9a3a8b..b7e5320ae2d1b 100644
--- a/Tools/scripts/freeze_modules.py
+++ b/Tools/scripts/freeze_modules.py
@@ -3,6 +3,8 @@
 See the notes at the top of Python/frozen.c for more info.
 """
 
+from collections import namedtuple
+import hashlib
 import os
 import os.path
 import subprocess
@@ -21,18 +23,24 @@
 MODULES_DIR = os.path.join(ROOT_DIR, 'Python/frozen_modules')
 TOOL = os.path.join(ROOT_DIR, 'Programs', '_freeze_module')
 
+MANIFEST = os.path.join(MODULES_DIR, 'MANIFEST')
 FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c')
 MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
 PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
 PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters')
+TEST_CTYPES = os.path.join(STDLIB_DIR, 'ctypes', 'test', 'test_values.py')
 
 # These are modules that get frozen.
 FROZEN = [
     # See parse_frozen_spec() for the format.
     # In cases where the frozenid is duplicated, the first one is re-used.
-    ('importlib', [
+    ('import system', [
+        # These frozen modules are necessary for bootstrapping
+        # the import system.
         'importlib._bootstrap : _frozen_importlib',
         'importlib._bootstrap_external : _frozen_importlib_external',
+        # This module is important because some Python builds rely
+        # on a builtin zip file instead of a filesystem.
         'zipimport',
         ]),
     ('Test module', [
@@ -41,13 +49,43 @@
         'hello : __phello__.spam',
         ]),
 ]
+ESSENTIAL = {
+    'importlib._bootstrap',
+    'importlib._bootstrap_external',
+    'zipimport',
+}
 
 
 #######################################
 # specs
 
-def parse_frozen_spec(rawspec, knownids=None, section=None):
-    """Yield (frozenid, pyfile, modname, ispkg) for the corresponding modules.
+def parse_frozen_specs(sectionalspecs=FROZEN, destdir=None):
+    seen = {}
+    for section, specs in sectionalspecs:
+        parsed = _parse_specs(specs, section, seen)
+        for frozenid, pyfile, modname, ispkg, section in parsed:
+            try:
+                source = seen[frozenid]
+            except KeyError:
+                source = FrozenSource.from_id(frozenid, pyfile, destdir)
+                seen[frozenid] = source
+            else:
+                assert not pyfile
+            yield FrozenModule(modname, ispkg, section, source)
+
+
+def _parse_specs(specs, section, seen):
+    for spec in specs:
+        info, subs = _parse_spec(spec, seen, section)
+        yield info
+        for info in subs or ():
+            yield info
+
+
+def _parse_spec(spec, knownids=None, section=None):
+    """Yield an info tuple for each module corresponding to the given spec.
+
+    The info consists of: (frozenid, pyfile, modname, ispkg, section).
 
     Supported formats:
 
@@ -74,7 +112,7 @@ def parse_frozen_spec(rawspec, knownids=None, section=None):
     Also, if "modname" has brackets then "frozenid" should not,
     and "pyfile" should have been provided..
     """
-    frozenid, _, remainder = rawspec.partition(':')
+    frozenid, _, remainder = spec.partition(':')
     modname, _, pyfile = remainder.partition('=')
     frozenid = frozenid.strip()
     modname = modname.strip()
@@ -82,28 +120,28 @@ def parse_frozen_spec(rawspec, knownids=None, section=None):
 
     submodules = None
     if modname.startswith('<') and modname.endswith('>'):
-        assert check_modname(frozenid), rawspec
+        assert check_modname(frozenid), spec
         modname = modname[1:-1]
-        assert check_modname(modname), rawspec
+        assert check_modname(modname), spec
         if frozenid in knownids:
             pass
         elif pyfile:
-            assert not os.path.isdir(pyfile), rawspec
+            assert not os.path.isdir(pyfile), spec
         else:
             pyfile = _resolve_module(frozenid, ispkg=False)
         ispkg = True
     elif pyfile:
-        assert check_modname(frozenid), rawspec
-        assert not knownids or frozenid not in knownids, rawspec
-        assert check_modname(modname), rawspec
-        assert not os.path.isdir(pyfile), rawspec
+        assert check_modname(frozenid), spec
+        assert not knownids or frozenid not in knownids, spec
+        assert check_modname(modname), spec
+        assert not os.path.isdir(pyfile), spec
         ispkg = False
     elif knownids and frozenid in knownids:
-        assert check_modname(frozenid), rawspec
-        assert check_modname(modname), rawspec
+        assert check_modname(frozenid), spec
+        assert check_modname(modname), spec
         ispkg = False
     else:
-        assert not modname or check_modname(modname), rawspec
+        assert not modname or check_modname(modname), spec
         resolved = iter(resolve_modules(frozenid))
         frozenid, pyfile, ispkg = next(resolved)
         if not modname:
@@ -113,7 +151,7 @@ def parse_frozen_spec(rawspec, knownids=None, section=None):
             pkgname = modname
             def iter_subs():
                 for frozenid, pyfile, ispkg in resolved:
-                    assert not knownids or frozenid not in knownids, (frozenid, rawspec)
+                    assert not knownids or frozenid not in knownids, (frozenid, spec)
                     if pkgname:
                         modname = frozenid.replace(pkgid, pkgname, 1)
                     else:
@@ -121,59 +159,104 @@ def iter_subs():
                     yield frozenid, pyfile, modname, ispkg, section
             submodules = iter_subs()
 
-    spec = (frozenid, pyfile or None, modname, ispkg, section)
-    return spec, submodules
+    info = (frozenid, pyfile or None, modname, ispkg, section)
+    return info, submodules
 
 
-def parse_frozen_specs(rawspecs=FROZEN):
-    seen = set()
-    for section, _specs in rawspecs:
-        for spec in _parse_frozen_specs(_specs, section, seen):
-            frozenid = spec[0]
-            yield spec
-            seen.add(frozenid)
+#######################################
+# frozen source files
 
+class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile')):
 
-def _parse_frozen_specs(rawspecs, section, seen):
-    for rawspec in rawspecs:
-        spec, subs = parse_frozen_spec(rawspec, seen, section)
-        yield spec
-        for spec in subs or ():
-            yield spec
+    @classmethod
+    def from_id(cls, frozenid, pyfile=None, destdir=MODULES_DIR):
+        if not pyfile:
+            pyfile = os.path.join(STDLIB_DIR, *frozenid.split('.')) + '.py'
+            #assert os.path.exists(pyfile), (frozenid, pyfile)
+        frozenfile = resolve_frozen_file(frozenid, destdir)
+        return cls(frozenid, pyfile, frozenfile)
 
+    @property
+    def frozenid(self):
+        return self.id
 
-def resolve_frozen_file(spec, destdir=MODULES_DIR):
-    if isinstance(spec, str):
-        modname = spec
-    else:
-        _, frozenid, _, _, _= spec
-        modname = frozenid
+    @property
+    def modname(self):
+        if self.pyfile.startswith(STDLIB_DIR):
+            return self.id
+        return None
+
+    @property
+    def symbol(self):
+        # This matches what we do in Programs/_freeze_module.c:
+        name = self.frozenid.replace('.', '_')
+        return '_Py_M__' + name
+
+
+def resolve_frozen_file(frozenid, destdir=MODULES_DIR):
+    """Return the filename corresponding to the given frozen ID.
+
+    For stdlib modules the ID will always be the full name
+    of the source module.
+    """
+    if not isinstance(frozenid, str):
+        try:
+            frozenid = frozenid.frozenid
+        except AttributeError:
+            raise ValueError(f'unsupported frozenid {frozenid!r}')
     # We use a consistent naming convention for all frozen modules.
-    return os.path.join(destdir, modname.replace('.', '_')) + '.h'
+    frozenfile = frozenid.replace('.', '_') + '.h'
+    if not destdir:
+        return frozenfile
+    return os.path.join(destdir, frozenfile)
 
 
-def resolve_frozen_files(specs, destdir=MODULES_DIR):
-    frozen = {}
-    frozenids = []
-    lastsection = None
-    for spec in specs:
-        frozenid, pyfile, *_, section = spec
-        if frozenid in frozen:
-            if section is None:
-                lastsection = None
-            else:
-                assert section == lastsection
-            continue
-        lastsection = section
-        frozenfile = resolve_frozen_file(frozenid, destdir)
-        frozen[frozenid] = (pyfile, frozenfile)
-        frozenids.append(frozenid)
-    return frozen, frozenids
+#######################################
+# frozen modules
+
+class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')):
+
+    def __getattr__(self, name):
+        return getattr(self.source, name)
+
+    @property
+    def modname(self):
+        return self.name
+
+    def summarize(self):
+        source = self.source.modname
+        if source:
+            source = f'<{source}>'
+        else:
+            source = os.path.relpath(self.pyfile, ROOT_DIR)
+        return {
+            'module': self.name,
+            'ispkg': self.ispkg,
+            'source': source,
+            'frozen': os.path.basename(self.frozenfile),
+            'checksum': _get_checksum(self.frozenfile),
+        }
+
+
+def _iter_sources(modules):
+    seen = set()
+    for mod in modules:
+        if mod.source not in seen:
+            yield mod.source
+            seen.add(mod.source)
 
 
 #######################################
 # generic helpers
 
+def _get_checksum(filename):
+    with open(filename) as infile:
+        text = infile.read()
+    m = hashlib.sha256()
+    m.update(text.encode('utf8'))
+    return m.hexdigest()
+
+
 def resolve_modules(modname, pyfile=None):
     if modname.startswith('<') and modname.endswith('>'):
         if pyfile:
@@ -293,38 +376,68 @@ def replace_block(lines, start_marker, end_marker, replacements, file):
     return lines[:start_pos + 1] + replacements + lines[end_pos:]
 
 
-def regen_frozen(specs, dest=MODULES_DIR):
-    if isinstance(dest, str):
-        frozen, frozenids = resolve_frozen_files(specs, destdir)
-    else:
-        frozenids, frozen = dest
+def regen_manifest(modules):
+    header = 'module ispkg source frozen checksum'.split()
+    widths = [5] * len(header)
+    rows = []
+    for mod in modules:
+        info = mod.summarize()
+        row = []
+        for i, col in enumerate(header):
+            value = info[col]
+            if col == 'checksum':
+                value = value[:12]
+            elif col == 'ispkg':
+                value = 'YES' if value else 'no'
+            widths[i] = max(widths[i], len(value))
+            row.append(value or '-')
+        rows.append(row)
+
+    modlines = [
+        '# The list of frozen modules with key information.',
+        '# Note that the "check_generated_files" CI job will identify',
+        '# when source files were changed but regen-frozen wasn\'t run.',
+        '# This file is auto-generated by Tools/scripts/freeze_modules.py.',
+        ' '.join(c.center(w) for c, w in zip(header, widths)).rstrip(),
+        ' '.join('-' * w for w in widths),
+    ]
+    for row in rows:
+        for i, w in enumerate(widths):
+            if header[i] == 'ispkg':
+                row[i] = row[i].center(w)
+            else:
+                row[i] = row[i].ljust(w)
+        modlines.append(' '.join(row).rstrip())
 
+    print(f'# Updating {os.path.relpath(MANIFEST)}')
+    with open(MANIFEST, 'w') as outfile:
+        lines = (l + '\n' for l in modlines)
+        outfile.writelines(lines)
+
+
+def regen_frozen(modules):
     headerlines = []
     parentdir = os.path.dirname(FROZEN_FILE)
-    for frozenid in frozenids:
+    for src in _iter_sources(modules):
         # Adding a comment to separate sections here doesn't add much,
         # so we don't.
-        _, frozenfile = frozen[frozenid]
-        header = os.path.relpath(frozenfile, parentdir)
+        header = os.path.relpath(src.frozenfile, parentdir)
         headerlines.append(f'#include "{header}"')
 
     deflines = []
     indent = '    '
     lastsection = None
-    for spec in specs:
-        frozenid, _, modname, ispkg, section = spec
-        if section != lastsection:
+    for mod in modules:
+        if mod.section != lastsection:
             if lastsection is not None:
                 deflines.append('')
-            deflines.append(f'/* {section} */')
-        lastsection = section
+            deflines.append(f'/* {mod.section} */')
+        lastsection = mod.section
 
-        # This matches what we do in Programs/_freeze_module.c:
-        name = frozenid.replace('.', '_')
-        symbol = '_Py_M__' + name
-        pkg = '-' if ispkg else ''
+        symbol = mod.symbol
+        pkg = '-' if mod.ispkg else ''
         line = ('{"%s", %s, %s(int)sizeof(%s)},'
-                % (modname, symbol, pkg, symbol))
+                ) % (mod.name, symbol, pkg, symbol)
         # TODO: Consider not folding lines
         if len(line) < 80:
             deflines.append(line)
@@ -361,22 +474,20 @@ def regen_frozen(specs, dest=MODULES_DIR):
         outfile.writelines(lines)
 
 
-def regen_makefile(frozenids, frozen):
+def regen_makefile(modules):
     frozenfiles = []
     rules = ['']
-    for frozenid in frozenids:
-        pyfile, frozenfile = frozen[frozenid]
-        header = os.path.relpath(frozenfile, ROOT_DIR)
+    for src in _iter_sources(modules):
+        header = os.path.relpath(src.frozenfile, ROOT_DIR)
         relfile = header.replace('\\', '/')
         frozenfiles.append(f'\t\t$(srcdir)/{relfile} \\')
 
-        _pyfile = os.path.relpath(pyfile, ROOT_DIR)
-        tmpfile = f'{header}.new'
+        pyfile = os.path.relpath(src.pyfile, ROOT_DIR)
         # Note that we freeze the module to the target .h file
         # instead of going through an intermediate file like we used to.
-        rules.append(f'{header}: $(srcdir)/Programs/_freeze_module $(srcdir)/{_pyfile}')
-        rules.append(f'\t$(srcdir)/Programs/_freeze_module {frozenid} \\')
-        rules.append(f'\t\t$(srcdir)/{_pyfile} \\')
+        rules.append(f'{header}: Programs/_freeze_module {pyfile}')
+        rules.append(f'\t$(srcdir)/Programs/_freeze_module {src.frozenid} \\')
+        rules.append(f'\t\t$(srcdir)/{pyfile} \\')
         rules.append(f'\t\t$(srcdir)/{header}')
         rules.append('')
 
@@ -402,22 +513,24 @@ def regen_makefile(frozenids, frozen):
         outfile.writelines(lines)
 
 
-def regen_pcbuild(frozenids, frozen):
+def regen_pcbuild(modules):
     projlines = []
     filterlines = []
-    for frozenid in frozenids:
-        pyfile, frozenfile = frozen[frozenid]
-
-        _pyfile = os.path.relpath(pyfile, ROOT_DIR).replace('/', '\\')
-        header = os.path.relpath(frozenfile, ROOT_DIR).replace('/', '\\')
+    for src in _iter_sources(modules):
+        # For now we only require the essential frozen modules on Windows.
+        # See bpo-45186 and bpo-45188.
+        if src.id not in ESSENTIAL and src.id != 'hello':
+            continue
+        pyfile = os.path.relpath(src.pyfile, ROOT_DIR).replace('/', '\\')
+        header = os.path.relpath(src.frozenfile, ROOT_DIR).replace('/', '\\')
         intfile = header.split('\\')[-1].strip('.h') + '.g.h'
-        projlines.append(f'    <None Include="..\\{_pyfile}">')
-        projlines.append(f'      <ModName>{frozenid}</ModName>')
+        projlines.append(f'    <None Include="..\\{pyfile}">')
+        projlines.append(f'      <ModName>{src.frozenid}</ModName>')
         projlines.append(f'      <IntFile>$(IntDir){intfile}</IntFile>')
         projlines.append(f'      <OutFile>$(PySourcePath){header}</OutFile>')
         projlines.append(f'    </None>')
 
-        filterlines.append(f'    <None Include="..\\{_pyfile}">')
+        filterlines.append(f'    <None Include="..\\{pyfile}">')
         filterlines.append('      <Filter>Python Files</Filter>')
         filterlines.append('    </None>')
 
@@ -451,7 +564,7 @@ def regen_pcbuild(frozenids, frozen):
 def freeze_module(modname, pyfile=None, destdir=MODULES_DIR):
     """Generate the frozen module .h file for the given module."""
     for modname, pyfile, ispkg in resolve_modules(modname, pyfile):
-        frozenfile = _resolve_frozen(modname, destdir)
+        frozenfile = resolve_frozen_file(modname, destdir)
         _freeze_module(modname, pyfile, frozenfile)
 
 
@@ -459,7 +572,7 @@ def _freeze_module(frozenid, pyfile, frozenfile):
     tmpfile = frozenfile + '.new'
 
     argv = [TOOL, frozenid, pyfile, tmpfile]
-    print('#', '  '.join(os.path.relpath(a) for a in argv))
+    print('#', '  '.join(os.path.relpath(a) for a in argv), flush=True)
     try:
         subprocess.run(argv, check=True)
     except subprocess.CalledProcessError:
@@ -475,18 +588,17 @@ def _freeze_module(frozenid, pyfile, frozenfile):
 
 def main():
     # Expand the raw specs, preserving order.
-    specs = list(parse_frozen_specs())
-    frozen, frozenids = resolve_frozen_files(specs, MODULES_DIR)
-
-    # Regen build-related files.
-    regen_frozen(specs, (frozenids, frozen))
-    regen_makefile(frozenids, frozen)
-    regen_pcbuild(frozenids, frozen)
+    modules = list(parse_frozen_specs(destdir=MODULES_DIR))
 
     # Freeze the target modules.
-    for frozenid in frozenids:
-        pyfile, frozenfile = frozen[frozenid]
-        _freeze_module(frozenid, pyfile, frozenfile)
+    for src in _iter_sources(modules):
+        _freeze_module(src.frozenid, src.pyfile, src.frozenfile)
+
+    # Regen build-related files.
+    regen_manifest(modules)
+    regen_frozen(modules)
+    regen_makefile(modules)
+    regen_pcbuild(modules)
 
 
 if __name__ == '__main__':
diff --git a/Tools/scripts/generate_stdlib_module_names.py b/Tools/scripts/generate_stdlib_module_names.py
index 716a6d4b7a07f..325ae202b1d8c 100644
--- a/Tools/scripts/generate_stdlib_module_names.py
+++ b/Tools/scripts/generate_stdlib_module_names.py
@@ -117,9 +117,19 @@ def list_frozen(names):
         cmd = ' '.join(args)
         print(f"{cmd} failed with exitcode {exitcode}")
         sys.exit(exitcode)
+    submodules = set()
     for line in proc.stdout.splitlines():
         name = line.strip()
-        names.add(name)
+        if '.' in name:
+            submodules.add(name)
+        else:
+            names.add(name)
+    # Make sure all frozen submodules have a known parent.
+    for name in list(submodules):
+        if name.partition('.')[0] in names:
+            submodules.remove(name)
+    if submodules:
+        raise Exception(f'unexpected frozen submodules: {sorted(submodules)}')
 
 
 def list_modules():



More information about the Python-checkins mailing list