[Python-checkins] bpo-35936: Updates to modulefinder (GH-11787)

Nick Coghlan webhook-mailer at python.org
Sun Apr 7 04:00:48 EDT 2019


https://github.com/python/cpython/commit/9d7b2c0909b78800d1376fd696f73824ea680463
commit: 9d7b2c0909b78800d1376fd696f73824ea680463
branch: master
author: Brandt Bucher <brandtbucher at gmail.com>
committer: Nick Coghlan <ncoghlan at gmail.com>
date: 2019-04-07T18:00:41+10:00
summary:

bpo-35936: Updates to modulefinder (GH-11787)

* Properly handle SyntaxErrors in Python source files.

SyntaxErrors in the target module will rise normally, while SyntaxErrors in dependencies will be added to badmodules. This includes a new regression test.

* Fix name collision bug.

This fixes an issue where a "fromlist" import with the same name as a previously failed import would be incorrectly added to badmodules. This includes a new regression test.

* Replace mutable default values.

Bound empty lists have been replaced with the "if param is None" idiom.

* Replace deprecated imp usage.

Constants imported from imp have been moved to private module-level constants, and ModuleFinder.find_module has been refactored to use importlib. Other than an improvement on how frozen builtin imports are reported (as the frozen imports they are, rather than the stdlib modules they *may* have originated from), these changes maintain complete compatibility with past versions... including odd behavior for returning relative (below current directory, but not a C extension) vs. absolute (above current directory, or a C extension) paths.

Patch by Brandt Bucher.

files:
A Misc/NEWS.d/next/Library/2019-02-13-18-56-22.bpo-17396.oKRkrD.rst
A Misc/NEWS.d/next/Library/2019-02-13-18-56-27.bpo-35376.UFhYLj.rst
A Misc/NEWS.d/next/Library/2019-02-16-22-19-32.bpo-35936.Ay5WtD.rst
M Lib/modulefinder.py
M Lib/test/test_modulefinder.py
M Misc/ACKS

diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py
index 10320a74d942..0061ef415ce3 100644
--- a/Lib/modulefinder.py
+++ b/Lib/modulefinder.py
@@ -8,9 +8,7 @@
 import sys
 import types
 import warnings
-with warnings.catch_warnings():
-    warnings.simplefilter('ignore', DeprecationWarning)
-    import imp
+
 
 LOAD_CONST = dis.opmap['LOAD_CONST']
 IMPORT_NAME = dis.opmap['IMPORT_NAME']
@@ -19,6 +17,16 @@
 STORE_OPS = STORE_NAME, STORE_GLOBAL
 EXTENDED_ARG = dis.EXTENDED_ARG
 
+# Old imp constants:
+
+_SEARCH_ERROR = 0
+_PY_SOURCE = 1
+_PY_COMPILED = 2
+_C_EXTENSION = 3
+_PKG_DIRECTORY = 5
+_C_BUILTIN = 6
+_PY_FROZEN = 7
+
 # Modulefinder does a good job at simulating Python's, but it can not
 # handle __path__ modifications packages make at runtime.  Therefore there
 # is a mechanism whereby you can register extra paths in this map for a
@@ -43,6 +51,54 @@ def ReplacePackage(oldname, newname):
     replacePackageMap[oldname] = newname
 
 
+def _find_module(name, path=None):
+    """An importlib reimplementation of imp.find_module (for our purposes)."""
+
+    # It's necessary to clear the caches for our Finder first, in case any
+    # modules are being added/deleted/modified at runtime. In particular,
+    # test_modulefinder.py changes file tree contents in a cache-breaking way:
+
+    importlib.machinery.PathFinder.invalidate_caches()
+
+    spec = importlib.machinery.PathFinder.find_spec(name, path)
+
+    if spec is None:
+        raise ImportError("No module named {name!r}".format(name=name), name=name)
+
+    # Some special cases:
+
+    if spec.loader is importlib.machinery.BuiltinImporter:
+        return None, None, ("", "", _C_BUILTIN)
+
+    if spec.loader is importlib.machinery.FrozenImporter:
+        return None, None, ("", "", _PY_FROZEN)
+
+    file_path = spec.origin
+
+    if spec.loader.is_package(name):
+        return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)
+
+    if isinstance(spec.loader, importlib.machinery.SourceFileLoader):
+        kind = _PY_SOURCE
+        mode = "r"
+
+    elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader):
+        kind = _C_EXTENSION
+        mode = "rb"
+
+    elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader):
+        kind = _PY_COMPILED
+        mode = "rb"
+
+    else:  # Should never happen.
+        return None, None, ("", "", _SEARCH_ERROR)
+
+    file = open(file_path, mode)
+    suffix = os.path.splitext(file_path)[-1]
+
+    return file, file_path, (suffix, mode, kind)
+
+
 class Module:
 
     def __init__(self, name, file=None, path=None):
@@ -69,7 +125,7 @@ def __repr__(self):
 
 class ModuleFinder:
 
-    def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
+    def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
         if path is None:
             path = sys.path
         self.path = path
@@ -77,8 +133,8 @@ def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
         self.badmodules = {}
         self.debug = debug
         self.indent = 0
-        self.excludes = excludes
-        self.replace_paths = replace_paths
+        self.excludes = excludes if excludes is not None else []
+        self.replace_paths = replace_paths if replace_paths is not None else []
         self.processed_paths = []   # Used in debugging only
 
     def msg(self, level, str, *args):
@@ -105,14 +161,14 @@ def msgout(self, *args):
     def run_script(self, pathname):
         self.msg(2, "run_script", pathname)
         with open(pathname) as fp:
-            stuff = ("", "r", imp.PY_SOURCE)
+            stuff = ("", "r", _PY_SOURCE)
             self.load_module('__main__', fp, pathname, stuff)
 
     def load_file(self, pathname):
         dir, name = os.path.split(pathname)
         name, ext = os.path.splitext(name)
         with open(pathname) as fp:
-            stuff = (ext, "r", imp.PY_SOURCE)
+            stuff = (ext, "r", _PY_SOURCE)
             self.load_module(name, fp, pathname, stuff)
 
     def import_hook(self, name, caller=None, fromlist=None, level=-1):
@@ -279,13 +335,13 @@ def import_module(self, partname, fqname, parent):
     def load_module(self, fqname, fp, pathname, file_info):
         suffix, mode, type = file_info
         self.msgin(2, "load_module", fqname, fp and "fp", pathname)
-        if type == imp.PKG_DIRECTORY:
+        if type == _PKG_DIRECTORY:
             m = self.load_package(fqname, pathname)
             self.msgout(2, "load_module ->", m)
             return m
-        if type == imp.PY_SOURCE:
+        if type == _PY_SOURCE:
             co = compile(fp.read()+'\n', pathname, 'exec')
-        elif type == imp.PY_COMPILED:
+        elif type == _PY_COMPILED:
             try:
                 data = fp.read()
                 importlib._bootstrap_external._classify_pyc(data, fqname, {})
@@ -323,17 +379,20 @@ def _safe_import_hook(self, name, caller, fromlist, level=-1):
         except ImportError as msg:
             self.msg(2, "ImportError:", str(msg))
             self._add_badmodule(name, caller)
+        except SyntaxError as msg:
+            self.msg(2, "SyntaxError:", str(msg))
+            self._add_badmodule(name, caller)
         else:
             if fromlist:
                 for sub in fromlist:
-                    if sub in self.badmodules:
-                        self._add_badmodule(sub, caller)
+                    fullname = name + "." + sub
+                    if fullname in self.badmodules:
+                        self._add_badmodule(fullname, caller)
                         continue
                     try:
                         self.import_hook(name, caller, [sub], level=level)
                     except ImportError as msg:
                         self.msg(2, "ImportError:", str(msg))
-                        fullname = name + "." + sub
                         self._add_badmodule(fullname, caller)
 
     def scan_opcodes(self, co):
@@ -445,10 +504,11 @@ def find_module(self, name, path, parent=None):
 
         if path is None:
             if name in sys.builtin_module_names:
-                return (None, None, ("", "", imp.C_BUILTIN))
+                return (None, None, ("", "", _C_BUILTIN))
 
             path = self.path
-        return imp.find_module(name, path)
+
+        return _find_module(name, path)
 
     def report(self):
         """Print a report to stdout, listing the found modules with their
diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py
index e4df2a90d4a4..ebd96e1c8a2d 100644
--- a/Lib/test/test_modulefinder.py
+++ b/Lib/test/test_modulefinder.py
@@ -218,6 +218,33 @@ def foo(): pass
     ""
 ]
 
+syntax_error_test = [
+    "a.module",
+    ["a", "a.module", "b"],
+    ["b.module"], [],
+    """\
+a/__init__.py
+a/module.py
+                                import b.module
+b/__init__.py
+b/module.py
+                                ?  # SyntaxError: invalid syntax
+"""]
+
+
+same_name_as_bad_test = [
+    "a.module",
+    ["a", "a.module", "b", "b.c"],
+    ["c"], [],
+    """\
+a/__init__.py
+a/module.py
+                                import c
+                                from b import c
+b/__init__.py
+b/c.py
+"""]
+
 
 def open_file(path):
     dirname = os.path.dirname(path)
@@ -299,6 +326,12 @@ def test_relative_imports_3(self):
     def test_relative_imports_4(self):
         self._do_test(relative_import_test_4)
 
+    def test_syntax_error(self):
+        self._do_test(syntax_error_test)
+
+    def test_same_name_as_bad(self):
+        self._do_test(same_name_as_bad_test)
+
     def test_bytecode(self):
         base_path = os.path.join(TEST_DIR, 'a')
         source_path = base_path + importlib.machinery.SOURCE_SUFFIXES[0]
diff --git a/Misc/ACKS b/Misc/ACKS
index 9cddcb3a871f..df6be5912785 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -222,6 +222,7 @@ Ian Bruntlett
 Floris Bruynooghe
 Matt Bryant
 Stan Bubrouski
+Brandt Bucher
 Colm Buckley
 Erik de Bueger
 Jan-Hein Bührman
diff --git a/Misc/NEWS.d/next/Library/2019-02-13-18-56-22.bpo-17396.oKRkrD.rst b/Misc/NEWS.d/next/Library/2019-02-13-18-56-22.bpo-17396.oKRkrD.rst
new file mode 100644
index 000000000000..50596cf9e43f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-02-13-18-56-22.bpo-17396.oKRkrD.rst
@@ -0,0 +1,2 @@
+:mod:`modulefinder` no longer crashes when encountering syntax errors in followed imports.
+Patch by Brandt Bucher.
\ No newline at end of file
diff --git a/Misc/NEWS.d/next/Library/2019-02-13-18-56-27.bpo-35376.UFhYLj.rst b/Misc/NEWS.d/next/Library/2019-02-13-18-56-27.bpo-35376.UFhYLj.rst
new file mode 100644
index 000000000000..a9bf8c9a636c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-02-13-18-56-27.bpo-35376.UFhYLj.rst
@@ -0,0 +1,2 @@
+:mod:`modulefinder` correctly handles modules that have the same name as a bad package.
+Patch by Brandt Bucher.
\ No newline at end of file
diff --git a/Misc/NEWS.d/next/Library/2019-02-16-22-19-32.bpo-35936.Ay5WtD.rst b/Misc/NEWS.d/next/Library/2019-02-16-22-19-32.bpo-35936.Ay5WtD.rst
new file mode 100644
index 000000000000..55a028ec8349
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-02-16-22-19-32.bpo-35936.Ay5WtD.rst
@@ -0,0 +1,2 @@
+:mod:`modulefinder` no longer depends on the deprecated :mod:`imp` module, and the initializer for :class:`modulefinder.ModuleFinder` now has immutable default arguments.
+Patch by Brandt Bucher.
\ No newline at end of file



More information about the Python-checkins mailing list