[Python-checkins] GH-77609: Add follow_symlinks argument to `pathlib.Path.glob()` (GH-102616)

barneygale webhook-mailer at python.org
Mon May 29 12:00:22 EDT 2023


https://github.com/python/cpython/commit/ace676e2c2121f94a55effc6a30b3b0e987ae7da
commit: ace676e2c2121f94a55effc6a30b3b0e987ae7da
branch: main
author: Barney Gale <barney.gale at gmail.com>
committer: barneygale <barney.gale at gmail.com>
date: 2023-05-29T16:59:52+01:00
summary:

GH-77609: Add follow_symlinks argument to `pathlib.Path.glob()` (GH-102616)

Add a keyword-only *follow_symlinks* parameter to `pathlib.Path.glob()` and `rglob()`.

When *follow_symlinks* is `None` (the default), these methods follow symlinks except when evaluating "`**`" wildcards. When set to true or false, symlinks are always or never followed, respectively.

files:
A Misc/NEWS.d/next/Library/2023-03-12-03-37-03.gh-issue-77609.aOQttm.rst
M Doc/library/pathlib.rst
M Doc/whatsnew/3.13.rst
M Lib/pathlib.py
M Lib/test/test_pathlib.py

diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index 627f2df9263d..ee3330f44f47 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -885,7 +885,7 @@ call fails (for example because the path doesn't exist).
    .. versionadded:: 3.5
 
 
-.. method:: Path.glob(pattern, *, case_sensitive=None)
+.. method:: Path.glob(pattern, *, case_sensitive=None, follow_symlinks=None)
 
    Glob the given relative *pattern* in the directory represented by this path,
    yielding all matching files (of any kind)::
@@ -911,6 +911,11 @@ call fails (for example because the path doesn't exist).
    typically, case-sensitive on POSIX, and case-insensitive on Windows.
    Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
 
+   By default, or when the *follow_symlinks* keyword-only argument is set to
+   ``None``, this method follows symlinks except when expanding "``**``"
+   wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
+   ``False`` to treat all symlinks as files.
+
    .. note::
       Using the "``**``" pattern in large directory trees may consume
       an inordinate amount of time.
@@ -924,6 +929,9 @@ call fails (for example because the path doesn't exist).
    .. versionadded:: 3.12
       The *case_sensitive* argument.
 
+   .. versionadded:: 3.13
+      The *follow_symlinks* argument.
+
 .. method:: Path.group()
 
    Return the name of the group owning the file.  :exc:`KeyError` is raised
@@ -1309,7 +1317,7 @@ call fails (for example because the path doesn't exist).
    .. versionadded:: 3.6
       The *strict* argument (pre-3.6 behavior is strict).
 
-.. method:: Path.rglob(pattern, *, case_sensitive=None)
+.. method:: Path.rglob(pattern, *, case_sensitive=None, follow_symlinks=None)
 
    Glob the given relative *pattern* recursively.  This is like calling
    :func:`Path.glob` with "``**/``" added in front of the *pattern*, where
@@ -1327,6 +1335,11 @@ call fails (for example because the path doesn't exist).
    typically, case-sensitive on POSIX, and case-insensitive on Windows.
    Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
 
+   By default, or when the *follow_symlinks* keyword-only argument is set to
+   ``None``, this method follows symlinks except when expanding "``**``"
+   wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
+   ``False`` to treat all symlinks as files.
+
    .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob
 
    .. versionchanged:: 3.11
@@ -1336,6 +1349,9 @@ call fails (for example because the path doesn't exist).
    .. versionadded:: 3.12
       The *case_sensitive* argument.
 
+   .. versionadded:: 3.13
+      The *follow_symlinks* argument.
+
 .. method:: Path.rmdir()
 
    Remove this directory.  The directory must be empty.
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 45728d1801d2..8c81ac76a56b 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -87,6 +87,12 @@ New Modules
 Improved Modules
 ================
 
+pathlib
+-------
+
+* Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob` and
+  :meth:`~pathlib.Path.rglob`.
+  (Contributed by Barney Gale in :gh:`77609`.)
 
 Optimizations
 =============
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 8cb5279d735a..87c2e970a0a8 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -105,19 +105,19 @@ def __init__(self, child_parts, flavour, case_sensitive):
             self.successor = _TerminatingSelector()
             self.dironly = False
 
-    def select_from(self, parent_path):
+    def select_from(self, parent_path, follow_symlinks):
         """Iterate over all child paths of `parent_path` matched by this
         selector.  This can contain parent_path itself."""
         path_cls = type(parent_path)
         scandir = path_cls._scandir
         if not parent_path.is_dir():
             return iter([])
-        return self._select_from(parent_path, scandir)
+        return self._select_from(parent_path, scandir, follow_symlinks)
 
 
 class _TerminatingSelector:
 
-    def _select_from(self, parent_path, scandir):
+    def _select_from(self, parent_path, scandir, follow_symlinks):
         yield parent_path
 
 
@@ -126,9 +126,9 @@ class _ParentSelector(_Selector):
     def __init__(self, name, child_parts, flavour, case_sensitive):
         _Selector.__init__(self, child_parts, flavour, case_sensitive)
 
-    def _select_from(self,  parent_path, scandir):
+    def _select_from(self,  parent_path, scandir, follow_symlinks):
         path = parent_path._make_child_relpath('..')
-        for p in self.successor._select_from(path, scandir):
+        for p in self.successor._select_from(path, scandir, follow_symlinks):
             yield p
 
 
@@ -141,7 +141,8 @@ def __init__(self, pat, child_parts, flavour, case_sensitive):
             case_sensitive = _is_case_sensitive(flavour)
         self.match = _compile_pattern(pat, case_sensitive)
 
-    def _select_from(self, parent_path, scandir):
+    def _select_from(self, parent_path, scandir, follow_symlinks):
+        follow_dirlinks = True if follow_symlinks is None else follow_symlinks
         try:
             # We must close the scandir() object before proceeding to
             # avoid exhausting file descriptors when globbing deep trees.
@@ -153,14 +154,14 @@ def _select_from(self, parent_path, scandir):
             for entry in entries:
                 if self.dironly:
                     try:
-                        if not entry.is_dir():
+                        if not entry.is_dir(follow_symlinks=follow_dirlinks):
                             continue
                     except OSError:
                         continue
                 name = entry.name
                 if self.match(name):
                     path = parent_path._make_child_relpath(name)
-                    for p in self.successor._select_from(path, scandir):
+                    for p in self.successor._select_from(path, scandir, follow_symlinks):
                         yield p
 
 
@@ -169,16 +170,17 @@ class _RecursiveWildcardSelector(_Selector):
     def __init__(self, pat, child_parts, flavour, case_sensitive):
         _Selector.__init__(self, child_parts, flavour, case_sensitive)
 
-    def _iterate_directories(self, parent_path):
+    def _iterate_directories(self, parent_path, follow_symlinks):
         yield parent_path
-        for dirpath, dirnames, _ in parent_path.walk():
+        for dirpath, dirnames, _ in parent_path.walk(follow_symlinks=follow_symlinks):
             for dirname in dirnames:
                 yield dirpath._make_child_relpath(dirname)
 
-    def _select_from(self, parent_path, scandir):
+    def _select_from(self, parent_path, scandir, follow_symlinks):
+        follow_dirlinks = False if follow_symlinks is None else follow_symlinks
         successor_select = self.successor._select_from
-        for starting_point in self._iterate_directories(parent_path):
-            for p in successor_select(starting_point, scandir):
+        for starting_point in self._iterate_directories(parent_path, follow_dirlinks):
+            for p in successor_select(starting_point, scandir, follow_symlinks):
                 yield p
 
 
@@ -189,10 +191,10 @@ class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector):
     multiple non-adjacent '**' segments.
     """
 
-    def _select_from(self, parent_path, scandir):
+    def _select_from(self, parent_path, scandir, follow_symlinks):
         yielded = set()
         try:
-            for p in super()._select_from(parent_path, scandir):
+            for p in super()._select_from(parent_path, scandir, follow_symlinks):
                 if p not in yielded:
                     yield p
                     yielded.add(p)
@@ -994,7 +996,7 @@ def _make_child_relpath(self, name):
         path._tail_cached = tail + [name]
         return path
 
-    def glob(self, pattern, *, case_sensitive=None):
+    def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
         """Iterate over this subtree and yield all existing files (of any
         kind, including directories) matching the given relative pattern.
         """
@@ -1007,10 +1009,10 @@ def glob(self, pattern, *, case_sensitive=None):
         if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
             pattern_parts.append('')
         selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive)
-        for p in selector.select_from(self):
+        for p in selector.select_from(self, follow_symlinks):
             yield p
 
-    def rglob(self, pattern, *, case_sensitive=None):
+    def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
         """Recursively yield all existing files (of any kind, including
         directories) matching the given relative pattern, anywhere in
         this subtree.
@@ -1022,7 +1024,7 @@ def rglob(self, pattern, *, case_sensitive=None):
         if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
             pattern_parts.append('')
         selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive)
-        for p in selector.select_from(self):
+        for p in selector.select_from(self, follow_symlinks):
             yield p
 
     def walk(self, top_down=True, on_error=None, follow_symlinks=False):
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 01615e209456..4391d685d3c1 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1863,6 +1863,35 @@ def _check(path, pattern, case_sensitive, expected):
         _check(path, "dirb/file*", True, [])
         _check(path, "dirb/file*", False, ["dirB/fileB"])
 
+    @os_helper.skip_unless_symlink
+    def test_glob_follow_symlinks_common(self):
+        def _check(path, glob, expected):
+            actual = {path for path in path.glob(glob, follow_symlinks=True)
+                      if "linkD" not in path.parent.parts}  # exclude symlink loop.
+            self.assertEqual(actual, { P(BASE, q) for q in expected })
+        P = self.cls
+        p = P(BASE)
+        _check(p, "fileB", [])
+        _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
+        _check(p, "*A", ["dirA", "fileA", "linkA"])
+        _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"])
+        _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"])
+        _check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"])
+
+    @os_helper.skip_unless_symlink
+    def test_glob_no_follow_symlinks_common(self):
+        def _check(path, glob, expected):
+            actual = {path for path in path.glob(glob, follow_symlinks=False)}
+            self.assertEqual(actual, { P(BASE, q) for q in expected })
+        P = self.cls
+        p = P(BASE)
+        _check(p, "fileB", [])
+        _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
+        _check(p, "*A", ["dirA", "fileA", "linkA"])
+        _check(p, "*B/*", ["dirB/fileB", "dirB/linkD"])
+        _check(p, "*/fileB", ["dirB/fileB"])
+        _check(p, "*/", ["dirA", "dirB", "dirC", "dirE"])
+
     def test_rglob_common(self):
         def _check(glob, expected):
             self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected))
@@ -1906,6 +1935,60 @@ def _check(glob, expected):
         _check(p.rglob("*.txt"), ["dirC/novel.txt"])
         _check(p.rglob("*.*"), ["dirC/novel.txt"])
 
+    @os_helper.skip_unless_symlink
+    def test_rglob_follow_symlinks_common(self):
+        def _check(path, glob, expected):
+            actual = {path for path in path.rglob(glob, follow_symlinks=True)
+                      if 'linkD' not in path.parent.parts}  # exclude symlink loop.
+            self.assertEqual(actual, { P(BASE, q) for q in expected })
+        P = self.cls
+        p = P(BASE)
+        _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
+        _check(p, "*/fileA", [])
+        _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
+        _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB",
+                            "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"])
+        _check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD",
+                         "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"])
+        _check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD",
+                       "dirC", "dirE", "dirC/dirD", "linkB", "linkB/linkD"])
+
+        p = P(BASE, "dirC")
+        _check(p, "*", ["dirC/fileC", "dirC/novel.txt",
+                        "dirC/dirD", "dirC/dirD/fileD"])
+        _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
+        _check(p, "*/*", ["dirC/dirD/fileD"])
+        _check(p, "*/", ["dirC/dirD"])
+        _check(p, "", ["dirC", "dirC/dirD"])
+        # gh-91616, a re module regression
+        _check(p, "*.txt", ["dirC/novel.txt"])
+        _check(p, "*.*", ["dirC/novel.txt"])
+
+    @os_helper.skip_unless_symlink
+    def test_rglob_no_follow_symlinks_common(self):
+        def _check(path, glob, expected):
+            actual = {path for path in path.rglob(glob, follow_symlinks=False)}
+            self.assertEqual(actual, { P(BASE, q) for q in expected })
+        P = self.cls
+        p = P(BASE)
+        _check(p, "fileB", ["dirB/fileB"])
+        _check(p, "*/fileA", [])
+        _check(p, "*/fileB", ["dirB/fileB"])
+        _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ])
+        _check(p, "*/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"])
+        _check(p, "", ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"])
+
+        p = P(BASE, "dirC")
+        _check(p, "*", ["dirC/fileC", "dirC/novel.txt",
+                        "dirC/dirD", "dirC/dirD/fileD"])
+        _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
+        _check(p, "*/*", ["dirC/dirD/fileD"])
+        _check(p, "*/", ["dirC/dirD"])
+        _check(p, "", ["dirC", "dirC/dirD"])
+        # gh-91616, a re module regression
+        _check(p, "*.txt", ["dirC/novel.txt"])
+        _check(p, "*.*", ["dirC/novel.txt"])
+
     @os_helper.skip_unless_symlink
     def test_rglob_symlink_loop(self):
         # Don't get fooled by symlink loops (Issue #26012).
diff --git a/Misc/NEWS.d/next/Library/2023-03-12-03-37-03.gh-issue-77609.aOQttm.rst b/Misc/NEWS.d/next/Library/2023-03-12-03-37-03.gh-issue-77609.aOQttm.rst
new file mode 100644
index 000000000000..35e61088de58
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-03-12-03-37-03.gh-issue-77609.aOQttm.rst
@@ -0,0 +1,2 @@
+Add *follow_symlinks* argument to :meth:`pathlib.Path.glob` and
+:meth:`~pathlib.Path.rglob`, defaulting to ``None``.



More information about the Python-checkins mailing list