[Python-checkins] GH-102613: Fix recursion error from `pathlib.Path.glob()` (GH-104373)

barneygale webhook-mailer at python.org
Mon May 15 13:33:39 EDT 2023


https://github.com/python/cpython/commit/cb88ae635e96d7020ba6187bcfd45ace4dcd8395
commit: cb88ae635e96d7020ba6187bcfd45ace4dcd8395
branch: main
author: Barney Gale <barney.gale at gmail.com>
committer: barneygale <barney.gale at gmail.com>
date: 2023-05-15T18:33:32+01:00
summary:

GH-102613: Fix recursion error from `pathlib.Path.glob()` (GH-104373)

Use `Path.walk()` to implement the recursive wildcard `**`. `Path.walk()`
traverses the tree iteratively rather than recursively, so globbing deep
directory trees no longer exceeds the recursion limit - see GH-100282.

files:
A Misc/NEWS.d/next/Library/2023-05-11-01-07-42.gh-issue-102613.uMsokt.rst
M Lib/pathlib.py
M Lib/test/test_pathlib.py

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 40b72930e1f0..ef7c47c9e775 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -164,30 +164,15 @@ class _RecursiveWildcardSelector(_Selector):
     def __init__(self, pat, child_parts, flavour, case_sensitive):
         _Selector.__init__(self, child_parts, flavour, case_sensitive)
 
-    def _iterate_directories(self, parent_path, scandir):
+    def _iterate_directories(self, parent_path):
         yield parent_path
-        try:
-            # We must close the scandir() object before proceeding to
-            # avoid exhausting file descriptors when globbing deep trees.
-            with scandir(parent_path) as scandir_it:
-                entries = list(scandir_it)
-        except OSError:
-            pass
-        else:
-            for entry in entries:
-                entry_is_dir = False
-                try:
-                    entry_is_dir = entry.is_dir(follow_symlinks=False)
-                except OSError:
-                    pass
-                if entry_is_dir:
-                    path = parent_path._make_child_relpath(entry.name)
-                    for p in self._iterate_directories(path, scandir):
-                        yield p
+        for dirpath, dirnames, _ in parent_path.walk():
+            for dirname in dirnames:
+                yield dirpath._make_child_relpath(dirname)
 
     def _select_from(self, parent_path, scandir):
         successor_select = self.successor._select_from
-        for starting_point in self._iterate_directories(parent_path, scandir):
+        for starting_point in self._iterate_directories(parent_path):
             for p in successor_select(starting_point, scandir):
                 yield p
 
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 67ca4795962b..46a5248499c5 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1972,6 +1972,17 @@ def test_glob_long_symlink(self):
         bad_link.symlink_to("bad" * 200)
         self.assertEqual(sorted(base.glob('**/*')), [bad_link])
 
+    def test_glob_above_recursion_limit(self):
+        recursion_limit = 40
+        # directory_depth > recursion_limit
+        directory_depth = recursion_limit + 10
+        base = pathlib.Path(os_helper.TESTFN, 'deep')
+        path = pathlib.Path(base, *(['d'] * directory_depth))
+        path.mkdir(parents=True)
+
+        with set_recursion_limit(recursion_limit):
+            list(base.glob('**'))
+
     def _check_resolve(self, p, expected, strict=True):
         q = p.resolve(strict)
         self.assertEqual(q, expected)
diff --git a/Misc/NEWS.d/next/Library/2023-05-11-01-07-42.gh-issue-102613.uMsokt.rst b/Misc/NEWS.d/next/Library/2023-05-11-01-07-42.gh-issue-102613.uMsokt.rst
new file mode 100644
index 000000000000..3b06964dc8d2
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-05-11-01-07-42.gh-issue-102613.uMsokt.rst
@@ -0,0 +1,2 @@
+Fix issue where :meth:`pathlib.Path.glob` raised :exc:`RecursionError` when
+walking deep directory trees.



More information about the Python-checkins mailing list