[Python-checkins] GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282)

barneygale webhook-mailer at python.org
Wed Mar 22 10:45:35 EDT 2023


https://github.com/python/cpython/commit/713df2c53489ce8012d0ede10b70950e6b0d8372
commit: 713df2c53489ce8012d0ede10b70950e6b0d8372
branch: main
author: Stanislav Zmiev <szmiev2000 at gmail.com>
committer: barneygale <barney.gale at gmail.com>
date: 2023-03-22T14:45:25Z
summary:

GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282)

Use a stack to implement `pathlib.Path.walk()` iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees.

Co-authored-by: Barney Gale <barney.gale at gmail.com>
Co-authored-by: Brett Cannon <brett at python.org>

files:
A Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst
M Lib/pathlib.py
M Lib/test/test_pathlib.py

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 55c44f12e5a2..a126bf2fe557 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -1197,45 +1197,47 @@ def expanduser(self):
     def walk(self, top_down=True, on_error=None, follow_symlinks=False):
         """Walk the directory tree from this directory, similar to os.walk()."""
         sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
-        return self._walk(top_down, on_error, follow_symlinks)
-
-    def _walk(self, top_down, on_error, follow_symlinks):
-        # We may not have read permission for self, in which case we can't
-        # get a list of the files the directory contains. os.walk
-        # always suppressed the exception then, rather than blow up for a
-        # minor reason when (say) a thousand readable directories are still
-        # left to visit. That logic is copied here.
-        try:
-            scandir_it = self._scandir()
-        except OSError as error:
-            if on_error is not None:
-                on_error(error)
-            return
-
-        with scandir_it:
-            dirnames = []
-            filenames = []
-            for entry in scandir_it:
-                try:
-                    is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
-                except OSError:
-                    # Carried over from os.path.isdir().
-                    is_dir = False
-
-                if is_dir:
-                    dirnames.append(entry.name)
-                else:
-                    filenames.append(entry.name)
-
-        if top_down:
-            yield self, dirnames, filenames
-
-        for dirname in dirnames:
-            dirpath = self._make_child_relpath(dirname)
-            yield from dirpath._walk(top_down, on_error, follow_symlinks)
+        paths = [self]
+
+        while paths:
+            path = paths.pop()
+            if isinstance(path, tuple):
+                yield path
+                continue
+
+            # We may not have read permission for self, in which case we can't
+            # get a list of the files the directory contains. os.walk()
+            # always suppressed the exception in that instance, rather than
+            # blow up for a minor reason when (say) a thousand readable
+            # directories are still left to visit. That logic is copied here.
+            try:
+                scandir_it = path._scandir()
+            except OSError as error:
+                if on_error is not None:
+                    on_error(error)
+                continue
+
+            with scandir_it:
+                dirnames = []
+                filenames = []
+                for entry in scandir_it:
+                    try:
+                        is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
+                    except OSError:
+                        # Carried over from os.path.isdir().
+                        is_dir = False
+
+                    if is_dir:
+                        dirnames.append(entry.name)
+                    else:
+                        filenames.append(entry.name)
+
+            if top_down:
+                yield path, dirnames, filenames
+            else:
+                paths.append((path, dirnames, filenames))
 
-        if not top_down:
-            yield self, dirnames, filenames
+            paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
 
 
 class PosixPath(Path, PurePosixPath):
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index f05dead58867..3041630da678 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -13,6 +13,7 @@
 from unittest import mock
 
 from test.support import import_helper
+from test.support import set_recursion_limit
 from test.support import is_emscripten, is_wasi
 from test.support import os_helper
 from test.support.os_helper import TESTFN, FakePath
@@ -2793,6 +2794,18 @@ def test_walk_many_open_files(self):
                 self.assertEqual(next(it), expected)
             path = path / 'd'
 
+    def test_walk_above_recursion_limit(self):
+        recursion_limit = 40
+        # directory_depth > recursion_limit
+        directory_depth = recursion_limit + 10
+        base = pathlib.Path(os_helper.TESTFN, 'deep')
+        path = pathlib.Path(base, *(['d'] * directory_depth))
+        path.mkdir(parents=True)
+
+        with set_recursion_limit(recursion_limit):
+            list(base.walk())
+            list(base.walk(top_down=False))
+
 
 class PathTest(_BasePathTest, unittest.TestCase):
     cls = pathlib.Path
diff --git a/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst b/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst
new file mode 100644
index 000000000000..f9ac1475dceb
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst
@@ -0,0 +1 @@
+Fix pathlib.Path.walk RecursionError on deep directory trees by rewriting it using iteration instead of recursion.



More information about the Python-checkins mailing list