[Python-checkins] cpython: Issue #23605: Fix os.walk(topdown=True), don't cache entry.is_symlink() because

victor.stinner python-checkins at python.org
Wed Mar 18 11:31:23 CET 2015


https://hg.python.org/cpython/rev/4fb829f8c04d
changeset:   95031:4fb829f8c04d
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Wed Mar 18 11:29:47 2015 +0100
summary:
  Issue #23605: Fix os.walk(topdown=True), don't cache entry.is_symlink() because
the caller can replace the directory with a different file kind.

The bottom-up way, os.walk(topdown=False), still uses entry.is_symlink(), and
so can be faster than Python 3.4.

files:
  Lib/os.py |  78 +++++++++++++++++++++++++++---------------
  1 files changed, 50 insertions(+), 28 deletions(-)


diff --git a/Lib/os.py b/Lib/os.py
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -354,7 +354,6 @@
 
     dirs = []
     nondirs = []
-    symlinks = set()
 
     # We may not have read permission for top, in which case we can't
     # get a list of the files the directory contains.  os.walk
@@ -364,46 +363,69 @@
     try:
         # Note that scandir is global in this module due
         # to earlier import-*.
-        for entry in scandir(top):
+        scandir_it = scandir(top)
+    except OSError as error:
+        if onerror is not None:
+            onerror(error)
+        return
+
+    while True:
+        try:
             try:
-                is_dir = entry.is_dir()
-            except OSError:
-                # If is_dir() raises an OSError, consider that the entry is not
-                # a directory, same behaviour than os.path.isdir().
-                is_dir = False
+                entry = next(scandir_it)
+            except StopIteration:
+                break
+        except OSError as error:
+            if onerror is not None:
+                onerror(error)
+            return
 
-            if is_dir:
-                dirs.append(entry.name)
+        try:
+            is_dir = entry.is_dir()
+        except OSError:
+            # If is_dir() raises an OSError, consider that the entry is not
+            # a directory, same behaviour than os.path.isdir().
+            is_dir = False
+
+        if is_dir:
+            dirs.append(entry.name)
+        else:
+            nondirs.append(entry.name)
+
+        if not topdown and is_dir:
+            # Bottom-up: recurse into sub-directory, but exclude symlinks to
+            # directories if followlinks is False
+            if followlinks:
+                walk_into = True
             else:
-                nondirs.append(entry.name)
-
-            if is_dir and not followlinks:
                 try:
-                    if entry.is_symlink():
-                        symlinks.add(entry.name)
+                    is_symlink = entry.is_symlink()
                 except OSError:
                     # If is_symlink() raises an OSError, consider that the
                     # entry is not a symbolik link, same behaviour than
                     # os.path.islink().
-                    pass
-    except OSError as error:
-        # scandir() or iterating into scandir() iterator raised an OSError
-        if onerror is not None:
-            onerror(error)
-        return
+                    is_symlink = False
+                walk_into = not is_symlink
+
+            if walk_into:
+                yield from walk(entry.path, topdown, onerror, followlinks)
 
     # Yield before recursion if going top down
     if topdown:
         yield top, dirs, nondirs
 
-    # Recurse into sub-directories
-    for name in dirs:
-        if followlinks or name not in symlinks:
-            new_path = path.join(top, name)
-            yield from walk(new_path, topdown, onerror, followlinks)
-
-    # Yield after recursion if going bottom up
-    if not topdown:
+        # Recurse into sub-directories
+        islink, join = path.islink, path.join
+        for name in dirs:
+            new_path = join(top, name)
+            # Issue #23605: os.path.islink() is used instead of caching
+            # entry.is_symlink() result during the loop on os.scandir() because
+            # the caller can replace the directory entry during the "yield"
+            # above.
+            if followlinks or not islink(new_path):
+                yield from walk(new_path, topdown, onerror, followlinks)
+    else:
+        # Yield after recursion if going bottom up
         yield top, dirs, nondirs
 
 __all__.append("walk")

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list