[Python-checkins] GH-104114: Fix `pathlib.WindowsPath.glob()` use of literal pattern segment case (GH-104116)

barneygale webhook-mailer at python.org
Wed May 3 12:28:57 EDT 2023


https://github.com/python/cpython/commit/da1980afcb8820ffaa0574df735bc39b1a276a76
commit: da1980afcb8820ffaa0574df735bc39b1a276a76
branch: main
author: Barney Gale <barney.gale at gmail.com>
committer: barneygale <barney.gale at gmail.com>
date: 2023-05-03T17:28:44+01:00
summary:

GH-104114: Fix `pathlib.WindowsPath.glob()` use of literal pattern segment case (GH-104116)

We now use `_WildcardSelector` to evaluate literal pattern segments, which
allows us to retrieve the real filesystem case.

This change is necessary in order to implement a *case_sensitive* argument
(see GH-81079) and a *follow_symlinks* argument (see GH-77609).

files:
A Misc/NEWS.d/next/Library/2023-05-03-03-14-33.gh-issue-104114.RG26RD.rst
M Lib/pathlib.py
M Lib/test/test_pathlib.py

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index dee19d1f89ad..8a1651c23d7f 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -54,11 +54,6 @@ def _ignore_error(exception):
             getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)
 
 
-def _is_wildcard_pattern(pat):
-    # Whether this pattern needs actual matching using fnmatch, or can
-    # be looked up directly as a file.
-    return "*" in pat or "?" in pat or "[" in pat
-
 def _is_case_sensitive(flavour):
     return flavour.normcase('Aa') == 'Aa'
 
@@ -78,10 +73,8 @@ def _make_selector(pattern_parts, flavour):
         cls = _ParentSelector
     elif '**' in pat:
         raise ValueError("Invalid pattern: '**' can only be an entire path component")
-    elif _is_wildcard_pattern(pat):
-        cls = _WildcardSelector
     else:
-        cls = _PreciseSelector
+        cls = _WildcardSelector
     return cls(pat, child_parts, flavour)
 
 
@@ -102,17 +95,15 @@ def select_from(self, parent_path):
         """Iterate over all child paths of `parent_path` matched by this
         selector.  This can contain parent_path itself."""
         path_cls = type(parent_path)
-        is_dir = path_cls.is_dir
-        exists = path_cls.exists
         scandir = path_cls._scandir
-        if not is_dir(parent_path):
+        if not parent_path.is_dir():
             return iter([])
-        return self._select_from(parent_path, is_dir, exists, scandir)
+        return self._select_from(parent_path, scandir)
 
 
 class _TerminatingSelector:
 
-    def _select_from(self, parent_path, is_dir, exists, scandir):
+    def _select_from(self, parent_path, scandir):
         yield parent_path
 
 
@@ -120,29 +111,12 @@ class _ParentSelector(_Selector):
     def __init__(self, name, child_parts, flavour):
         _Selector.__init__(self, child_parts, flavour)
 
-    def _select_from(self,  parent_path, is_dir, exists, scandir):
+    def _select_from(self,  parent_path, scandir):
         path = parent_path._make_child_relpath('..')
-        for p in self.successor._select_from(path, is_dir, exists, scandir):
+        for p in self.successor._select_from(path, scandir):
             yield p
 
 
-class _PreciseSelector(_Selector):
-
-    def __init__(self, name, child_parts, flavour):
-        self.name = name
-        _Selector.__init__(self, child_parts, flavour)
-
-    def _select_from(self, parent_path, is_dir, exists, scandir):
-        try:
-            path = parent_path._make_child_relpath(self.name)
-            follow = is_dir(path) if self.dironly else exists(path, follow_symlinks=False)
-            if follow:
-                for p in self.successor._select_from(path, is_dir, exists, scandir):
-                    yield p
-        except PermissionError:
-            return
-
-
 class _WildcardSelector(_Selector):
 
     def __init__(self, pat, child_parts, flavour):
@@ -150,7 +124,7 @@ def __init__(self, pat, child_parts, flavour):
         self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
         _Selector.__init__(self, child_parts, flavour)
 
-    def _select_from(self, parent_path, is_dir, exists, scandir):
+    def _select_from(self, parent_path, scandir):
         try:
             # We must close the scandir() object before proceeding to
             # avoid exhausting file descriptors when globbing deep trees.
@@ -171,7 +145,7 @@ def _select_from(self, parent_path, is_dir, exists, scandir):
                 name = entry.name
                 if self.match(name):
                     path = parent_path._make_child_relpath(name)
-                    for p in self.successor._select_from(path, is_dir, exists, scandir):
+                    for p in self.successor._select_from(path, scandir):
                         yield p
         except PermissionError:
             return
@@ -182,7 +156,7 @@ class _RecursiveWildcardSelector(_Selector):
     def __init__(self, pat, child_parts, flavour):
         _Selector.__init__(self, child_parts, flavour)
 
-    def _iterate_directories(self, parent_path, is_dir, scandir):
+    def _iterate_directories(self, parent_path, scandir):
         yield parent_path
         try:
             # We must close the scandir() object before proceeding to
@@ -198,18 +172,18 @@ def _iterate_directories(self, parent_path, is_dir, scandir):
                         raise
                 if entry_is_dir and not entry.is_symlink():
                     path = parent_path._make_child_relpath(entry.name)
-                    for p in self._iterate_directories(path, is_dir, scandir):
+                    for p in self._iterate_directories(path, scandir):
                         yield p
         except PermissionError:
             return
 
-    def _select_from(self, parent_path, is_dir, exists, scandir):
+    def _select_from(self, parent_path, scandir):
         try:
             yielded = set()
             try:
                 successor_select = self.successor._select_from
-                for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
-                    for p in successor_select(starting_point, is_dir, exists, scandir):
+                for starting_point in self._iterate_directories(parent_path, scandir):
+                    for p in successor_select(starting_point, scandir):
                         if p not in yielded:
                             yield p
                             yielded.add(p)
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 620d480e37e2..424bb92a87d1 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -3122,7 +3122,7 @@ def test_glob(self):
         self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") })
         self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") })
         self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") })
-        self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"})
+        self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"})
         self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"})
 
     def test_rglob(self):
@@ -3130,7 +3130,7 @@ def test_rglob(self):
         p = P(BASE, "dirC")
         self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") })
         self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") })
-        self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"})
+        self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"})
 
     def test_expanduser(self):
         P = self.cls
diff --git a/Misc/NEWS.d/next/Library/2023-05-03-03-14-33.gh-issue-104114.RG26RD.rst b/Misc/NEWS.d/next/Library/2023-05-03-03-14-33.gh-issue-104114.RG26RD.rst
new file mode 100644
index 000000000000..e705fea8326e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-05-03-03-14-33.gh-issue-104114.RG26RD.rst
@@ -0,0 +1,3 @@
+Fix issue where :meth:`pathlib.Path.glob` returns paths that use the case of
+the pattern's non-wildcard segments for the corresponding path segments,
+rather than the real filesystem case.



More information about the Python-checkins mailing list