[Python-checkins] GH-81079: Add case_sensitive argument to `pathlib.Path.glob()` (GH-102710)
barneygale
webhook-mailer at python.org
Thu May 4 12:44:43 EDT 2023
https://github.com/python/cpython/commit/8100be5535073a5442c2b8c68dcb2093ee69433d
commit: 8100be5535073a5442c2b8c68dcb2093ee69433d
branch: main
author: Barney Gale <barney.gale at gmail.com>
committer: barneygale <barney.gale at gmail.com>
date: 2023-05-04T16:44:36Z
summary:
GH-81079: Add case_sensitive argument to `pathlib.Path.glob()` (GH-102710)
This argument allows case-sensitive matching to be enabled on Windows, and
case-insensitive matching to be enabled on POSIX.
Co-authored-by: Steve Dower <steve.dower at microsoft.com>
files:
A Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst
M Doc/library/pathlib.rst
M Lib/pathlib.py
M Lib/test/test_pathlib.py
diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index 4847ac24c775..14118127835b 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -855,7 +855,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5
-.. method:: Path.glob(pattern)
+.. method:: Path.glob(pattern, *, case_sensitive=None)
Glob the given relative *pattern* in the directory represented by this path,
yielding all matching files (of any kind)::
@@ -876,6 +876,11 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]
+ By default, or when the *case_sensitive* keyword-only argument is set to
+ ``None``, this method matches paths using platform-specific casing rules:
+ typically, case-sensitive on POSIX, and case-insensitive on Windows.
+ Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
+
.. note::
Using the "``**``" pattern in large directory trees may consume
an inordinate amount of time.
@@ -886,6 +891,9 @@ call fails (for example because the path doesn't exist).
Return only directories if *pattern* ends with a pathname components
separator (:data:`~os.sep` or :data:`~os.altsep`).
+ .. versionadded:: 3.12
+ The *case_sensitive* argument.
+
.. method:: Path.group()
Return the name of the group owning the file. :exc:`KeyError` is raised
@@ -1271,7 +1279,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.6
The *strict* argument (pre-3.6 behavior is strict).
-.. method:: Path.rglob(pattern)
+.. method:: Path.rglob(pattern, *, case_sensitive=None)
Glob the given relative *pattern* recursively. This is like calling
:func:`Path.glob` with "``**/``" added in front of the *pattern*, where
@@ -1284,12 +1292,20 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]
+ By default, or when the *case_sensitive* keyword-only argument is set to
+ ``None``, this method matches paths using platform-specific casing rules:
+ typically, case-sensitive on POSIX, and case-insensitive on Windows.
+ Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
+
.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob
.. versionchanged:: 3.11
Return only directories if *pattern* ends with a pathname components
separator (:data:`~os.sep` or :data:`~os.altsep`).
+ .. versionadded:: 3.12
+ The *case_sensitive* argument.
+
.. method:: Path.rmdir()
Remove this directory. The directory must be empty.
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 8a1651c23d7f..f32e1e2d8228 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -62,7 +62,7 @@ def _is_case_sensitive(flavour):
#
@functools.lru_cache()
-def _make_selector(pattern_parts, flavour):
+def _make_selector(pattern_parts, flavour, case_sensitive):
pat = pattern_parts[0]
child_parts = pattern_parts[1:]
if not pat:
@@ -75,17 +75,17 @@ def _make_selector(pattern_parts, flavour):
raise ValueError("Invalid pattern: '**' can only be an entire path component")
else:
cls = _WildcardSelector
- return cls(pat, child_parts, flavour)
+ return cls(pat, child_parts, flavour, case_sensitive)
class _Selector:
"""A selector matches a specific glob pattern part against the children
of a given path."""
- def __init__(self, child_parts, flavour):
+ def __init__(self, child_parts, flavour, case_sensitive):
self.child_parts = child_parts
if child_parts:
- self.successor = _make_selector(child_parts, flavour)
+ self.successor = _make_selector(child_parts, flavour, case_sensitive)
self.dironly = True
else:
self.successor = _TerminatingSelector()
@@ -108,8 +108,9 @@ def _select_from(self, parent_path, scandir):
class _ParentSelector(_Selector):
- def __init__(self, name, child_parts, flavour):
- _Selector.__init__(self, child_parts, flavour)
+
+ def __init__(self, name, child_parts, flavour, case_sensitive):
+ _Selector.__init__(self, child_parts, flavour, case_sensitive)
def _select_from(self, parent_path, scandir):
path = parent_path._make_child_relpath('..')
@@ -119,10 +120,13 @@ def _select_from(self, parent_path, scandir):
class _WildcardSelector(_Selector):
- def __init__(self, pat, child_parts, flavour):
- flags = re.NOFLAG if _is_case_sensitive(flavour) else re.IGNORECASE
+ def __init__(self, pat, child_parts, flavour, case_sensitive):
+ _Selector.__init__(self, child_parts, flavour, case_sensitive)
+ if case_sensitive is None:
+ # TODO: evaluate case-sensitivity of each directory in _select_from()
+ case_sensitive = _is_case_sensitive(flavour)
+ flags = re.NOFLAG if case_sensitive else re.IGNORECASE
self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
- _Selector.__init__(self, child_parts, flavour)
def _select_from(self, parent_path, scandir):
try:
@@ -153,8 +157,8 @@ def _select_from(self, parent_path, scandir):
class _RecursiveWildcardSelector(_Selector):
- def __init__(self, pat, child_parts, flavour):
- _Selector.__init__(self, child_parts, flavour)
+ def __init__(self, pat, child_parts, flavour, case_sensitive):
+ _Selector.__init__(self, child_parts, flavour, case_sensitive)
def _iterate_directories(self, parent_path, scandir):
yield parent_path
@@ -819,7 +823,7 @@ def _scandir(self):
# includes scandir(), which is used to implement glob().
return os.scandir(self)
- def glob(self, pattern):
+ def glob(self, pattern, *, case_sensitive=None):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
@@ -831,11 +835,11 @@ def glob(self, pattern):
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
- selector = _make_selector(tuple(pattern_parts), self._flavour)
+ selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive)
for p in selector.select_from(self):
yield p
- def rglob(self, pattern):
+ def rglob(self, pattern, *, case_sensitive=None):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
@@ -846,7 +850,7 @@ def rglob(self, pattern):
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
- selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour)
+ selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive)
for p in selector.select_from(self):
yield p
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 424bb92a87d1..a932e03df423 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1816,6 +1816,18 @@ def _check(glob, expected):
else:
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"])
+ def test_glob_case_sensitive(self):
+ P = self.cls
+ def _check(path, pattern, case_sensitive, expected):
+ actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)}
+ expected = {str(P(BASE, q)) for q in expected}
+ self.assertEqual(actual, expected)
+ path = P(BASE)
+ _check(path, "DIRB/FILE*", True, [])
+ _check(path, "DIRB/FILE*", False, ["dirB/fileB"])
+ _check(path, "dirb/file*", True, [])
+ _check(path, "dirb/file*", False, ["dirB/fileB"])
+
def test_rglob_common(self):
def _check(glob, expected):
self.assertEqual(set(glob), { P(BASE, q) for q in expected })
diff --git a/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst b/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst
new file mode 100644
index 000000000000..ef5690533985
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst
@@ -0,0 +1,2 @@
+Add *case_sensitive* keyword-only argument to :meth:`pathlib.Path.glob` and
+:meth:`~pathlib.Path.rglob`.
More information about the Python-checkins mailing list