[Python-checkins] GH-104102: Optimize `pathlib.Path.glob()` handling of `../` pattern segments (GH-104103)
barneygale
webhook-mailer at python.org
Tue May 2 19:16:34 EDT 2023
https://github.com/python/cpython/commit/65a49c6553a27cc36eebb4b79f409c3cb4450d8c
commit: 65a49c6553a27cc36eebb4b79f409c3cb4450d8c
branch: main
author: Barney Gale <barney.gale at gmail.com>
committer: barneygale <barney.gale at gmail.com>
date: 2023-05-02T23:16:04Z
summary:
GH-104102: Optimize `pathlib.Path.glob()` handling of `../` pattern segments (GH-104103)
These segments do not require a `stat()` call, as the selector's
`_select_from()` method is called after we've established that the
parent is a directory.
files:
A Misc/NEWS.d/next/Library/2023-05-02-20-43-03.gh-issue-104102.vgSdEJ.rst
M Lib/pathlib.py
M Lib/test/test_pathlib.py
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 61e7f3e4430c..c69089f4e1bc 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -74,6 +74,8 @@ def _make_selector(pattern_parts, flavour):
         return _TerminatingSelector()
     if pat == '**':
         cls = _RecursiveWildcardSelector
+    elif pat == '..':
+        cls = _ParentSelector
     elif '**' in pat:
         raise ValueError("Invalid pattern: '**' can only be an entire path component")
     elif _is_wildcard_pattern(pat):
@@ -114,6 +116,16 @@ def _select_from(self, parent_path, is_dir, exists, scandir):
         yield parent_path
+class _ParentSelector(_Selector):
+    def __init__(self, name, child_parts, flavour):
+        _Selector.__init__(self, child_parts, flavour)
+
+    def _select_from(self, parent_path, is_dir, exists, scandir):
+        path = parent_path._make_child_relpath('..')
+        for p in self.successor._select_from(path, is_dir, exists, scandir):
+            yield p
+
+
 class _PreciseSelector(_Selector):
     def __init__(self, name, child_parts, flavour):
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 8b5b61a818bb..9902b7242205 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1892,8 +1892,13 @@ def test_glob_dotdot(self):
         P = self.cls
         p = P(BASE)
         self.assertEqual(set(p.glob("..")), { P(BASE, "..") })
+        self.assertEqual(set(p.glob("../..")), { P(BASE, "..", "..") })
+        self.assertEqual(set(p.glob("dirA/..")), { P(BASE, "dirA", "..") })
         self.assertEqual(set(p.glob("dirA/../file*")), { P(BASE, "dirA/../fileA") })
+        self.assertEqual(set(p.glob("dirA/../file*/..")), set())
         self.assertEqual(set(p.glob("../xyzzy")), set())
+        self.assertEqual(set(p.glob("xyzzy/..")), set())
+        self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(BASE, *[".."] * 50)})
     @os_helper.skip_unless_symlink
     def test_glob_permissions(self):
diff --git a/Misc/NEWS.d/next/Library/2023-05-02-20-43-03.gh-issue-104102.vgSdEJ.rst b/Misc/NEWS.d/next/Library/2023-05-02-20-43-03.gh-issue-104102.vgSdEJ.rst
new file mode 100644
index 000000000000..7101de908a50
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-05-02-20-43-03.gh-issue-104102.vgSdEJ.rst
@@ -0,0 +1,2 @@
+Improve performance of :meth:`pathlib.Path.glob` when evaluating patterns
+that contain ``'../'`` segments.
More information about the Python-checkins mailing list