[Python-checkins] [3.12] GH-104996: Defer joining of `pathlib.PurePath()` arguments. (GH-104999) (GH-105483)
barneygale
webhook-mailer at python.org
Wed Jun 7 19:25:52 EDT 2023
https://github.com/python/cpython/commit/ae25f1c8e5522dee4131a3b48490cdb199e9ae22
commit: ae25f1c8e5522dee4131a3b48490cdb199e9ae22
branch: 3.12
author: Barney Gale <barney.gale at gmail.com>
committer: barneygale <barney.gale at gmail.com>
date: 2023-06-08T00:25:45+01:00
summary:
[3.12] GH-104996: Defer joining of `pathlib.PurePath()` arguments. (GH-104999) (GH-105483)
Joining of arguments is moved to `_load_parts`, which is called when a
normalized path is needed.
(cherry picked from commit ffeaec7e60c88d585deacb10264ba7a96e5e52df)
files:
A Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst
M Lib/pathlib.py
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 29517e4c74db..d279fd2958b1 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -293,9 +293,9 @@ class PurePath(object):
"""
__slots__ = (
- # The `_raw_path` slot stores an unnormalized string path. This is set
+ # The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
- '_raw_path',
+ '_raw_paths',
# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
@@ -352,10 +352,11 @@ def __init__(self, *args):
paths = []
for arg in args:
if isinstance(arg, PurePath):
- path = arg._raw_path
if arg._flavour is ntpath and self._flavour is posixpath:
# GH-103631: Convert separators for backwards compatibility.
- path = path.replace('\\', '/')
+ paths.extend(path.replace('\\', '/') for path in arg._raw_paths)
+ else:
+ paths.extend(arg._raw_paths)
else:
try:
path = os.fspath(arg)
@@ -366,13 +367,8 @@ def __init__(self, *args):
"argument should be a str or an os.PathLike "
"object where __fspath__ returns a str, "
f"not {type(path).__name__!r}")
- paths.append(path)
- if len(paths) == 0:
- self._raw_path = ''
- elif len(paths) == 1:
- self._raw_path = paths[0]
- else:
- self._raw_path = self._flavour.join(*paths)
+ paths.append(path)
+ self._raw_paths = paths
def with_segments(self, *pathsegments):
"""Construct a new path object from any number of path-like objects.
@@ -402,7 +398,14 @@ def _parse_path(cls, path):
return drv, root, parsed
def _load_parts(self):
- drv, root, tail = self._parse_path(self._raw_path)
+ paths = self._raw_paths
+ if len(paths) == 0:
+ path = ''
+ elif len(paths) == 1:
+ path = paths[0]
+ else:
+ path = self._flavour.join(*paths)
+ drv, root, tail = self._parse_path(path)
self._drv = drv
self._root = root
self._tail_cached = tail
@@ -733,10 +736,17 @@ def parents(self):
def is_absolute(self):
"""True if the path is absolute (has both a root and, if applicable,
a drive)."""
- # ntpath.isabs() is defective - see GH-44626 .
if self._flavour is ntpath:
+ # ntpath.isabs() is defective - see GH-44626.
return bool(self.drive and self.root)
- return self._flavour.isabs(self._raw_path)
+ elif self._flavour is posixpath:
+ # Optimization: work with raw paths on POSIX.
+ for path in self._raw_paths:
+ if path.startswith('/'):
+ return True
+ return False
+ else:
+ return self._flavour.isabs(str(self))
def is_reserved(self):
"""Return True if the path contains one of the special names reserved
diff --git a/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst b/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst
new file mode 100644
index 000000000000..8b81b681af94
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst
@@ -0,0 +1,2 @@
+Improve performance of :class:`pathlib.PurePath` initialisation by
+deferring joining of paths when multiple arguments are given.
More information about the Python-checkins mailing list