[Python-checkins] [3.12] GH-104996: Defer joining of `pathlib.PurePath()` arguments. (GH-104999) (GH-105483)

barneygale webhook-mailer at python.org
Wed Jun 7 19:25:52 EDT 2023


https://github.com/python/cpython/commit/ae25f1c8e5522dee4131a3b48490cdb199e9ae22
commit: ae25f1c8e5522dee4131a3b48490cdb199e9ae22
branch: 3.12
author: Barney Gale <barney.gale at gmail.com>
committer: barneygale <barney.gale at gmail.com>
date: 2023-06-08T00:25:45+01:00
summary:

[3.12] GH-104996: Defer joining of `pathlib.PurePath()` arguments. (GH-104999) (GH-105483)

Joining of arguments is moved to `_load_parts`, which is called when a
normalized path is needed.

(cherry picked from commit ffeaec7e60c88d585deacb10264ba7a96e5e52df)

files:
A Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst
M Lib/pathlib.py

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 29517e4c74db..d279fd2958b1 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -293,9 +293,9 @@ class PurePath(object):
     """
 
     __slots__ = (
-        # The `_raw_path` slot stores an unnormalized string path. This is set
+        # The `_raw_paths` slot stores unnormalized string paths. This is set
         # in the `__init__()` method.
-        '_raw_path',
+        '_raw_paths',
 
         # The `_drv`, `_root` and `_tail_cached` slots store parsed and
         # normalized parts of the path. They are set when any of the `drive`,
@@ -352,10 +352,11 @@ def __init__(self, *args):
         paths = []
         for arg in args:
             if isinstance(arg, PurePath):
-                path = arg._raw_path
                 if arg._flavour is ntpath and self._flavour is posixpath:
                     # GH-103631: Convert separators for backwards compatibility.
-                    path = path.replace('\\', '/')
+                    paths.extend(path.replace('\\', '/') for path in arg._raw_paths)
+                else:
+                    paths.extend(arg._raw_paths)
             else:
                 try:
                     path = os.fspath(arg)
@@ -366,13 +367,8 @@ def __init__(self, *args):
                         "argument should be a str or an os.PathLike "
                         "object where __fspath__ returns a str, "
                         f"not {type(path).__name__!r}")
-            paths.append(path)
-        if len(paths) == 0:
-            self._raw_path = ''
-        elif len(paths) == 1:
-            self._raw_path = paths[0]
-        else:
-            self._raw_path = self._flavour.join(*paths)
+                paths.append(path)
+        self._raw_paths = paths
 
     def with_segments(self, *pathsegments):
         """Construct a new path object from any number of path-like objects.
@@ -402,7 +398,14 @@ def _parse_path(cls, path):
         return drv, root, parsed
 
     def _load_parts(self):
-        drv, root, tail = self._parse_path(self._raw_path)
+        paths = self._raw_paths
+        if len(paths) == 0:
+            path = ''
+        elif len(paths) == 1:
+            path = paths[0]
+        else:
+            path = self._flavour.join(*paths)
+        drv, root, tail = self._parse_path(path)
         self._drv = drv
         self._root = root
         self._tail_cached = tail
@@ -733,10 +736,17 @@ def parents(self):
     def is_absolute(self):
         """True if the path is absolute (has both a root and, if applicable,
         a drive)."""
-        # ntpath.isabs() is defective - see GH-44626 .
         if self._flavour is ntpath:
+            # ntpath.isabs() is defective - see GH-44626.
             return bool(self.drive and self.root)
-        return self._flavour.isabs(self._raw_path)
+        elif self._flavour is posixpath:
+            # Optimization: work with raw paths on POSIX.
+            for path in self._raw_paths:
+                if path.startswith('/'):
+                    return True
+            return False
+        else:
+            return self._flavour.isabs(str(self))
 
     def is_reserved(self):
         """Return True if the path contains one of the special names reserved
diff --git a/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst b/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst
new file mode 100644
index 000000000000..8b81b681af94
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst
@@ -0,0 +1,2 @@
+Improve performance of :class:`pathlib.PurePath` initialisation by
+deferring joining of paths when multiple arguments are given.



More information about the Python-checkins mailing list