[Python-checkins] GH-100479: Add `pathlib.PurePath.with_segments()` (GH-103975)

barneygale webhook-mailer at python.org
Fri May 5 15:05:00 EDT 2023


https://github.com/python/cpython/commit/d00d94214971621e6a3541425ee8c8072023ca1a
commit: d00d94214971621e6a3541425ee8c8072023ca1a
branch: main
author: Barney Gale <barney.gale at gmail.com>
committer: barneygale <barney.gale at gmail.com>
date: 2023-05-05T19:04:53Z
summary:

GH-100479: Add `pathlib.PurePath.with_segments()` (GH-103975)

Add `pathlib.PurePath.with_segments()`, which creates a path object from arguments. This method is called whenever a derivative path is created, such as from `pathlib.PurePath.parent`. Subclasses may override this method to share information between path objects.

Co-authored-by: Alex Waygood <Alex.Waygood at Gmail.com>

files:
A Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst
M Doc/library/pathlib.rst
M Doc/whatsnew/3.12.rst
M Lib/pathlib.py
M Lib/test/test_pathlib.py

diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index 14118127835b..5ffa33d4e61f 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -530,10 +530,10 @@ Pure paths provide the following methods and properties:
    unintended effects.
 
 
-.. method:: PurePath.joinpath(*other)
+.. method:: PurePath.joinpath(*pathsegments)
 
    Calling this method is equivalent to combining the path with each of
-   the *other* arguments in turn::
+   the given *pathsegments* in turn::
 
       >>> PurePosixPath('/etc').joinpath('passwd')
       PurePosixPath('/etc/passwd')
@@ -680,6 +680,30 @@ Pure paths provide the following methods and properties:
       PureWindowsPath('README')
 
 
+.. method:: PurePath.with_segments(*pathsegments)
+
+   Create a new path object of the same type by combining the given
+   *pathsegments*. This method is called whenever a derivative path is created,
+   such as from :attr:`parent` and :meth:`relative_to`. Subclasses may
+   override this method to pass information to derivative paths, for example::
+
+      from pathlib import PurePosixPath
+
+      class MyPath(PurePosixPath):
+          def __init__(self, *pathsegments, session_id):
+              super().__init__(*pathsegments)
+              self.session_id = session_id
+
+          def with_segments(self, *pathsegments):
+              return type(self)(*pathsegments, session_id=self.session_id)
+
+      etc = MyPath('/etc', session_id=42)
+      hosts = etc / 'hosts'
+      print(hosts.session_id)  # 42
+
+   .. versionadded:: 3.12
+
+
 .. _concrete-paths:
 
 
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index 4f952e2a37ef..ccddc8bd832f 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -348,6 +348,11 @@ inspect
 pathlib
 -------
 
+* Add support for subclassing :class:`pathlib.PurePath` and
+  :class:`~pathlib.Path`, plus their Posix- and Windows-specific variants.
+  Subclasses may override the :meth:`~pathlib.PurePath.with_segments` method
+  to pass information between path instances.
+
 * Add :meth:`~pathlib.Path.walk` for walking the directory trees and generating
   all file or directory names within them, similar to :func:`os.walk`.
   (Contributed by Stanislav Zmiev in :gh:`90385`.)
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index f32e1e2d8228..9aa3c1e52447 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -204,11 +204,10 @@ def _select_from(self, parent_path, scandir):
 class _PathParents(Sequence):
     """This object provides sequence-like access to the logical ancestors
     of a path.  Don't try to construct it yourself."""
-    __slots__ = ('_pathcls', '_drv', '_root', '_tail')
+    __slots__ = ('_path', '_drv', '_root', '_tail')
 
     def __init__(self, path):
-        # We don't store the instance to avoid reference cycles
-        self._pathcls = type(path)
+        self._path = path
         self._drv = path.drive
         self._root = path.root
         self._tail = path._tail
@@ -224,11 +223,11 @@ def __getitem__(self, idx):
             raise IndexError(idx)
         if idx < 0:
             idx += len(self)
-        return self._pathcls._from_parsed_parts(self._drv, self._root,
-                                                self._tail[:-idx - 1])
+        return self._path._from_parsed_parts(self._drv, self._root,
+                                             self._tail[:-idx - 1])
 
     def __repr__(self):
-        return "<{}.parents>".format(self._pathcls.__name__)
+        return "<{}.parents>".format(type(self._path).__name__)
 
 
 class PurePath(object):
@@ -316,6 +315,13 @@ def __init__(self, *args):
         else:
             self._raw_path = self._flavour.join(*paths)
 
+    def with_segments(self, *pathsegments):
+        """Construct a new path object from any number of path-like objects.
+        Subclasses may override this method to customize how new path objects
+        are created from methods like `iterdir()`.
+        """
+        return type(self)(*pathsegments)
+
     @classmethod
     def _parse_path(cls, path):
         if not path:
@@ -342,15 +348,14 @@ def _load_parts(self):
         self._root = root
         self._tail_cached = tail
 
-    @classmethod
-    def _from_parsed_parts(cls, drv, root, tail):
-        path = cls._format_parsed_parts(drv, root, tail)
-        self = cls(path)
-        self._str = path or '.'
-        self._drv = drv
-        self._root = root
-        self._tail_cached = tail
-        return self
+    def _from_parsed_parts(self, drv, root, tail):
+        path_str = self._format_parsed_parts(drv, root, tail)
+        path = self.with_segments(path_str)
+        path._str = path_str or '.'
+        path._drv = drv
+        path._root = root
+        path._tail_cached = tail
+        return path
 
     @classmethod
     def _format_parsed_parts(cls, drv, root, tail):
@@ -584,8 +589,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False):
                    "scheduled for removal in Python {remove}")
             warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg,
                                  remove=(3, 14))
-        path_cls = type(self)
-        other = path_cls(other, *_deprecated)
+        other = self.with_segments(other, *_deprecated)
         for step, path in enumerate([other] + list(other.parents)):
             if self.is_relative_to(path):
                 break
@@ -594,7 +598,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False):
         if step and not walk_up:
             raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}")
         parts = ['..'] * step + self._tail[len(path._tail):]
-        return path_cls(*parts)
+        return self.with_segments(*parts)
 
     def is_relative_to(self, other, /, *_deprecated):
         """Return True if the path is relative to another path or False.
@@ -605,7 +609,7 @@ def is_relative_to(self, other, /, *_deprecated):
                    "scheduled for removal in Python {remove}")
             warnings._deprecated("pathlib.PurePath.is_relative_to(*args)",
                                  msg, remove=(3, 14))
-        other = type(self)(other, *_deprecated)
+        other = self.with_segments(other, *_deprecated)
         return other == self or other in self.parents
 
     @property
@@ -617,13 +621,13 @@ def parts(self):
         else:
             return tuple(self._tail)
 
-    def joinpath(self, *args):
+    def joinpath(self, *pathsegments):
         """Combine this path with one or several arguments, and return a
         new path representing either a subpath (if all arguments are relative
         paths) or a totally different path (if one of the arguments is
         anchored).
         """
-        return self.__class__(self, *args)
+        return self.with_segments(self, *pathsegments)
 
     def __truediv__(self, key):
         try:
@@ -633,7 +637,7 @@ def __truediv__(self, key):
 
     def __rtruediv__(self, key):
         try:
-            return type(self)(key, self)
+            return self.with_segments(key, self)
         except TypeError:
             return NotImplemented
 
@@ -650,6 +654,8 @@ def parent(self):
     @property
     def parents(self):
         """A sequence of this path's logical parents."""
+        # The value of this property should not be cached on the path object,
+        # as doing so would introduce a reference cycle.
         return _PathParents(self)
 
     def is_absolute(self):
@@ -680,7 +686,7 @@ def match(self, path_pattern):
         """
         Return True if this path matches the given pattern.
         """
-        pat = type(self)(path_pattern)
+        pat = self.with_segments(path_pattern)
         if not pat.parts:
             raise ValueError("empty pattern")
         pat_parts = pat._parts_normcase
@@ -755,7 +761,7 @@ def _make_child_relpath(self, name):
             path_str = f'{path_str}{name}'
         else:
             path_str = name
-        path = type(self)(path_str)
+        path = self.with_segments(path_str)
         path._str = path_str
         path._drv = self.drive
         path._root = self.root
@@ -805,7 +811,7 @@ def samefile(self, other_path):
         try:
             other_st = other_path.stat()
         except AttributeError:
-            other_st = self.__class__(other_path).stat()
+            other_st = self.with_segments(other_path).stat()
         return self._flavour.samestat(st, other_st)
 
     def iterdir(self):
@@ -867,7 +873,7 @@ def absolute(self):
             cwd = self._flavour.abspath(self.drive)
         else:
             cwd = os.getcwd()
-        return type(self)(cwd, self)
+        return self.with_segments(cwd, self)
 
     def resolve(self, strict=False):
         """
@@ -885,7 +891,7 @@ def check_eloop(e):
         except OSError as e:
             check_eloop(e)
             raise
-        p = type(self)(s)
+        p = self.with_segments(s)
 
         # In non-strict mode, realpath() doesn't raise on symlink loops.
         # Ensure we get an exception by calling stat()
@@ -975,7 +981,7 @@ def readlink(self):
         """
         if not hasattr(os, "readlink"):
             raise NotImplementedError("os.readlink() not available on this system")
-        return type(self)(os.readlink(self))
+        return self.with_segments(os.readlink(self))
 
     def touch(self, mode=0o666, exist_ok=True):
         """
@@ -1064,7 +1070,7 @@ def rename(self, target):
         Returns the new Path instance pointing to the target path.
         """
         os.rename(self, target)
-        return self.__class__(target)
+        return self.with_segments(target)
 
     def replace(self, target):
         """
@@ -1077,7 +1083,7 @@ def replace(self, target):
         Returns the new Path instance pointing to the target path.
         """
         os.replace(self, target)
-        return self.__class__(target)
+        return self.with_segments(target)
 
     def symlink_to(self, target, target_is_directory=False):
         """
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index a932e03df423..7586610833b0 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -29,11 +29,12 @@
 #
 
 class _BasePurePathSubclass(object):
-    init_called = False
+    def __init__(self, *pathsegments, session_id):
+        super().__init__(*pathsegments)
+        self.session_id = session_id
 
-    def __init__(self, *args):
-        super().__init__(*args)
-        self.init_called = True
+    def with_segments(self, *pathsegments):
+        return type(self)(*pathsegments, session_id=self.session_id)
 
 
 class _BasePurePathTest(object):
@@ -121,20 +122,21 @@ def test_str_subclass_common(self):
         self._check_str_subclass('a/b.txt')
         self._check_str_subclass('/a/b.txt')
 
-    def test_init_called_common(self):
+    def test_with_segments_common(self):
         class P(_BasePurePathSubclass, self.cls):
             pass
-        p = P('foo', 'bar')
-        self.assertTrue((p / 'foo').init_called)
-        self.assertTrue(('foo' / p).init_called)
-        self.assertTrue(p.joinpath('foo').init_called)
-        self.assertTrue(p.with_name('foo').init_called)
-        self.assertTrue(p.with_stem('foo').init_called)
-        self.assertTrue(p.with_suffix('.foo').init_called)
-        self.assertTrue(p.relative_to('foo').init_called)
-        self.assertTrue(p.parent.init_called)
+        p = P('foo', 'bar', session_id=42)
+        self.assertEqual(42, (p / 'foo').session_id)
+        self.assertEqual(42, ('foo' / p).session_id)
+        self.assertEqual(42, p.joinpath('foo').session_id)
+        self.assertEqual(42, p.with_name('foo').session_id)
+        self.assertEqual(42, p.with_stem('foo').session_id)
+        self.assertEqual(42, p.with_suffix('.foo').session_id)
+        self.assertEqual(42, p.with_segments('foo').session_id)
+        self.assertEqual(42, p.relative_to('foo').session_id)
+        self.assertEqual(42, p.parent.session_id)
         for parent in p.parents:
-            self.assertTrue(parent.init_called)
+            self.assertEqual(42, parent.session_id)
 
     def _get_drive_root_parts(self, parts):
         path = self.cls(*parts)
@@ -1647,6 +1649,26 @@ def test_home(self):
             env['HOME'] = os.path.join(BASE, 'home')
             self._test_home(self.cls.home())
 
+    def test_with_segments(self):
+        class P(_BasePurePathSubclass, self.cls):
+            pass
+        p = P(BASE, session_id=42)
+        self.assertEqual(42, p.absolute().session_id)
+        self.assertEqual(42, p.resolve().session_id)
+        self.assertEqual(42, p.with_segments('~').expanduser().session_id)
+        self.assertEqual(42, (p / 'fileA').rename(p / 'fileB').session_id)
+        self.assertEqual(42, (p / 'fileB').replace(p / 'fileA').session_id)
+        if os_helper.can_symlink():
+            self.assertEqual(42, (p / 'linkA').readlink().session_id)
+        for path in p.iterdir():
+            self.assertEqual(42, path.session_id)
+        for path in p.glob('*'):
+            self.assertEqual(42, path.session_id)
+        for path in p.rglob('*'):
+            self.assertEqual(42, path.session_id)
+        for dirpath, dirnames, filenames in p.walk():
+            self.assertEqual(42, dirpath.session_id)
+
     def test_samefile(self):
         fileA_path = os.path.join(BASE, 'fileA')
         fileB_path = os.path.join(BASE, 'dirB', 'fileB')
diff --git a/Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst b/Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst
new file mode 100644
index 000000000000..58db90480d2f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst
@@ -0,0 +1,4 @@
+Add :meth:`pathlib.PurePath.with_segments`, which creates a path object from
+arguments. This method is called whenever a derivative path is created, such
+as from :attr:`pathlib.PurePath.parent`. Subclasses may override this method
+to share information between path objects.



More information about the Python-checkins mailing list