[Python-checkins] gh-101000: Add os.path.splitroot() (#101002)

AlexWaygood webhook-mailer at python.org
Thu Jan 26 19:28:35 EST 2023


https://github.com/python/cpython/commit/e5b08ddddf1099f04bf65e63017de840bd4b5980
commit: e5b08ddddf1099f04bf65e63017de840bd4b5980
branch: main
author: Barney Gale <barney.gale at gmail.com>
committer: AlexWaygood <Alex.Waygood at Gmail.com>
date: 2023-01-27T00:28:27Z
summary:

gh-101000: Add os.path.splitroot() (#101002)

Co-authored-by: Eryk Sun <eryksun at gmail.com>
Co-authored-by: Alex Waygood <Alex.Waygood at Gmail.com>

files:
A Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst
M Doc/library/os.path.rst
M Doc/whatsnew/3.12.rst
M Lib/ntpath.py
M Lib/pathlib.py
M Lib/posixpath.py
M Lib/test/test_ntpath.py
M Lib/test/test_pathlib.py
M Lib/test/test_posixpath.py

diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst
index 42bbe24830e6..786c2fd7f64f 100644
--- a/Doc/library/os.path.rst
+++ b/Doc/library/os.path.rst
@@ -488,6 +488,39 @@ the :mod:`glob` module.)
       Accepts a :term:`path-like object`.
 
 
+.. function:: splitroot(path)
+
+   Split the pathname *path* into a 3-item tuple ``(drive, root, tail)`` where
+   *drive* is a device name or mount point, *root* is a string of separators
+   after the drive, and *tail* is everything after the root. Any of these
+   items may be the empty string. In all cases, ``drive + root + tail`` will
+   be the same as *path*.
+
+   On POSIX systems, *drive* is always empty. The *root* may be empty (if *path* is
+   relative), a single forward slash (if *path* is absolute), or two forward slashes
+   (implementation-defined per `IEEE Std 1003.1-2017; 4.13 Pathname Resolution
+   <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13>`_.)
+   For example::
+
+      >>> splitroot('/home/sam')
+      ('', '/', 'home/sam')
+      >>> splitroot('//home/sam')
+      ('', '//', 'home/sam')
+      >>> splitroot('///home/sam')
+      ('', '/', '//home/sam')
+
+   On Windows, *drive* may be empty, a drive-letter name, a UNC share, or a device
+   name. The *root* may be empty, a forward slash, or a backward slash. For
+   example::
+
+      >>> splitroot('C:/Users/Sam')
+      ('C:', '/', 'Users/Sam')
+      >>> splitroot('//Server/Share/Users/Sam')
+      ('//Server/Share', '/', 'Users/Sam')
+
+   .. versionadded:: 3.12
+
+
 .. function:: splitext(path)
 
    Split the pathname *path* into a pair ``(root, ext)``  such that ``root + ext ==
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index 2f9ca1102d3d..a071159b800a 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -288,13 +288,18 @@ os
   for a process with :func:`os.pidfd_open` in non-blocking mode.
   (Contributed by Kumar Aditya in :gh:`93312`.)
 
-* Add :func:`os.path.isjunction` to check if a given path is a junction.
-  (Contributed by Charles Machalow in :gh:`99547`.)
-
 * :class:`os.DirEntry` now includes an :meth:`os.DirEntry.is_junction`
   method to check if the entry is a junction.
   (Contributed by Charles Machalow in :gh:`99547`.)
 
+os.path
+-------
+
+* Add :func:`os.path.isjunction` to check if a given path is a junction.
+  (Contributed by Charles Machalow in :gh:`99547`.)
+
+* Add :func:`os.path.splitroot` to split a path into a triad
+  ``(drive, root, tail)``. (Contributed by Barney Gale in :gh:`101000`.)
 
 shutil
 ------
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index cd7fb58a88de..f9ee8e02a576 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -24,7 +24,7 @@
 from genericpath import *
 
 
-__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
+__all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext",
            "basename","dirname","commonprefix","getsize","getmtime",
            "getatime","getctime", "islink","exists","lexists","isdir","isfile",
            "ismount", "expanduser","expandvars","normpath","abspath",
@@ -117,19 +117,21 @@ def join(path, *paths):
     try:
         if not paths:
             path[:0] + sep  #23780: Ensure compatible data type even if p is null.
-        result_drive, result_path = splitdrive(path)
+        result_drive, result_root, result_path = splitroot(path)
         for p in map(os.fspath, paths):
-            p_drive, p_path = splitdrive(p)
-            if p_path and p_path[0] in seps:
+            p_drive, p_root, p_path = splitroot(p)
+            if p_root:
                 # Second path is absolute
                 if p_drive or not result_drive:
                     result_drive = p_drive
+                result_root = p_root
                 result_path = p_path
                 continue
             elif p_drive and p_drive != result_drive:
                 if p_drive.lower() != result_drive.lower():
                     # Different drives => ignore the first path entirely
                     result_drive = p_drive
+                    result_root = p_root
                     result_path = p_path
                     continue
                 # Same drive in different case
@@ -139,10 +141,10 @@ def join(path, *paths):
                 result_path = result_path + sep
             result_path = result_path + p_path
         ## add separator between UNC and non-absolute path
-        if (result_path and result_path[0] not in seps and
+        if (result_path and not result_root and
             result_drive and result_drive[-1:] != colon):
             return result_drive + sep + result_path
-        return result_drive + result_path
+        return result_drive + result_root + result_path
     except (TypeError, AttributeError, BytesWarning):
         genericpath._check_arg_types('join', path, *paths)
         raise
@@ -169,35 +171,61 @@ def splitdrive(p):
 
     Paths cannot contain both a drive letter and a UNC path.
 
+    """
+    drive, root, tail = splitroot(p)
+    return drive, root + tail
+
+
+def splitroot(p):
+    """Split a pathname into drive, root and tail. The drive is defined
+    exactly as in splitdrive(). On Windows, the root may be a single path
+    separator or an empty string. The tail contains anything after the root.
+    For example:
+
+        splitroot('//server/share/') == ('//server/share', '/', '')
+        splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
+        splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
+        splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
     """
     p = os.fspath(p)
-    if len(p) >= 2:
-        if isinstance(p, bytes):
-            sep = b'\\'
-            altsep = b'/'
-            colon = b':'
-            unc_prefix = b'\\\\?\\UNC\\'
-        else:
-            sep = '\\'
-            altsep = '/'
-            colon = ':'
-            unc_prefix = '\\\\?\\UNC\\'
-        normp = p.replace(altsep, sep)
-        if normp[0:2] == sep * 2:
+    if isinstance(p, bytes):
+        sep = b'\\'
+        altsep = b'/'
+        colon = b':'
+        unc_prefix = b'\\\\?\\UNC\\'
+        empty = b''
+    else:
+        sep = '\\'
+        altsep = '/'
+        colon = ':'
+        unc_prefix = '\\\\?\\UNC\\'
+        empty = ''
+    normp = p.replace(altsep, sep)
+    if normp[:1] == sep:
+        if normp[1:2] == sep:
             # UNC drives, e.g. \\server\share or \\?\UNC\server\share
             # Device drives, e.g. \\.\device or \\?\device
             start = 8 if normp[:8].upper() == unc_prefix else 2
             index = normp.find(sep, start)
             if index == -1:
-                return p, p[:0]
+                return p, empty, empty
             index2 = normp.find(sep, index + 1)
             if index2 == -1:
-                return p, p[:0]
-            return p[:index2], p[index2:]
-        if normp[1:2] == colon:
-            # Drive-letter drives, e.g. X:
-            return p[:2], p[2:]
-    return p[:0], p
+                return p, empty, empty
+            return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
+        else:
+            # Relative path with root, e.g. \Windows
+            return empty, p[:1], p[1:]
+    elif normp[1:2] == colon:
+        if normp[2:3] == sep:
+            # Absolute drive-letter path, e.g. X:\Windows
+            return p[:2], p[2:3], p[3:]
+        else:
+            # Relative path with drive, e.g. X:Windows
+            return p[:2], empty, p[2:]
+    else:
+        # Relative path, e.g. Windows
+        return empty, empty, p
 
 
 # Split a path in head (everything up to the last '/') and tail (the
@@ -212,15 +240,13 @@ def split(p):
     Either part may be empty."""
     p = os.fspath(p)
     seps = _get_bothseps(p)
-    d, p = splitdrive(p)
+    d, r, p = splitroot(p)
     # set i to index beyond p's last slash
     i = len(p)
     while i and p[i-1] not in seps:
         i -= 1
     head, tail = p[:i], p[i:]  # now tail has no slashes
-    # remove trailing slashes from head, unless it's all slashes
-    head = head.rstrip(seps) or head
-    return d + head, tail
+    return d + r + head.rstrip(seps), tail
 
 
 # Split a path in root and extension.
@@ -311,10 +337,10 @@ def ismount(path):
     path = os.fspath(path)
     seps = _get_bothseps(path)
     path = abspath(path)
-    root, rest = splitdrive(path)
-    if root and root[0] in seps:
-        return (not rest) or (rest in seps)
-    if rest and rest in seps:
+    drive, root, rest = splitroot(path)
+    if drive and drive[0] in seps:
+        return not rest
+    if root and not rest:
         return True
 
     if _getvolumepathname:
@@ -525,13 +551,8 @@ def normpath(path):
             curdir = '.'
             pardir = '..'
         path = path.replace(altsep, sep)
-        prefix, path = splitdrive(path)
-
-        # collapse initial backslashes
-        if path.startswith(sep):
-            prefix += sep
-            path = path.lstrip(sep)
-
+        drive, root, path = splitroot(path)
+        prefix = drive + root
         comps = path.split(sep)
         i = 0
         while i < len(comps):
@@ -541,7 +562,7 @@ def normpath(path):
                 if i > 0 and comps[i-1] != pardir:
                     del comps[i-1:i+1]
                     i -= 1
-                elif i == 0 and prefix.endswith(sep):
+                elif i == 0 and root:
                     del comps[i]
                 else:
                     i += 1
@@ -765,8 +786,8 @@ def relpath(path, start=None):
     try:
         start_abs = abspath(normpath(start))
         path_abs = abspath(normpath(path))
-        start_drive, start_rest = splitdrive(start_abs)
-        path_drive, path_rest = splitdrive(path_abs)
+        start_drive, _, start_rest = splitroot(start_abs)
+        path_drive, _, path_rest = splitroot(path_abs)
         if normcase(start_drive) != normcase(path_drive):
             raise ValueError("path is on mount %r, start on mount %r" % (
                 path_drive, start_drive))
@@ -816,21 +837,19 @@ def commonpath(paths):
         curdir = '.'
 
     try:
-        drivesplits = [splitdrive(p.replace(altsep, sep).lower()) for p in paths]
-        split_paths = [p.split(sep) for d, p in drivesplits]
+        drivesplits = [splitroot(p.replace(altsep, sep).lower()) for p in paths]
+        split_paths = [p.split(sep) for d, r, p in drivesplits]
 
-        try:
-            isabs, = set(p[:1] == sep for d, p in drivesplits)
-        except ValueError:
-            raise ValueError("Can't mix absolute and relative paths") from None
+        if len({r for d, r, p in drivesplits}) != 1:
+            raise ValueError("Can't mix absolute and relative paths")
 
         # Check that all drive letters or UNC paths match. The check is made only
         # now otherwise type errors for mixing strings and bytes would not be
         # caught.
-        if len(set(d for d, p in drivesplits)) != 1:
+        if len({d for d, r, p in drivesplits}) != 1:
             raise ValueError("Paths don't have the same drive")
 
-        drive, path = splitdrive(paths[0].replace(altsep, sep))
+        drive, root, path = splitroot(paths[0].replace(altsep, sep))
         common = path.split(sep)
         common = [c for c in common if c and c != curdir]
 
@@ -844,8 +863,7 @@ def commonpath(paths):
         else:
             common = common[:len(s1)]
 
-        prefix = drive + sep if isabs else drive
-        return prefix + sep.join(common)
+        return drive + root + sep.join(common)
     except (TypeError, AttributeError):
         genericpath._check_arg_types('commonpath', *paths)
         raise
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index ae7a62f8a4cd..17659bcd3e2d 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -271,19 +271,6 @@ def __reduce__(self):
         # when pickling related paths.
         return (self.__class__, tuple(self._parts))
 
-    @classmethod
-    def _split_root(cls, part):
-        sep = cls._flavour.sep
-        rel = cls._flavour.splitdrive(part)[1].lstrip(sep)
-        anchor = part.removesuffix(rel)
-        if anchor:
-            anchor = cls._flavour.normpath(anchor)
-        drv, root = cls._flavour.splitdrive(anchor)
-        if drv.startswith(sep):
-            # UNC paths always have a root.
-            root = sep
-        return drv, root, rel
-
     @classmethod
     def _parse_parts(cls, parts):
         if not parts:
@@ -293,7 +280,10 @@ def _parse_parts(cls, parts):
         path = cls._flavour.join(*parts)
         if altsep:
             path = path.replace(altsep, sep)
-        drv, root, rel = cls._split_root(path)
+        drv, root, rel = cls._flavour.splitroot(path)
+        if drv.startswith(sep):
+            # pathlib assumes that UNC paths always have a root.
+            root = sep
         unfiltered_parsed = [drv + root] + rel.split(sep)
         parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
         return drv, root, parsed
@@ -493,9 +483,9 @@ def with_name(self, name):
         """Return a new path with the file name changed."""
         if not self.name:
             raise ValueError("%r has an empty name" % (self,))
-        drv, root, parts = self._parse_parts((name,))
-        if (not name or name[-1] in [self._flavour.sep, self._flavour.altsep]
-            or drv or root or len(parts) != 1):
+        f = self._flavour
+        drv, root, tail = f.splitroot(name)
+        if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
             raise ValueError("Invalid name %r" % (name))
         return self._from_parsed_parts(self._drv, self._root,
                                        self._parts[:-1] + [name])
diff --git a/Lib/posixpath.py b/Lib/posixpath.py
index 737f8a5c156d..32b5d6e105dd 100644
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -28,7 +28,7 @@
 import genericpath
 from genericpath import *
 
-__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
+__all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext",
            "basename","dirname","commonprefix","getsize","getmtime",
            "getatime","getctime","islink","exists","lexists","isdir","isfile",
            "ismount", "expanduser","expandvars","normpath","abspath",
@@ -135,6 +135,35 @@ def splitdrive(p):
     return p[:0], p
 
 
+def splitroot(p):
+    """Split a pathname into drive, root and tail. On Posix, drive is always
+    empty; the root may be empty, a single slash, or two slashes. The tail
+    contains anything after the root. For example:
+
+        splitroot('foo/bar') == ('', '', 'foo/bar')
+        splitroot('/foo/bar') == ('', '/', 'foo/bar')
+        splitroot('//foo/bar') == ('', '//', 'foo/bar')
+        splitroot('///foo/bar') == ('', '/', '//foo/bar')
+    """
+    p = os.fspath(p)
+    if isinstance(p, bytes):
+        sep = b'/'
+        empty = b''
+    else:
+        sep = '/'
+        empty = ''
+    if p[:1] != sep:
+        # Relative path, e.g.: 'foo'
+        return empty, empty, p
+    elif p[1:2] != sep or p[2:3] == sep:
+        # Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
+        return empty, sep, p[1:]
+    else:
+        # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
+        # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
+        return empty, p[:2], p[2:]
+
+
 # Return the tail (basename) part of a path, same as split(path)[1].
 
 def basename(p):
@@ -372,13 +401,7 @@ def normpath(path):
             dotdot = '..'
         if path == empty:
             return dot
-        initial_slashes = path.startswith(sep)
-        # POSIX allows one or two initial slashes, but treats three or more
-        # as single slash.
-        # (see https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)
-        if (initial_slashes and
-            path.startswith(sep*2) and not path.startswith(sep*3)):
-            initial_slashes = 2
+        _, initial_slashes, path = splitroot(path)
         comps = path.split(sep)
         new_comps = []
         for comp in comps:
@@ -390,9 +413,7 @@ def normpath(path):
             elif new_comps:
                 new_comps.pop()
         comps = new_comps
-        path = sep.join(comps)
-        if initial_slashes:
-            path = sep*initial_slashes + path
+        path = initial_slashes + sep.join(comps)
         return path or dot
 
 else:
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index f56de0be7721..bce38a534a6a 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -98,57 +98,106 @@ def test_splitext(self):
         tester('ntpath.splitext("c:a/b\\c.d")', ('c:a/b\\c', '.d'))
 
     def test_splitdrive(self):
-        tester('ntpath.splitdrive("c:\\foo\\bar")',
-               ('c:', '\\foo\\bar'))
-        tester('ntpath.splitdrive("c:/foo/bar")',
-               ('c:', '/foo/bar'))
+        tester("ntpath.splitdrive('')", ('', ''))
+        tester("ntpath.splitdrive('foo')", ('', 'foo'))
+        tester("ntpath.splitdrive('foo\\bar')", ('', 'foo\\bar'))
+        tester("ntpath.splitdrive('foo/bar')", ('', 'foo/bar'))
+        tester("ntpath.splitdrive('\\')", ('', '\\'))
+        tester("ntpath.splitdrive('/')", ('', '/'))
+        tester("ntpath.splitdrive('\\foo\\bar')", ('', '\\foo\\bar'))
+        tester("ntpath.splitdrive('/foo/bar')", ('', '/foo/bar'))
+        tester('ntpath.splitdrive("c:foo\\bar")', ('c:', 'foo\\bar'))
+        tester('ntpath.splitdrive("c:foo/bar")', ('c:', 'foo/bar'))
+        tester('ntpath.splitdrive("c:\\foo\\bar")', ('c:', '\\foo\\bar'))
+        tester('ntpath.splitdrive("c:/foo/bar")', ('c:', '/foo/bar'))
+        tester("ntpath.splitdrive('\\\\')", ('\\\\', ''))
+        tester("ntpath.splitdrive('//')", ('//', ''))
         tester('ntpath.splitdrive("\\\\conky\\mountpoint\\foo\\bar")',
                ('\\\\conky\\mountpoint', '\\foo\\bar'))
         tester('ntpath.splitdrive("//conky/mountpoint/foo/bar")',
                ('//conky/mountpoint', '/foo/bar'))
-        tester('ntpath.splitdrive("\\\\\\conky\\mountpoint\\foo\\bar")',
-            ('\\\\\\conky', '\\mountpoint\\foo\\bar'))
-        tester('ntpath.splitdrive("///conky/mountpoint/foo/bar")',
-            ('///conky', '/mountpoint/foo/bar'))
-        tester('ntpath.splitdrive("\\\\conky\\\\mountpoint\\foo\\bar")',
-               ('\\\\conky\\', '\\mountpoint\\foo\\bar'))
-        tester('ntpath.splitdrive("//conky//mountpoint/foo/bar")',
-               ('//conky/', '/mountpoint/foo/bar'))
-        # Issue #19911: UNC part containing U+0130
-        self.assertEqual(ntpath.splitdrive('//conky/MOUNTPOİNT/foo/bar'),
-                         ('//conky/MOUNTPOİNT', '/foo/bar'))
-        # gh-81790: support device namespace, including UNC drives.
-        tester('ntpath.splitdrive("//?/c:")', ("//?/c:", ""))
-        tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/"))
-        tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir"))
-        tester('ntpath.splitdrive("//?/UNC")', ("//?/UNC", ""))
-        tester('ntpath.splitdrive("//?/UNC/")', ("//?/UNC/", ""))
-        tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", ""))
-        tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", ""))
-        tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir"))
-        tester('ntpath.splitdrive("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")',
-               ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/spam'))
-        tester('ntpath.splitdrive("//?/BootPartition/")', ("//?/BootPartition", "/"))
-
-        tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", ""))
-        tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\"))
-        tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir"))
-        tester('ntpath.splitdrive("\\\\?\\UNC")', ("\\\\?\\UNC", ""))
-        tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("\\\\?\\UNC\\", ""))
-        tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", ""))
-        tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", ""))
         tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share\\dir")',
                ("\\\\?\\UNC\\server\\share", "\\dir"))
-        tester('ntpath.splitdrive("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")',
-               ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\spam'))
-        tester('ntpath.splitdrive("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\"))
+        tester('ntpath.splitdrive("//?/UNC/server/share/dir")',
+               ("//?/UNC/server/share", "/dir"))
+
+    def test_splitroot(self):
+        tester("ntpath.splitroot('')", ('', '', ''))
+        tester("ntpath.splitroot('foo')", ('', '', 'foo'))
+        tester("ntpath.splitroot('foo\\bar')", ('', '', 'foo\\bar'))
+        tester("ntpath.splitroot('foo/bar')", ('', '', 'foo/bar'))
+        tester("ntpath.splitroot('\\')", ('', '\\', ''))
+        tester("ntpath.splitroot('/')", ('', '/', ''))
+        tester("ntpath.splitroot('\\foo\\bar')", ('', '\\', 'foo\\bar'))
+        tester("ntpath.splitroot('/foo/bar')", ('', '/', 'foo/bar'))
+        tester('ntpath.splitroot("c:foo\\bar")', ('c:', '', 'foo\\bar'))
+        tester('ntpath.splitroot("c:foo/bar")', ('c:', '', 'foo/bar'))
+        tester('ntpath.splitroot("c:\\foo\\bar")', ('c:', '\\', 'foo\\bar'))
+        tester('ntpath.splitroot("c:/foo/bar")', ('c:', '/', 'foo/bar'))
+
+        # Redundant slashes are not included in the root.
+        tester("ntpath.splitroot('c:\\\\a')", ('c:', '\\', '\\a'))
+        tester("ntpath.splitroot('c:\\\\\\a/b')", ('c:', '\\', '\\\\a/b'))
+
+        # Mixed path separators.
+        tester("ntpath.splitroot('c:/\\')", ('c:', '/', '\\'))
+        tester("ntpath.splitroot('c:\\/')", ('c:', '\\', '/'))
+        tester("ntpath.splitroot('/\\a/b\\/\\')", ('/\\a/b', '\\', '/\\'))
+        tester("ntpath.splitroot('\\/a\\b/\\/')", ('\\/a\\b', '/', '\\/'))
+
+        # UNC paths.
+        tester("ntpath.splitroot('\\\\')", ('\\\\', '', ''))
+        tester("ntpath.splitroot('//')", ('//', '', ''))
+        tester('ntpath.splitroot("\\\\conky\\mountpoint\\foo\\bar")',
+               ('\\\\conky\\mountpoint', '\\', 'foo\\bar'))
+        tester('ntpath.splitroot("//conky/mountpoint/foo/bar")',
+               ('//conky/mountpoint', '/', 'foo/bar'))
+        tester('ntpath.splitroot("\\\\\\conky\\mountpoint\\foo\\bar")',
+            ('\\\\\\conky', '\\', 'mountpoint\\foo\\bar'))
+        tester('ntpath.splitroot("///conky/mountpoint/foo/bar")',
+            ('///conky', '/', 'mountpoint/foo/bar'))
+        tester('ntpath.splitroot("\\\\conky\\\\mountpoint\\foo\\bar")',
+               ('\\\\conky\\', '\\', 'mountpoint\\foo\\bar'))
+        tester('ntpath.splitroot("//conky//mountpoint/foo/bar")',
+               ('//conky/', '/', 'mountpoint/foo/bar'))
+
+        # Issue #19911: UNC part containing U+0130
+        self.assertEqual(ntpath.splitroot('//conky/MOUNTPOİNT/foo/bar'),
+                         ('//conky/MOUNTPOİNT', '/', 'foo/bar'))
+
+        # gh-81790: support device namespace, including UNC drives.
+        tester('ntpath.splitroot("//?/c:")', ("//?/c:", "", ""))
+        tester('ntpath.splitroot("//?/c:/")', ("//?/c:", "/", ""))
+        tester('ntpath.splitroot("//?/c:/dir")', ("//?/c:", "/", "dir"))
+        tester('ntpath.splitroot("//?/UNC")', ("//?/UNC", "", ""))
+        tester('ntpath.splitroot("//?/UNC/")', ("//?/UNC/", "", ""))
+        tester('ntpath.splitroot("//?/UNC/server/")', ("//?/UNC/server/", "", ""))
+        tester('ntpath.splitroot("//?/UNC/server/share")', ("//?/UNC/server/share", "", ""))
+        tester('ntpath.splitroot("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/", "dir"))
+        tester('ntpath.splitroot("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")',
+               ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/', 'spam'))
+        tester('ntpath.splitroot("//?/BootPartition/")', ("//?/BootPartition", "/", ""))
+
+        tester('ntpath.splitroot("\\\\?\\c:")', ("\\\\?\\c:", "", ""))
+        tester('ntpath.splitroot("\\\\?\\c:\\")', ("\\\\?\\c:", "\\", ""))
+        tester('ntpath.splitroot("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\", "dir"))
+        tester('ntpath.splitroot("\\\\?\\UNC")', ("\\\\?\\UNC", "", ""))
+        tester('ntpath.splitroot("\\\\?\\UNC\\")', ("\\\\?\\UNC\\", "", ""))
+        tester('ntpath.splitroot("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", "", ""))
+        tester('ntpath.splitroot("\\\\?\\UNC\\server\\share")',
+               ("\\\\?\\UNC\\server\\share", "", ""))
+        tester('ntpath.splitroot("\\\\?\\UNC\\server\\share\\dir")',
+               ("\\\\?\\UNC\\server\\share", "\\", "dir"))
+        tester('ntpath.splitroot("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")',
+               ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\', 'spam'))
+        tester('ntpath.splitroot("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\", ""))
 
         # gh-96290: support partial/invalid UNC drives
-        tester('ntpath.splitdrive("//")', ("//", ""))  # empty server & missing share
-        tester('ntpath.splitdrive("///")', ("///", ""))  # empty server & empty share
-        tester('ntpath.splitdrive("///y")', ("///y", ""))  # empty server & non-empty share
-        tester('ntpath.splitdrive("//x")', ("//x", ""))  # non-empty server & missing share
-        tester('ntpath.splitdrive("//x/")', ("//x/", ""))  # non-empty server & empty share
+        tester('ntpath.splitroot("//")', ("//", "", ""))  # empty server & missing share
+        tester('ntpath.splitroot("///")', ("///", "", ""))  # empty server & empty share
+        tester('ntpath.splitroot("///y")', ("///y", "", ""))  # empty server & non-empty share
+        tester('ntpath.splitroot("//x")', ("//x", "", ""))  # non-empty server & missing share
+        tester('ntpath.splitroot("//x/")', ("//x/", "", ""))  # non-empty server & empty share
 
     def test_split(self):
         tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar'))
@@ -930,6 +979,9 @@ def test_path_splitext(self):
     def test_path_splitdrive(self):
         self._check_function(self.path.splitdrive)
 
+    def test_path_splitroot(self):
+        self._check_function(self.path.splitroot)
+
     def test_path_basename(self):
         self._check_function(self.path.basename)
 
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 1fe242b7f6ab..a596795b44f0 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -80,26 +80,6 @@ def test_parse_parts(self):
         check(['c:\\a'],                ('', '', ['c:\\a']))
         check(['\\a'],                  ('', '', ['\\a']))
 
-    def test_splitroot(self):
-        f = self.cls._split_root
-        self.assertEqual(f(''), ('', '', ''))
-        self.assertEqual(f('a'), ('', '', 'a'))
-        self.assertEqual(f('a/b'), ('', '', 'a/b'))
-        self.assertEqual(f('a/b/'), ('', '', 'a/b/'))
-        self.assertEqual(f('/a'), ('', '/', 'a'))
-        self.assertEqual(f('/a/b'), ('', '/', 'a/b'))
-        self.assertEqual(f('/a/b/'), ('', '/', 'a/b/'))
-        # The root is collapsed when there are redundant slashes
-        # except when there are exactly two leading slashes, which
-        # is a special case in POSIX.
-        self.assertEqual(f('//a'), ('', '//', 'a'))
-        self.assertEqual(f('///a'), ('', '/', 'a'))
-        self.assertEqual(f('///a/b'), ('', '/', 'a/b'))
-        # Paths which look like NT paths aren't treated specially.
-        self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b'))
-        self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b'))
-        self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b'))
-
 
 class NTFlavourTest(_BaseFlavourTest, unittest.TestCase):
     cls = pathlib.PureWindowsPath
@@ -143,23 +123,6 @@ def test_parse_parts(self):
         check(['c:/a/b', 'c:x/y'], ('c:', '\\', ['c:\\', 'a', 'b', 'x', 'y']))
         check(['c:/a/b', 'c:/x/y'], ('c:', '\\', ['c:\\', 'x', 'y']))
 
-    def test_splitroot(self):
-        f = self.cls._split_root
-        self.assertEqual(f(''), ('', '', ''))
-        self.assertEqual(f('a'), ('', '', 'a'))
-        self.assertEqual(f('a\\b'), ('', '', 'a\\b'))
-        self.assertEqual(f('\\a'), ('', '\\', 'a'))
-        self.assertEqual(f('\\a\\b'), ('', '\\', 'a\\b'))
-        self.assertEqual(f('c:a\\b'), ('c:', '', 'a\\b'))
-        self.assertEqual(f('c:\\a\\b'), ('c:', '\\', 'a\\b'))
-        # Redundant slashes in the root are collapsed.
-        self.assertEqual(f('c:\\\\a'), ('c:', '\\', 'a'))
-        self.assertEqual(f('c:\\\\\\a/b'), ('c:', '\\', 'a/b'))
-        # Valid UNC paths.
-        self.assertEqual(f('\\\\a\\b'), ('\\\\a\\b', '\\', ''))
-        self.assertEqual(f('\\\\a\\b\\'), ('\\\\a\\b', '\\', ''))
-        self.assertEqual(f('\\\\a\\b\\c\\d'), ('\\\\a\\b', '\\', 'c\\d'))
-
 
 #
 # Tests for the pure classes.
diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py
index 6c1c0f5577b7..9be4640f970a 100644
--- a/Lib/test/test_posixpath.py
+++ b/Lib/test/test_posixpath.py
@@ -115,6 +115,32 @@ def test_splitext(self):
         self.splitextTest("........", "........", "")
         self.splitextTest("", "", "")
 
+    def test_splitroot(self):
+        f = posixpath.splitroot
+        self.assertEqual(f(''), ('', '', ''))
+        self.assertEqual(f('a'), ('', '', 'a'))
+        self.assertEqual(f('a/b'), ('', '', 'a/b'))
+        self.assertEqual(f('a/b/'), ('', '', 'a/b/'))
+        self.assertEqual(f('/a'), ('', '/', 'a'))
+        self.assertEqual(f('/a/b'), ('', '/', 'a/b'))
+        self.assertEqual(f('/a/b/'), ('', '/', 'a/b/'))
+        # The root is collapsed when there are redundant slashes
+        # except when there are exactly two leading slashes, which
+        # is a special case in POSIX.
+        self.assertEqual(f('//a'), ('', '//', 'a'))
+        self.assertEqual(f('///a'), ('', '/', '//a'))
+        self.assertEqual(f('///a/b'), ('', '/', '//a/b'))
+        # Paths which look like NT paths aren't treated specially.
+        self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b'))
+        self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b'))
+        self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b'))
+        # Byte paths are supported
+        self.assertEqual(f(b''), (b'', b'', b''))
+        self.assertEqual(f(b'a'), (b'', b'', b'a'))
+        self.assertEqual(f(b'/a'), (b'', b'/', b'a'))
+        self.assertEqual(f(b'//a'), (b'', b'//', b'a'))
+        self.assertEqual(f(b'///a'), (b'', b'/', b'//a'))
+
     def test_isabs(self):
         self.assertIs(posixpath.isabs(""), False)
         self.assertIs(posixpath.isabs("/"), True)
@@ -752,6 +778,9 @@ def test_path_splitext(self):
     def test_path_splitdrive(self):
         self.assertPathEqual(self.path.splitdrive)
 
+    def test_path_splitroot(self):
+        self.assertPathEqual(self.path.splitroot)
+
     def test_path_basename(self):
         self.assertPathEqual(self.path.basename)
 
diff --git a/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst b/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst
new file mode 100644
index 000000000000..2082361c41d6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst
@@ -0,0 +1,3 @@
+Add :func:`os.path.splitroot()`, which splits a path into a 3-item tuple
+``(drive, root, tail)``. This new function is used by :mod:`pathlib` to
+improve the performance of path construction by up to a third.



More information about the Python-checkins mailing list