[Python-checkins] gh-96290: Support partial/invalid UNC drives in ntpath.normpath() and splitdrive() (GH-100351)

zooba webhook-mailer at python.org
Thu Jan 12 16:32:32 EST 2023


https://github.com/python/cpython/commit/55a26de6ba938962dc23f2495723cf0f4f3ab7c6
commit: 55a26de6ba938962dc23f2495723cf0f4f3ab7c6
branch: 3.11
author: Steve Dower <steve.dower at python.org>
committer: zooba <steve.dower at microsoft.com>
date: 2023-01-12T21:32:26Z
summary:

gh-96290: Support partial/invalid UNC drives in ntpath.normpath() and splitdrive() (GH-100351)

This brings the Python implementation of `ntpath.normpath()` in line with the C implementation added in commit 99fcf15.

Co-authored-by: Barney Gale <barney.gale at gmail.com>
Co-authored-by: Eryk Sun <eryksun at gmail.com>

files:
A Misc/NEWS.d/next/Library/2022-12-19-23-19-26.gh-issue-96290.qFjsi6.rst
M Lib/ntpath.py
M Lib/test/test_ntpath.py
M Lib/test/test_zipfile.py

diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index 10c6799300f6..1cfb15b77102 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -87,16 +87,20 @@ def normcase(s):
 def isabs(s):
     """Test whether a path is absolute"""
     s = os.fspath(s)
-    # Paths beginning with \\?\ are always absolute, but do not
-    # necessarily contain a drive.
     if isinstance(s, bytes):
-        if s.replace(b'/', b'\\').startswith(b'\\\\?\\'):
-            return True
+        sep = b'\\'
+        altsep = b'/'
+        colon_sep = b':\\'
     else:
-        if s.replace('/', '\\').startswith('\\\\?\\'):
-            return True
-    s = splitdrive(s)[1]
-    return len(s) > 0 and s[0] and s[0] in _get_bothseps(s)
+        sep = '\\'
+        altsep = '/'
+        colon_sep = ':\\'
+    s = s[:3].replace(altsep, sep)
+    # Absolute: UNC, device, and paths with a drive and root.
+    # LEGACY BUG: isabs("/x") should be false since the path has no drive.
+    if s.startswith(sep) or s.startswith(colon_sep, 1):
+        return True
+    return False
 
 
 # Join two (or more) paths.
@@ -172,28 +176,26 @@ def splitdrive(p):
             sep = b'\\'
             altsep = b'/'
             colon = b':'
+            unc_prefix = b'\\\\?\\UNC\\'
         else:
             sep = '\\'
             altsep = '/'
             colon = ':'
+            unc_prefix = '\\\\?\\UNC\\'
         normp = p.replace(altsep, sep)
-        if (normp[0:2] == sep*2) and (normp[2:3] != sep):
-            # is a UNC path:
-            # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
-            # \\machine\mountpoint\directory\etc\...
-            #           directory ^^^^^^^^^^^^^^^
-            index = normp.find(sep, 2)
+        if normp[0:2] == sep * 2:
+            # UNC drives, e.g. \\server\share or \\?\UNC\server\share
+            # Device drives, e.g. \\.\device or \\?\device
+            start = 8 if normp[:8].upper() == unc_prefix else 2
+            index = normp.find(sep, start)
             if index == -1:
-                return p[:0], p
+                return p, p[:0]
             index2 = normp.find(sep, index + 1)
-            # a UNC path can't have two slashes in a row
-            # (after the initial two)
-            if index2 == index + 1:
-                return p[:0], p
             if index2 == -1:
-                index2 = len(p)
+                return p, p[:0]
             return p[:index2], p[index2:]
         if normp[1:2] == colon:
+            # Drive-letter drives, e.g. X:
             return p[:2], p[2:]
     return p[:0], p
 
@@ -499,20 +501,11 @@ def normpath(path):
             altsep = b'/'
             curdir = b'.'
             pardir = b'..'
-            special_prefixes = (b'\\\\.\\', b'\\\\?\\')
         else:
             sep = '\\'
             altsep = '/'
             curdir = '.'
             pardir = '..'
-            special_prefixes = ('\\\\.\\', '\\\\?\\')
-        if path.startswith(special_prefixes):
-            # in the case of paths with these prefixes:
-            # \\.\ -> device names
-            # \\?\ -> literal paths
-            # do not do any normalization, but return the path
-            # unchanged apart from the call to os.fspath()
-            return path
         path = path.replace(altsep, sep)
         prefix, path = splitdrive(path)
 
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index ab3603bdd730..c26c74cdd619 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -107,17 +107,50 @@ def test_splitdrive(self):
         tester('ntpath.splitdrive("//conky/mountpoint/foo/bar")',
                ('//conky/mountpoint', '/foo/bar'))
         tester('ntpath.splitdrive("\\\\\\conky\\mountpoint\\foo\\bar")',
-            ('', '\\\\\\conky\\mountpoint\\foo\\bar'))
+            ('\\\\\\conky', '\\mountpoint\\foo\\bar'))
         tester('ntpath.splitdrive("///conky/mountpoint/foo/bar")',
-            ('', '///conky/mountpoint/foo/bar'))
+            ('///conky', '/mountpoint/foo/bar'))
         tester('ntpath.splitdrive("\\\\conky\\\\mountpoint\\foo\\bar")',
-               ('', '\\\\conky\\\\mountpoint\\foo\\bar'))
+               ('\\\\conky\\', '\\mountpoint\\foo\\bar'))
         tester('ntpath.splitdrive("//conky//mountpoint/foo/bar")',
-               ('', '//conky//mountpoint/foo/bar'))
+               ('//conky/', '/mountpoint/foo/bar'))
         # Issue #19911: UNC part containing U+0130
         self.assertEqual(ntpath.splitdrive('//conky/MOUNTPOİNT/foo/bar'),
                          ('//conky/MOUNTPOİNT', '/foo/bar'))
 
+        # gh-81790: support device namespace, including UNC drives.
+        tester('ntpath.splitdrive("//?/c:")', ("//?/c:", ""))
+        tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/"))
+        tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir"))
+        tester('ntpath.splitdrive("//?/UNC")', ("//?/UNC", ""))
+        tester('ntpath.splitdrive("//?/UNC/")', ("//?/UNC/", ""))
+        tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", ""))
+        tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", ""))
+        tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir"))
+        tester('ntpath.splitdrive("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")',
+               ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/spam'))
+        tester('ntpath.splitdrive("//?/BootPartition/")', ("//?/BootPartition", "/"))
+
+        tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", ""))
+        tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\"))
+        tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir"))
+        tester('ntpath.splitdrive("\\\\?\\UNC")', ("\\\\?\\UNC", ""))
+        tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("\\\\?\\UNC\\", ""))
+        tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", ""))
+        tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", ""))
+        tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share\\dir")',
+               ("\\\\?\\UNC\\server\\share", "\\dir"))
+        tester('ntpath.splitdrive("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")',
+               ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\spam'))
+        tester('ntpath.splitdrive("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\"))
+
+        # gh-96290: support partial/invalid UNC drives
+        tester('ntpath.splitdrive("//")', ("//", ""))  # empty server & missing share
+        tester('ntpath.splitdrive("///")', ("///", ""))  # empty server & empty share
+        tester('ntpath.splitdrive("///y")', ("///y", ""))  # empty server & non-empty share
+        tester('ntpath.splitdrive("//x")', ("//x", ""))  # non-empty server & missing share
+        tester('ntpath.splitdrive("//x/")', ("//x/", ""))  # non-empty server & empty share
+
     def test_split(self):
         tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar'))
         tester('ntpath.split("\\\\conky\\mountpoint\\foo\\bar")',
@@ -136,6 +169,10 @@ def test_isabs(self):
         tester('ntpath.isabs("\\foo")', 1)
         tester('ntpath.isabs("\\foo\\bar")', 1)
 
+        # gh-96290: normal UNC paths and device paths without trailing backslashes
+        tester('ntpath.isabs("\\\\conky\\mountpoint")', 1)
+        tester('ntpath.isabs("\\\\.\\C:")', 1)
+
     def test_commonprefix(self):
         tester('ntpath.commonprefix(["/home/swenson/spam", "/home/swen/spam"])',
                "/home/swen")
@@ -245,6 +282,12 @@ def test_normpath(self):
         tester("ntpath.normpath('//server/share/../..')",  '\\\\server\\share\\')
         tester("ntpath.normpath('//server/share/../../')", '\\\\server\\share\\')
 
+        # gh-96290: don't normalize partial/invalid UNC drives as rooted paths.
+        tester("ntpath.normpath('\\\\foo\\\\')", '\\\\foo\\\\')
+        tester("ntpath.normpath('\\\\foo\\')", '\\\\foo\\')
+        tester("ntpath.normpath('\\\\foo')", '\\\\foo')
+        tester("ntpath.normpath('\\\\')", '\\\\')
+
     def test_realpath_curdir(self):
         expected = ntpath.normpath(os.getcwd())
         tester("ntpath.realpath('.')", expected)
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index f4c11d88c8a0..fd25e5a800d7 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -1468,10 +1468,10 @@ def test_extract_hackers_arcnames_windows_only(self):
             (r'C:\foo\bar', 'foo/bar'),
             (r'//conky/mountpoint/foo/bar', 'foo/bar'),
             (r'\\conky\mountpoint\foo\bar', 'foo/bar'),
-            (r'///conky/mountpoint/foo/bar', 'conky/mountpoint/foo/bar'),
-            (r'\\\conky\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'),
-            (r'//conky//mountpoint/foo/bar', 'conky/mountpoint/foo/bar'),
-            (r'\\conky\\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'),
+            (r'///conky/mountpoint/foo/bar', 'mountpoint/foo/bar'),
+            (r'\\\conky\mountpoint\foo\bar', 'mountpoint/foo/bar'),
+            (r'//conky//mountpoint/foo/bar', 'mountpoint/foo/bar'),
+            (r'\\conky\\mountpoint\foo\bar', 'mountpoint/foo/bar'),
             (r'//?/C:/foo/bar', 'foo/bar'),
             (r'\\?\C:\foo\bar', 'foo/bar'),
             (r'C:/../C:/foo/bar', 'C_/foo/bar'),
diff --git a/Misc/NEWS.d/next/Library/2022-12-19-23-19-26.gh-issue-96290.qFjsi6.rst b/Misc/NEWS.d/next/Library/2022-12-19-23-19-26.gh-issue-96290.qFjsi6.rst
new file mode 100644
index 000000000000..33f98602bd1b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-12-19-23-19-26.gh-issue-96290.qFjsi6.rst
@@ -0,0 +1,5 @@
+Fix handling of partial and invalid UNC drives in ``ntpath.splitdrive()``, and in
+``ntpath.normpath()`` on non-Windows systems. Paths such as '\\server' and '\\' are now considered
+by ``splitdrive()`` to contain only a drive, and consequently are not modified by ``normpath()`` on
+non-Windows systems. The behaviour of ``normpath()`` on Windows systems is unaffected, as native
+OS APIs are used. Patch by Eryk Sun, with contributions by Barney Gale.



More information about the Python-checkins mailing list