[Python-checkins] bpo-9949: Enable symlink traversal for ntpath.realpath (GH-15287)

Miss Islington (bot) webhook-mailer at python.org
Wed Aug 21 17:09:38 EDT 2019


https://github.com/python/cpython/commit/c30c869e8dec5eefdee7977943ffa11a8e3c8d75
commit: c30c869e8dec5eefdee7977943ffa11a8e3c8d75
branch: 3.8
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2019-08-21T14:09:33-07:00
summary:

bpo-9949: Enable symlink traversal for ntpath.realpath (GH-15287)

(cherry picked from commit 75e064962ee0e31ec19a8081e9d9cc957baf6415)

Co-authored-by: Steve Dower <steve.dower at python.org>

files:
A Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst
M Doc/library/os.path.rst
M Doc/whatsnew/3.8.rst
M Lib/ntpath.py
M Lib/test/test_ntpath.py
M Lib/test/test_os.py
M Lib/test/test_shutil.py
M Lib/unittest/test/test_discovery.py

diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst
index a673b81278ea..640ceecbc78f 100644
--- a/Doc/library/os.path.rst
+++ b/Doc/library/os.path.rst
@@ -350,11 +350,19 @@ the :mod:`glob` module.)
 .. function:: realpath(path)
 
    Return the canonical path of the specified filename, eliminating any symbolic
-   links encountered in the path (if they are supported by the operating system).
+   links encountered in the path (if they are supported by the operating
+   system).
+
+   .. note::
+      When symbolic link cycles occur, the returned path will be one member of
+      the cycle, but no guarantee is made about which member that will be.
 
    .. versionchanged:: 3.6
       Accepts a :term:`path-like object`.
 
+   .. versionchanged:: 3.8
+      Symbolic links and junctions are now resolved on Windows.
+
 
 .. function:: relpath(path, start=os.curdir)
 
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index e8238251d6ea..93758e96b573 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -824,6 +824,9 @@ characters or bytes unrepresentable at the OS level.
 environment variable and does not use :envvar:`HOME`, which is not normally set
 for regular user accounts.
 
+:func:`~os.path.realpath` on Windows now resolves reparse points, including
+symlinks and directory junctions.
+
 
 ncurses
 -------
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index f3cfabf0238e..ef4999e1473a 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -519,8 +519,94 @@ def abspath(path):
         except (OSError, ValueError):
             return _abspath_fallback(path)
 
-# realpath is a no-op on systems without islink support
-realpath = abspath
+try:
+    from nt import _getfinalpathname, readlink as _nt_readlink
+except ImportError:
+    # realpath is a no-op on systems without _getfinalpathname support.
+    realpath = abspath
+else:
+    def _readlink_deep(path, seen=None):
+        if seen is None:
+            seen = set()
+
+        while normcase(path) not in seen:
+            seen.add(normcase(path))
+            try:
+                path = _nt_readlink(path)
+            except OSError as ex:
+                # Stop on file (2) or directory (3) not found, or
+                # paths that are not reparse points (4390)
+                if ex.winerror in (2, 3, 4390):
+                    break
+                raise
+            except ValueError:
+                # Stop on reparse points that are not symlinks
+                break
+        return path
+
+    def _getfinalpathname_nonstrict(path):
+        # Fast path to get the final path name. If this succeeds, there
+        # is no need to go any further.
+        try:
+            return _getfinalpathname(path)
+        except OSError:
+            pass
+
+        # Allow file (2) or directory (3) not found, invalid syntax (123),
+        # and symlinks that cannot be followed (1921)
+        allowed_winerror = 2, 3, 123, 1921
+
+        # Non-strict algorithm is to find as much of the target directory
+        # as we can and join the rest.
+        tail = ''
+        seen = set()
+        while path:
+            try:
+                path = _readlink_deep(path, seen)
+                path = _getfinalpathname(path)
+                return join(path, tail) if tail else path
+            except OSError as ex:
+                if ex.winerror not in allowed_winerror:
+                    raise
+                path, name = split(path)
+                if path and not name:
+                    return abspath(path + tail)
+                tail = join(name, tail) if tail else name
+        return abspath(tail)
+
+    def realpath(path):
+        path = os.fspath(path)
+        if isinstance(path, bytes):
+            prefix = b'\\\\?\\'
+            unc_prefix = b'\\\\?\\UNC\\'
+            new_unc_prefix = b'\\\\'
+            cwd = os.getcwdb()
+        else:
+            prefix = '\\\\?\\'
+            unc_prefix = '\\\\?\\UNC\\'
+            new_unc_prefix = '\\\\'
+            cwd = os.getcwd()
+        had_prefix = path.startswith(prefix)
+        path = _getfinalpathname_nonstrict(path)
+        # The path returned by _getfinalpathname will always start with \\?\ -
+        # strip off that prefix unless it was already provided on the original
+        # path.
+        if not had_prefix and path.startswith(prefix):
+            # For UNC paths, the prefix will actually be \\?\UNC\
+            # Handle that case as well.
+            if path.startswith(unc_prefix):
+                spath = new_unc_prefix + path[len(unc_prefix):]
+            else:
+                spath = path[len(prefix):]
+            # Ensure that the non-prefixed path resolves to the same path
+            try:
+                if _getfinalpathname(spath) == path:
+                    path = spath
+            except OSError as ex:
+                pass
+        return path
+
+
 # Win9x family and earlier have no Unicode filename support.
 supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
                               sys.getwindowsversion()[3] >= 2)
@@ -633,23 +719,6 @@ def commonpath(paths):
         raise
 
 
-# determine if two files are in fact the same file
-try:
-    # GetFinalPathNameByHandle is available starting with Windows 6.0.
-    # Windows XP and non-Windows OS'es will mock _getfinalpathname.
-    if sys.getwindowsversion()[:2] >= (6, 0):
-        from nt import _getfinalpathname
-    else:
-        raise ImportError
-except (AttributeError, ImportError):
-    # On Windows XP and earlier, two files are the same if their absolute
-    # pathnames are the same.
-    # Non-Windows operating systems fake this method with an XP
-    # approximation.
-    def _getfinalpathname(f):
-        return normcase(abspath(f))
-
-
 try:
     # The genericpath.isdir implementation uses os.stat and checks the mode
     # attribute to tell whether or not the path is a directory.
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index 92d85ecbc4fc..74dc8c378e27 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -7,6 +7,7 @@
 from test import support, test_genericpath
 from tempfile import TemporaryFile
 
+
 try:
     import nt
 except ImportError:
@@ -14,6 +15,14 @@
     # but for those that require it we import here.
     nt = None
 
+try:
+    ntpath._getfinalpathname
+except AttributeError:
+    HAVE_GETFINALPATHNAME = False
+else:
+    HAVE_GETFINALPATHNAME = True
+
+
 def tester(fn, wantResult):
     fn = fn.replace("\\", "\\\\")
     gotResult = eval(fn)
@@ -194,6 +203,189 @@ def test_normpath(self):
         tester("ntpath.normpath('\\\\.\\NUL')", r'\\.\NUL')
         tester("ntpath.normpath('\\\\?\\D:/XY\\Z')", r'\\?\D:/XY\Z')
 
+    def test_realpath_curdir(self):
+        expected = ntpath.normpath(os.getcwd())
+        tester("ntpath.realpath('.')", expected)
+        tester("ntpath.realpath('./.')", expected)
+        tester("ntpath.realpath('/'.join(['.'] * 100))", expected)
+        tester("ntpath.realpath('.\\.')", expected)
+        tester("ntpath.realpath('\\'.join(['.'] * 100))", expected)
+
+    def test_realpath_pardir(self):
+        expected = ntpath.normpath(os.getcwd())
+        tester("ntpath.realpath('..')", ntpath.dirname(expected))
+        tester("ntpath.realpath('../..')",
+               ntpath.dirname(ntpath.dirname(expected)))
+        tester("ntpath.realpath('/'.join(['..'] * 50))",
+               ntpath.splitdrive(expected)[0] + '\\')
+        tester("ntpath.realpath('..\\..')",
+               ntpath.dirname(ntpath.dirname(expected)))
+        tester("ntpath.realpath('\\'.join(['..'] * 50))",
+               ntpath.splitdrive(expected)[0] + '\\')
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_basic(self):
+        ABSTFN = ntpath.abspath(support.TESTFN)
+        open(ABSTFN, "wb").close()
+        self.addCleanup(support.unlink, ABSTFN)
+        self.addCleanup(support.unlink, ABSTFN + "1")
+
+        os.symlink(ABSTFN, ABSTFN + "1")
+        self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN)
+        self.assertEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")),
+                         os.fsencode(ABSTFN))
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_relative(self):
+        ABSTFN = ntpath.abspath(support.TESTFN)
+        open(ABSTFN, "wb").close()
+        self.addCleanup(support.unlink, ABSTFN)
+        self.addCleanup(support.unlink, ABSTFN + "1")
+
+        os.symlink(ABSTFN, ntpath.relpath(ABSTFN + "1"))
+        self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN)
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_broken_symlinks(self):
+        ABSTFN = ntpath.abspath(support.TESTFN)
+        os.mkdir(ABSTFN)
+        self.addCleanup(support.rmtree, ABSTFN)
+
+        with support.change_cwd(ABSTFN):
+            os.mkdir("subdir")
+            os.chdir("subdir")
+            os.symlink(".", "recursive")
+            os.symlink("..", "parent")
+            os.chdir("..")
+            os.symlink(".", "self")
+            os.symlink("missing", "broken")
+            os.symlink(r"broken\bar", "broken1")
+            os.symlink(r"self\self\broken", "broken2")
+            os.symlink(r"subdir\parent\subdir\parent\broken", "broken3")
+            os.symlink(ABSTFN + r"\broken", "broken4")
+            os.symlink(r"recursive\..\broken", "broken5")
+
+            self.assertEqual(ntpath.realpath("broken"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath(r"broken\foo"),
+                             ABSTFN + r"\missing\foo")
+            self.assertEqual(ntpath.realpath(r"broken1"),
+                             ABSTFN + r"\missing\bar")
+            self.assertEqual(ntpath.realpath(r"broken1\baz"),
+                             ABSTFN + r"\missing\bar\baz")
+            self.assertEqual(ntpath.realpath("broken2"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath("broken3"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath("broken4"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath("broken5"),
+                             ABSTFN + r"\missing")
+
+            self.assertEqual(ntpath.realpath(b"broken"),
+                             os.fsencode(ABSTFN + r"\missing"))
+            self.assertEqual(ntpath.realpath(rb"broken\foo"),
+                             os.fsencode(ABSTFN + r"\missing\foo"))
+            self.assertEqual(ntpath.realpath(rb"broken1"),
+                             os.fsencode(ABSTFN + r"\missing\bar"))
+            self.assertEqual(ntpath.realpath(rb"broken1\baz"),
+                             os.fsencode(ABSTFN + r"\missing\bar\baz"))
+            self.assertEqual(ntpath.realpath(b"broken2"),
+                             os.fsencode(ABSTFN + r"\missing"))
+            self.assertEqual(ntpath.realpath(rb"broken3"),
+                             os.fsencode(ABSTFN + r"\missing"))
+            self.assertEqual(ntpath.realpath(b"broken4"),
+                             os.fsencode(ABSTFN + r"\missing"))
+            self.assertEqual(ntpath.realpath(b"broken5"),
+                             os.fsencode(ABSTFN + r"\missing"))
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_symlink_loops(self):
+        # Bug #930024, return the path unchanged if we get into an infinite
+        # symlink loop.
+        ABSTFN = ntpath.abspath(support.TESTFN)
+        self.addCleanup(support.unlink, ABSTFN)
+        self.addCleanup(support.unlink, ABSTFN + "1")
+        self.addCleanup(support.unlink, ABSTFN + "2")
+        self.addCleanup(support.unlink, ABSTFN + "y")
+        self.addCleanup(support.unlink, ABSTFN + "c")
+        self.addCleanup(support.unlink, ABSTFN + "a")
+
+        P = "\\\\?\\"
+
+        os.symlink(ABSTFN, ABSTFN)
+        self.assertEqual(ntpath.realpath(ABSTFN), P + ABSTFN)
+
+        # cycles are non-deterministic as to which path is returned, but
+        # it will always be the fully resolved path of one member of the cycle
+        os.symlink(ABSTFN + "1", ABSTFN + "2")
+        os.symlink(ABSTFN + "2", ABSTFN + "1")
+        expected = (P + ABSTFN + "1", P + ABSTFN + "2")
+        self.assertIn(ntpath.realpath(ABSTFN + "1"), expected)
+        self.assertIn(ntpath.realpath(ABSTFN + "2"), expected)
+
+        self.assertIn(ntpath.realpath(ABSTFN + "1\\x"),
+                      (ntpath.join(r, "x") for r in expected))
+        self.assertEqual(ntpath.realpath(ABSTFN + "1\\.."),
+                         ntpath.dirname(ABSTFN))
+        self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\x"),
+                         ntpath.dirname(P + ABSTFN) + "\\x")
+        os.symlink(ABSTFN + "x", ABSTFN + "y")
+        self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\"
+                                         + ntpath.basename(ABSTFN) + "y"),
+                         P + ABSTFN + "x")
+        self.assertIn(ntpath.realpath(ABSTFN + "1\\..\\"
+                                      + ntpath.basename(ABSTFN) + "1"),
+                      expected)
+
+        os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a")
+        self.assertEqual(ntpath.realpath(ABSTFN + "a"), P + ABSTFN + "a")
+
+        os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN))
+                   + "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c")
+        self.assertEqual(ntpath.realpath(ABSTFN + "c"), P + ABSTFN + "c")
+
+        # Test using relative path as well.
+        self.assertEqual(ntpath.realpath(ntpath.basename(ABSTFN)), P + ABSTFN)
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_symlink_prefix(self):
+        ABSTFN = ntpath.abspath(support.TESTFN)
+        self.addCleanup(support.unlink, ABSTFN + "3")
+        self.addCleanup(support.unlink, "\\\\?\\" + ABSTFN + "3.")
+        self.addCleanup(support.unlink, ABSTFN + "3link")
+        self.addCleanup(support.unlink, ABSTFN + "3.link")
+
+        with open(ABSTFN + "3", "wb") as f:
+            f.write(b'0')
+        os.symlink(ABSTFN + "3", ABSTFN + "3link")
+
+        with open("\\\\?\\" + ABSTFN + "3.", "wb") as f:
+            f.write(b'1')
+        os.symlink("\\\\?\\" + ABSTFN + "3.", ABSTFN + "3.link")
+
+        self.assertEqual(ntpath.realpath(ABSTFN + "3link"),
+                         ABSTFN + "3")
+        self.assertEqual(ntpath.realpath(ABSTFN + "3.link"),
+                         "\\\\?\\" + ABSTFN + "3.")
+
+        # Resolved paths should be usable to open target files
+        with open(ntpath.realpath(ABSTFN + "3link"), "rb") as f:
+            self.assertEqual(f.read(), b'0')
+        with open(ntpath.realpath(ABSTFN + "3.link"), "rb") as f:
+            self.assertEqual(f.read(), b'1')
+
+        # When the prefix is included, it is not stripped
+        self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link"),
+                         "\\\\?\\" + ABSTFN + "3")
+        self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link"),
+                         "\\\\?\\" + ABSTFN + "3.")
+
     def test_expandvars(self):
         with support.EnvironmentVarGuard() as env:
             env.clear()
@@ -288,11 +480,11 @@ def test_abspath(self):
 
     def test_relpath(self):
         tester('ntpath.relpath("a")', 'a')
-        tester('ntpath.relpath(os.path.abspath("a"))', 'a')
+        tester('ntpath.relpath(ntpath.abspath("a"))', 'a')
         tester('ntpath.relpath("a/b")', 'a\\b')
         tester('ntpath.relpath("../a/b")', '..\\a\\b')
         with support.temp_cwd(support.TESTFN) as cwd_dir:
-            currentdir = os.path.basename(cwd_dir)
+            currentdir = ntpath.basename(cwd_dir)
             tester('ntpath.relpath("a", "../b")', '..\\'+currentdir+'\\a')
             tester('ntpath.relpath("a/b", "../c")', '..\\'+currentdir+'\\a\\b')
         tester('ntpath.relpath("a", "b/c")', '..\\..\\a')
@@ -417,7 +609,7 @@ def test_ismount(self):
             # locations below cannot then refer to mount points
             #
             drive, path = ntpath.splitdrive(sys.executable)
-            with support.change_cwd(os.path.dirname(sys.executable)):
+            with support.change_cwd(ntpath.dirname(sys.executable)):
                 self.assertFalse(ntpath.ismount(drive.lower()))
                 self.assertFalse(ntpath.ismount(drive.upper()))
 
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index b2cd4cca5f21..15fd65b55149 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -3358,10 +3358,7 @@ def test_oserror_filename(self):
         if hasattr(os, "lchmod"):
             funcs.append((self.filenames, os.lchmod, 0o777))
         if hasattr(os, "readlink"):
-            if sys.platform == "win32":
-                funcs.append((self.unicode_filenames, os.readlink,))
-            else:
-                funcs.append((self.filenames, os.readlink,))
+            funcs.append((self.filenames, os.readlink,))
 
 
         for filenames, func, *func_args in funcs:
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py
index e209607f22c1..88dc4d9e195e 100644
--- a/Lib/test/test_shutil.py
+++ b/Lib/test/test_shutil.py
@@ -1871,11 +1871,7 @@ def test_move_dangling_symlink(self):
         dst_link = os.path.join(self.dst_dir, 'quux')
         shutil.move(dst, dst_link)
         self.assertTrue(os.path.islink(dst_link))
-        # On Windows, os.path.realpath does not follow symlinks (issue #9949)
-        if os.name == 'nt':
-            self.assertEqual(os.path.realpath(src), os.readlink(dst_link))
-        else:
-            self.assertEqual(os.path.realpath(src), os.path.realpath(dst_link))
+        self.assertEqual(os.path.realpath(src), os.path.realpath(dst_link))
 
     @support.skip_unless_symlink
     @mock_rename
diff --git a/Lib/unittest/test/test_discovery.py b/Lib/unittest/test/test_discovery.py
index 204043b493b5..16e081e1fb76 100644
--- a/Lib/unittest/test/test_discovery.py
+++ b/Lib/unittest/test/test_discovery.py
@@ -723,11 +723,13 @@ class Module(object):
         original_listdir = os.listdir
         original_isfile = os.path.isfile
         original_isdir = os.path.isdir
+        original_realpath = os.path.realpath
 
         def cleanup():
             os.listdir = original_listdir
             os.path.isfile = original_isfile
             os.path.isdir = original_isdir
+            os.path.realpath = original_realpath
             del sys.modules['foo']
             if full_path in sys.path:
                 sys.path.remove(full_path)
@@ -742,6 +744,10 @@ def isdir(_):
         os.listdir = listdir
         os.path.isfile = isfile
         os.path.isdir = isdir
+        if os.name == 'nt':
+            # ntpath.realpath may inject path prefixes when failing to
+            # resolve real files, so we substitute abspath() here instead.
+            os.path.realpath = os.path.abspath
         return full_path
 
     def test_detect_module_clash(self):
diff --git a/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst b/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst
new file mode 100644
index 000000000000..e42169a927c7
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst
@@ -0,0 +1 @@
+Enable support for following symlinks in :func:`os.realpath`.



More information about the Python-checkins mailing list