[Python-checkins] cpython: Issue 23193: Add numeric_owner to tarfile.TarFile.extract() and

eric.smith python-checkins at python.org
Wed Apr 15 16:27:51 CEST 2015


https://hg.python.org/cpython/rev/6b70f16d585a
changeset:   95673:6b70f16d585a
user:        Eric V. Smith <eric at trueblade.com>
date:        Wed Apr 15 10:27:58 2015 -0400
summary:
  Issue 23193: Add numeric_owner to tarfile.TarFile.extract() and tarfile.TarFile.extractall().

files:
  Doc/library/tarfile.rst  |   19 +++-
  Doc/whatsnew/3.5.rst     |   22 ++-
  Lib/tarfile.py           |   45 +++++---
  Lib/test/test_tarfile.py |  132 +++++++++++++++++++++++++++
  Misc/ACKS                |    1 +
  Misc/NEWS                |    4 +
  6 files changed, 195 insertions(+), 28 deletions(-)


diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -367,7 +367,7 @@
    available.
 
 
-.. method:: TarFile.extractall(path=".", members=None)
+.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False)
 
    Extract all members from the archive to the current working directory or
    directory *path*. If optional *members* is given, it must be a subset of the
@@ -377,6 +377,10 @@
    reset each time a file is created in it. And, if a directory's permissions do
    not allow writing, extracting files to it will fail.
 
+   If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile
+   are used to set the owner/group for the extracted files. Otherwise, the named
+   values from the tarfile are used.
+
    .. warning::
 
       Never extract archives from untrusted sources without prior inspection.
@@ -384,8 +388,11 @@
       that have absolute filenames starting with ``"/"`` or filenames with two
       dots ``".."``.
 
+   .. versionchanged:: 3.5
+      Added the *numeric_only* parameter.
 
-.. method:: TarFile.extract(member, path="", set_attrs=True)
+
+.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False)
 
    Extract a member from the archive to the current working directory, using its
    full name. Its file information is extracted as accurately as possible. *member*
@@ -393,6 +400,10 @@
    directory using *path*. File attributes (owner, mtime, mode) are set unless
    *set_attrs* is false.
 
+   If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile
+   are used to set the owner/group for the extracted files. Otherwise, the named
+   values from the tarfile are used.
+
    .. note::
 
       The :meth:`extract` method does not take care of several extraction issues.
@@ -405,6 +416,9 @@
    .. versionchanged:: 3.2
       Added the *set_attrs* parameter.
 
+   .. versionchanged:: 3.5
+      Added the *numeric_only* parameter.
+
 .. method:: TarFile.extractfile(member)
 
    Extract a member from the archive as a file object. *member* may be a filename
@@ -827,4 +841,3 @@
 because all the metadata is stored using *UTF-8*. *encoding* is only used in
 the rare cases when binary pax headers are decoded or when strings with
 surrogate characters are stored.
-
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -479,26 +479,32 @@
   :meth:`socket.socket.send`.
   (Contributed by Giampaolo Rodola' in :issue:`17552`.)
 
+subprocess
+----------
+
+* The new :func:`subprocess.run` function runs subprocesses and returns a
+  :class:`subprocess.CompletedProcess` object.  It Provides a more consistent
+  API than :func:`~subprocess.call`, :func:`~subprocess.check_call` and
+  :func:`~subprocess.check_output`.
+
 sysconfig
 ---------
 
 * The user scripts directory on Windows is now versioned.
   (Contributed by Paul Moore in :issue:`23437`.)
 
-
 tarfile
 -------
 
 * The :func:`tarfile.open` function now supports ``'x'`` (exclusive creation)
   mode.  (Contributed by Berker Peksag in :issue:`21717`.)
 
-subprocess
-----------
-
-* The new :func:`subprocess.run` function runs subprocesses and returns a
-  :class:`subprocess.CompletedProcess` object.  It Provides a more consistent
-  API than :func:`~subprocess.call`, :func:`~subprocess.check_call` and
-  :func:`~subprocess.check_output`.
+* The :meth:`~tarfile.TarFile.extractall` and :meth:`~tarfile.TarFile.extract`
+  methods now take a keyword parameter *numeric_only*. If set to ``True``,
+  the extracted files and directories will be owned by the numeric uid and gid
+  from the tarfile. If set to ``False`` (the default, and the behavior in
+  versions prior to 3.5), they will be owned bythe named user and group in the
+  tarfile.  (Contributed by Michael Vogt and Eric Smith in :issue:`23193`.)
 
 time
 ----
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1972,12 +1972,13 @@
 
         self.members.append(tarinfo)
 
-    def extractall(self, path=".", members=None):
+    def extractall(self, path=".", members=None, *, numeric_owner=False):
         """Extract all members from the archive to the current working
            directory and set owner, modification time and permissions on
            directories afterwards. `path' specifies a different directory
            to extract to. `members' is optional and must be a subset of the
-           list returned by getmembers().
+           list returned by getmembers(). If `numeric_owner` is True, only
+           the numbers for user/group names are used and not the names.
         """
         directories = []
 
@@ -1991,7 +1992,8 @@
                 tarinfo = copy.copy(tarinfo)
                 tarinfo.mode = 0o700
             # Do not set_attrs directories, as we will do that further down
-            self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())
+            self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
+                         numeric_owner=numeric_owner)
 
         # Reverse sort directories.
         directories.sort(key=lambda a: a.name)
@@ -2001,7 +2003,7 @@
         for tarinfo in directories:
             dirpath = os.path.join(path, tarinfo.name)
             try:
-                self.chown(tarinfo, dirpath)
+                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
                 self.utime(tarinfo, dirpath)
                 self.chmod(tarinfo, dirpath)
             except ExtractError as e:
@@ -2010,12 +2012,14 @@
                 else:
                     self._dbg(1, "tarfile: %s" % e)
 
-    def extract(self, member, path="", set_attrs=True):
+    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
         """Extract a member from the archive to the current working directory,
            using its full name. Its file information is extracted as accurately
            as possible. `member' may be a filename or a TarInfo object. You can
            specify a different directory using `path'. File attributes (owner,
-           mtime, mode) are set unless `set_attrs' is False.
+           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
+           is True, only the numbers for user/group names are used and not
+           the names.
         """
         self._check("r")
 
@@ -2030,7 +2034,8 @@
 
         try:
             self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
-                                 set_attrs=set_attrs)
+                                 set_attrs=set_attrs,
+                                 numeric_owner=numeric_owner)
         except OSError as e:
             if self.errorlevel > 0:
                 raise
@@ -2076,7 +2081,8 @@
             # blkdev, etc.), return None instead of a file object.
             return None
 
-    def _extract_member(self, tarinfo, targetpath, set_attrs=True):
+    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
+                        numeric_owner=False):
         """Extract the TarInfo object tarinfo to a physical
            file called targetpath.
         """
@@ -2114,7 +2120,7 @@
             self.makefile(tarinfo, targetpath)
 
         if set_attrs:
-            self.chown(tarinfo, targetpath)
+            self.chown(tarinfo, targetpath, numeric_owner)
             if not tarinfo.issym():
                 self.chmod(tarinfo, targetpath)
                 self.utime(tarinfo, targetpath)
@@ -2203,19 +2209,24 @@
             except KeyError:
                 raise ExtractError("unable to resolve link inside archive")
 
-    def chown(self, tarinfo, targetpath):
-        """Set owner of targetpath according to tarinfo.
+    def chown(self, tarinfo, targetpath, numeric_owner):
+        """Set owner of targetpath according to tarinfo. If numeric_owner
+           is True, use .gid/.uid instead of .gname/.uname.
         """
         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
             # We have to be root to do so.
-            try:
-                g = grp.getgrnam(tarinfo.gname)[2]
-            except KeyError:
+            if numeric_owner:
                 g = tarinfo.gid
-            try:
-                u = pwd.getpwnam(tarinfo.uname)[2]
-            except KeyError:
                 u = tarinfo.uid
+            else:
+                try:
+                    g = grp.getgrnam(tarinfo.gname)[2]
+                except KeyError:
+                    g = tarinfo.gid
+                try:
+                    u = pwd.getpwnam(tarinfo.uname)[2]
+                except KeyError:
+                    u = tarinfo.uid
             try:
                 if tarinfo.issym() and hasattr(os, "lchown"):
                     os.lchown(targetpath, u, g)
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -2,8 +2,10 @@
 import os
 import io
 from hashlib import md5
+from contextlib import contextmanager
 
 import unittest
+import unittest.mock
 import tarfile
 
 from test import support, script_helper
@@ -2264,6 +2266,136 @@
         self._test_partial_input("r:bz2")
 
 
+def root_is_uid_gid_0():
+    try:
+        import pwd, grp
+    except ImportError:
+        return False
+    if pwd.getpwuid(0)[0] != 'root':
+        return False
+    if grp.getgrgid(0)[0] != 'root':
+        return False
+    return True
+
+
+class NumericOwnerTest(unittest.TestCase):
+    # mock the following:
+    #  os.chown: so we can test what's being called
+    #  os.chmod: so the modes are not actually changed. if they are, we can't
+    #             delete the files/directories
+    #  os.geteuid: so we can lie and say we're root (uid = 0)
+
+    @staticmethod
+    def _make_test_archive(filename_1, dirname_1, filename_2):
+        # the file contents to write
+        fobj = io.BytesIO(b"content")
+
+        # create a tar file with a file, a directory, and a file within that
+        #  directory. Assign various .uid/.gid values to them
+        items = [(filename_1, 99, 98, tarfile.REGTYPE, fobj),
+                 (dirname_1,  77, 76, tarfile.DIRTYPE, None),
+                 (filename_2, 88, 87, tarfile.REGTYPE, fobj),
+                 ]
+        with tarfile.open(tmpname, 'w') as tarfl:
+            for name, uid, gid, typ, contents in items:
+                t = tarfile.TarInfo(name)
+                t.uid = uid
+                t.gid = gid
+                t.uname = 'root'
+                t.gname = 'root'
+                t.type = typ
+                tarfl.addfile(t, contents)
+
+        # return the full pathname to the tar file
+        return tmpname
+
+    @staticmethod
+    @contextmanager
+    def _setup_test(mock_geteuid):
+        mock_geteuid.return_value = 0  # lie and say we're root
+        fname = 'numeric-owner-testfile'
+        dirname = 'dir'
+
+        # the names we want stored in the tarfile
+        filename_1 = fname
+        dirname_1 = dirname
+        filename_2 = os.path.join(dirname, fname)
+
+        # create the tarfile with the contents we're after
+        tar_filename = NumericOwnerTest._make_test_archive(filename_1,
+                                                           dirname_1,
+                                                           filename_2)
+
+        # open the tarfile for reading. yield it and the names of the items
+        #  we stored into the file
+        with tarfile.open(tar_filename) as tarfl:
+            yield tarfl, filename_1, dirname_1, filename_2
+
+    @unittest.mock.patch('os.chown')
+    @unittest.mock.patch('os.chmod')
+    @unittest.mock.patch('os.geteuid')
+    def test_extract_with_numeric_owner(self, mock_geteuid, mock_chmod,
+                                        mock_chown):
+        with self._setup_test(mock_geteuid) as (tarfl, filename_1, _,
+                                                filename_2):
+            tarfl.extract(filename_1, TEMPDIR, numeric_owner=True)
+            tarfl.extract(filename_2 , TEMPDIR, numeric_owner=True)
+
+        # convert to filesystem paths
+        f_filename_1 = os.path.join(TEMPDIR, filename_1)
+        f_filename_2 = os.path.join(TEMPDIR, filename_2)
+
+        mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
+                                     unittest.mock.call(f_filename_2, 88, 87),
+                                     ],
+                                    any_order=True)
+
+    @unittest.mock.patch('os.chown')
+    @unittest.mock.patch('os.chmod')
+    @unittest.mock.patch('os.geteuid')
+    def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chmod,
+                                           mock_chown):
+        with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1,
+                                                filename_2):
+            tarfl.extractall(TEMPDIR, numeric_owner=True)
+
+        # convert to filesystem paths
+        f_filename_1 = os.path.join(TEMPDIR, filename_1)
+        f_dirname_1  = os.path.join(TEMPDIR, dirname_1)
+        f_filename_2 = os.path.join(TEMPDIR, filename_2)
+
+        mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
+                                     unittest.mock.call(f_dirname_1, 77, 76),
+                                     unittest.mock.call(f_filename_2, 88, 87),
+                                     ],
+                                    any_order=True)
+
+    # this test requires that uid=0 and gid=0 really be named 'root'. that's
+    #  because the uname and gname in the test file are 'root', and extract()
+    #  will look them up using pwd and grp to find their uid and gid, which we
+    #  test here to be 0.
+    @unittest.skipUnless(root_is_uid_gid_0(),
+                         'uid=0,gid=0 must be named "root"')
+    @unittest.mock.patch('os.chown')
+    @unittest.mock.patch('os.chmod')
+    @unittest.mock.patch('os.geteuid')
+    def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod,
+                                           mock_chown):
+        with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
+            tarfl.extract(filename_1, TEMPDIR, numeric_owner=False)
+
+        # convert to filesystem paths
+        f_filename_1 = os.path.join(TEMPDIR, filename_1)
+
+        mock_chown.assert_called_with(f_filename_1, 0, 0)
+
+    @unittest.mock.patch('os.geteuid')
+    def test_keyword_only(self, mock_geteuid):
+        with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
+            self.assertRaises(TypeError,
+                              tarfl.extract, filename_1, TEMPDIR, False, True)
+
+
 def setUpModule():
     support.unlink(TEMPDIR)
     os.makedirs(TEMPDIR)
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1458,6 +1458,7 @@
 Pauli Virtanen
 Frank Visser
 Johannes Vogel
+Michael Vogt
 Radu Voicilas
 Alex Volkov
 Martijn Vries
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -32,6 +32,10 @@
 Library
 -------
 
+- Issue #23193: Add a numeric_owner parameter to
+  tarfile.TarFile.extract and tarfile.TarFile.extractall. Patch by
+  Michael Vogt and Eric Smith.
+
 - Issue #23342: Add a subprocess.run() function than returns a CalledProcess
   instance for a more consistent API than the existing call* functions.
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list