[Python-checkins] cpython (merge 3.3 -> default): Issue #19920: TarFile.list() no longer fails when outputs a listing

serhiy.storchaka python-checkins at python.org
Wed Feb 5 19:55:30 CET 2014


http://hg.python.org/cpython/rev/077aa6d4f3b7
changeset:   88975:077aa6d4f3b7
parent:      88973:3dcbf8f928ec
parent:      88974:a5895fca91f3
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Wed Feb 05 20:54:43 2014 +0200
summary:
  Issue #19920: TarFile.list() no longer fails when outputs a listing
containing non-encodable characters.  Added tests for TarFile.list().
Based on patch by Vajrasky Kok.

files:
  Lib/tarfile.py           |   28 +++--
  Lib/test/test_tarfile.py |  117 ++++++++++++++++++++++----
  Misc/NEWS                |    5 +
  3 files changed, 121 insertions(+), 29 deletions(-)


diff --git a/Lib/tarfile.py b/Lib/tarfile.py
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -257,6 +257,12 @@
                   DeprecationWarning, 2)
     return stat.filemode(mode)
 
+def _safe_print(s):
+    encoding = getattr(sys.stdout, 'encoding', None)
+    if encoding is not None:
+        s = s.encode(encoding, 'backslashreplace').decode(encoding)
+    print(s, end=' ')
+
 
 class TarError(Exception):
     """Base exception."""
@@ -1846,24 +1852,24 @@
 
         for tarinfo in self:
             if verbose:
-                print(stat.filemode(tarinfo.mode), end=' ')
-                print("%s/%s" % (tarinfo.uname or tarinfo.uid,
-                                 tarinfo.gname or tarinfo.gid), end=' ')
+                _safe_print(stat.filemode(tarinfo.mode))
+                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
+                                       tarinfo.gname or tarinfo.gid))
                 if tarinfo.ischr() or tarinfo.isblk():
-                    print("%10s" % ("%d,%d" \
-                                    % (tarinfo.devmajor, tarinfo.devminor)), end=' ')
+                    _safe_print("%10s" %
+                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
                 else:
-                    print("%10d" % tarinfo.size, end=' ')
-                print("%d-%02d-%02d %02d:%02d:%02d" \
-                      % time.localtime(tarinfo.mtime)[:6], end=' ')
+                    _safe_print("%10d" % tarinfo.size)
+                _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
+                            % time.localtime(tarinfo.mtime)[:6])
 
-            print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ')
+            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
 
             if verbose:
                 if tarinfo.issym():
-                    print("->", tarinfo.linkname, end=' ')
+                    _safe_print("-> " + tarinfo.linkname)
                 if tarinfo.islnk():
-                    print("link to", tarinfo.linkname, end=' ')
+                    _safe_print("link to " + tarinfo.linkname)
             print()
 
     def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -219,6 +219,84 @@
     pass
 
 
+class ListTest(ReadTest, unittest.TestCase):
+
+    # Override setUp to use default encoding (UTF-8)
+    def setUp(self):
+        self.tar = tarfile.open(self.tarname, mode=self.mode)
+
+    def test_list(self):
+        tio = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n')
+        with support.swap_attr(sys, 'stdout', tio):
+            self.tar.list(verbose=False)
+        out = tio.detach().getvalue()
+        self.assertIn(b'ustar/conttype', out)
+        self.assertIn(b'ustar/regtype', out)
+        self.assertIn(b'ustar/lnktype', out)
+        self.assertIn(b'ustar' + (b'/12345' * 40) + b'67/longname', out)
+        self.assertIn(b'./ustar/linktest2/symtype', out)
+        self.assertIn(b'./ustar/linktest2/lnktype', out)
+        # Make sure it puts trailing slash for directory
+        self.assertIn(b'ustar/dirtype/', out)
+        self.assertIn(b'ustar/dirtype-with-size/', out)
+        # Make sure it is able to print unencodable characters
+        self.assertIn(br'ustar/umlauts-'
+                      br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf', out)
+        self.assertIn(br'misc/regtype-hpux-signed-chksum-'
+                      br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf', out)
+        self.assertIn(br'misc/regtype-old-v7-signed-chksum-'
+                      br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf', out)
+        self.assertIn(br'pax/bad-pax-\udce4\udcf6\udcfc', out)
+        self.assertIn(br'pax/hdrcharset-\udce4\udcf6\udcfc', out)
+        # Make sure it prints files separated by one newline without any
+        # 'ls -l'-like accessories if verbose flag is not being used
+        # ...
+        # ustar/conttype
+        # ustar/regtype
+        # ...
+        self.assertRegex(out, br'ustar/conttype ?\r?\n'
+                              br'ustar/regtype ?\r?\n')
+        # Make sure it does not print the source of link without verbose flag
+        self.assertNotIn(b'link to', out)
+        self.assertNotIn(b'->', out)
+
+    def test_list_verbose(self):
+        tio = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n')
+        with support.swap_attr(sys, 'stdout', tio):
+            self.tar.list(verbose=True)
+        out = tio.detach().getvalue()
+        # Make sure it prints files separated by one newline with 'ls -l'-like
+        # accessories if verbose flag is being used
+        # ...
+        # ?rw-r--r-- tarfile/tarfile     7011 2003-01-06 07:19:43 ustar/conttype
+        # ?rw-r--r-- tarfile/tarfile     7011 2003-01-06 07:19:43 ustar/regtype
+        # ...
+        self.assertRegex(out, (br'\?rw-r--r-- tarfile/tarfile\s+7011 '
+                               br'\d{4}-\d\d-\d\d\s+\d\d:\d\d:\d\d '
+                               br'ustar/\w+type ?\r?\n') * 2)
+        # Make sure it prints the source of link with verbose flag
+        self.assertIn(b'ustar/symtype -> regtype', out)
+        self.assertIn(b'./ustar/linktest2/symtype -> ../linktest1/regtype', out)
+        self.assertIn(b'./ustar/linktest2/lnktype link to '
+                      b'./ustar/linktest1/regtype', out)
+        self.assertIn(b'gnu' + (b'/123' * 125) + b'/longlink link to gnu' +
+                      (b'/123' * 125) + b'/longname', out)
+        self.assertIn(b'pax' + (b'/123' * 125) + b'/longlink link to pax' +
+                      (b'/123' * 125) + b'/longname', out)
+
+
+class GzipListTest(GzipTest, ListTest):
+    pass
+
+
+class Bz2ListTest(Bz2Test, ListTest):
+    pass
+
+
+class LzmaListTest(LzmaTest, ListTest):
+    pass
+
+
 class CommonReadTest(ReadTest):
 
     def test_empty_tarfile(self):
@@ -1766,8 +1844,9 @@
 
 class CommandLineTest(unittest.TestCase):
 
-    def tarfilecmd(self, *args):
-        rc, out, err = script_helper.assert_python_ok('-m', 'tarfile', *args)
+    def tarfilecmd(self, *args, **kwargs):
+        rc, out, err = script_helper.assert_python_ok('-m', 'tarfile', *args,
+                                                      **kwargs)
         return out.replace(os.linesep.encode(), b'\n')
 
     def tarfilecmd_failure(self, *args):
@@ -1815,24 +1894,26 @@
                     support.unlink(tmpname)
 
     def test_list_command(self):
-        self.make_simple_tarfile(tmpname)
-        with support.captured_stdout() as t:
-            with tarfile.open(tmpname, 'r') as tf:
-                tf.list(verbose=False)
-        expected = t.getvalue().encode(sys.getfilesystemencoding())
-        for opt in '-l', '--list':
-            out = self.tarfilecmd(opt, tmpname)
-            self.assertEqual(out, expected)
+        for tar_name in testtarnames:
+            with support.captured_stdout() as t:
+                with tarfile.open(tar_name, 'r') as tf:
+                    tf.list(verbose=False)
+            expected = t.getvalue().encode('ascii', 'backslashreplace')
+            for opt in '-l', '--list':
+                out = self.tarfilecmd(opt, tar_name,
+                                      PYTHONIOENCODING='ascii')
+                self.assertEqual(out, expected)
 
     def test_list_command_verbose(self):
-        self.make_simple_tarfile(tmpname)
-        with support.captured_stdout() as t:
-            with tarfile.open(tmpname, 'r') as tf:
-                tf.list(verbose=True)
-        expected = t.getvalue().encode(sys.getfilesystemencoding())
-        for opt in '-v', '--verbose':
-            out = self.tarfilecmd(opt, '-l', tmpname)
-            self.assertEqual(out, expected)
+        for tar_name in testtarnames:
+            with support.captured_stdout() as t:
+                with tarfile.open(tar_name, 'r') as tf:
+                    tf.list(verbose=True)
+            expected = t.getvalue().encode('ascii', 'backslashreplace')
+            for opt in '-v', '--verbose':
+                out = self.tarfilecmd(opt, '-l', tar_name,
+                                      PYTHONIOENCODING='ascii')
+                self.assertEqual(out, expected)
 
     def test_list_command_invalid_file(self):
         zipname = support.findfile('zipdir.zip')
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -24,6 +24,9 @@
 Library
 -------
 
+- Issue #19920: TarFile.list() no longer fails when outputs a listing
+  containing non-encodable characters.  Based on patch by Vajrasky Kok.
+
 - Issue #20515: Fix NULL pointer dereference introduced by issue #20368.
 
 - Issue #19186: Restore namespacing of expat symbols inside the pyexpat module.
@@ -118,6 +121,8 @@
 Tests
 -----
 
+- Issue #19920: Added tests for TarFile.list().  Based on patch by Vajrasky Kok.
+
 - Issue #19990: Added tests for the imghdr module.  Based on patch by
   Claudiu Popa.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list