[Python-checkins] cpython (2.7): Issue #24259: tarfile now raises a ReadError if an archive is truncated inside a data segment.

lars.gustaebel python-checkins at python.org
Mon Jul 6 09:33:11 CEST 2015


https://hg.python.org/cpython/rev/372aa98eb72e
changeset:   96844:372aa98eb72e
branch:      2.7
user:        Lars Gustäbel <lars at gustaebel.de>
date:        Mon Jul 06 09:23:04 2015 +0200
summary:
  Issue #24259: tarfile now raises a ReadError if an archive is truncated inside a data segment.

files:
  Lib/tarfile.py           |  17 ++++++++++++++---
  Lib/test/test_tarfile.py |  24 ++++++++++++++++++++++++
  Misc/NEWS                |   3 +++
  3 files changed, 41 insertions(+), 3 deletions(-)


diff --git a/Lib/tarfile.py b/Lib/tarfile.py
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -744,12 +744,18 @@
         else:
             return self.readsparse(size)
 
+    def __read(self, size):
+        buf = self.fileobj.read(size)
+        if len(buf) != size:
+            raise ReadError("unexpected end of data")
+        return buf
+
     def readnormal(self, size):
         """Read operation for regular files.
         """
         self.fileobj.seek(self.offset + self.position)
         self.position += size
-        return self.fileobj.read(size)
+        return self.__read(size)
 
     def readsparse(self, size):
         """Read operation for sparse files.
@@ -777,7 +783,7 @@
             realpos = section.realpos + self.position - section.offset
             self.fileobj.seek(self.offset + realpos)
             self.position += size
-            return self.fileobj.read(size)
+            return self.__read(size)
         else:
             self.position += size
             return NUL * size
@@ -2336,8 +2342,13 @@
             self.firstmember = None
             return m
 
+        # Advance the file pointer.
+        if self.offset != self.fileobj.tell():
+            self.fileobj.seek(self.offset - 1)
+            if not self.fileobj.read(1):
+                raise ReadError("unexpected end of data")
+
         # Read the next block.
-        self.fileobj.seek(self.offset)
         tarinfo = None
         while True:
             try:
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -285,6 +285,30 @@
                     "ignore_zeros=True should have skipped the %r-blocks" % char)
             tar.close()
 
+    def test_premature_end_of_archive(self):
+        for size in (512, 600, 1024, 1200):
+            with tarfile.open(tmpname, "w:") as tar:
+                t = tarfile.TarInfo("foo")
+                t.size = 1024
+                tar.addfile(t, StringIO.StringIO("a" * 1024))
+
+            with open(tmpname, "r+b") as fobj:
+                fobj.truncate(size)
+
+            with tarfile.open(tmpname) as tar:
+                with self.assertRaisesRegexp(tarfile.ReadError, "unexpected end of data"):
+                    for t in tar:
+                        pass
+
+            with tarfile.open(tmpname) as tar:
+                t = tar.next()
+
+                with self.assertRaisesRegexp(tarfile.ReadError, "unexpected end of data"):
+                    tar.extract(t, TEMPDIR)
+
+                with self.assertRaisesRegexp(tarfile.ReadError, "unexpected end of data"):
+                    tar.extractfile(t).read()
+
 
 class MiscReadTest(CommonReadTest):
     taropen = tarfile.TarFile.taropen
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -34,6 +34,9 @@
 Library
 -------
 
+- Issue #24259: tarfile now raises a ReadError if an archive is truncated
+  inside a data segment.
+
 - Issue #24514: tarfile now tolerates number fields consisting of only
   whitespace.
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list