[Python-checkins] cpython (merge 3.2 -> default): Merge with 3.2: Issue #13158: Fix decoding and encoding of base-256 number fields in tarfile.

lars.gustaebel python-checkins at python.org
Fri Oct 14 12:54:46 CEST 2011


http://hg.python.org/cpython/rev/158430b2b552
changeset:   72924:158430b2b552
parent:      72922:2c223d686feb
parent:      72923:341008eab87d
user:        Lars Gustäbel <lars at gustaebel.de>
date:        Fri Oct 14 12:53:10 2011 +0200
summary:
  Merge with 3.2: Issue #13158: Fix decoding and encoding of base-256 number fields in tarfile.

The nti() function that converts a number field from a tar header to a number
failed to decode GNU tar specific base-256 fields. I also added support for
decoding and encoding negative base-256 number fields.

files:
  Lib/tarfile.py           |  45 ++++++++++++++-------------
  Lib/test/test_tarfile.py |  24 ++++++++++++++-
  Misc/NEWS                |   3 +
  3 files changed, 50 insertions(+), 22 deletions(-)


diff --git a/Lib/tarfile.py b/Lib/tarfile.py
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -194,16 +194,18 @@
     """
     # There are two possible encodings for a number field, see
     # itn() below.
-    if s[0] != chr(0o200):
+    if s[0] in (0o200, 0o377):
+        n = 0
+        for i in range(len(s) - 1):
+            n <<= 8
+            n += s[i + 1]
+        if s[0] == 0o377:
+            n = -(256 ** (len(s) - 1) - n)
+    else:
         try:
             n = int(nts(s, "ascii", "strict") or "0", 8)
         except ValueError:
             raise InvalidHeaderError("invalid header")
-    else:
-        n = 0
-        for i in range(len(s) - 1):
-            n <<= 8
-            n += ord(s[i + 1])
     return n
 
 def itn(n, digits=8, format=DEFAULT_FORMAT):
@@ -212,25 +214,26 @@
     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
     # octal digits followed by a null-byte, this allows values up to
     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
-    # that if necessary. A leading 0o200 byte indicates this particular
-    # encoding, the following digits-1 bytes are a big-endian
-    # representation. This allows values up to (256**(digits-1))-1.
+    # that if necessary. A leading 0o200 or 0o377 byte indicates this
+    # particular encoding; the following digits-1 bytes are a big-endian
+    # base-256 representation. This allows values up to (256**(digits-1))-1.
+    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
+    # number.
     if 0 <= n < 8 ** (digits - 1):
         s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
+    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
+        if n >= 0:
+            s = bytearray([0o200])
+        else:
+            s = bytearray([0o377])
+            n = 256 ** digits + n
+
+        for i in range(digits - 1):
+            s.insert(1, n & 0o377)
+            n >>= 8
     else:
-        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
-            raise ValueError("overflow in number field")
+        raise ValueError("overflow in number field")
 
-        if n < 0:
-            # XXX We mimic GNU tar's behaviour with negative numbers,
-            # this could raise OverflowError.
-            n = struct.unpack("L", struct.pack("l", n))[0]
-
-        s = bytearray()
-        for i in range(digits - 1):
-            s.insert(0, n & 0o377)
-            n >>= 8
-        s.insert(0, 0o200)
     return s
 
 def calc_chksums(buf):
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1582,9 +1582,31 @@
         self.assertEqual(tarfile.nts(b"foo\0\0\0\0\0", "ascii", "strict"), "foo")
         self.assertEqual(tarfile.nts(b"foo\0bar\0", "ascii", "strict"), "foo")
 
-    def test_number_fields(self):
+    def test_read_number_fields(self):
+        # Issue 13158: Test if GNU tar specific base-256 number fields
+        # are decoded correctly.
+        self.assertEqual(tarfile.nti(b"0000001\x00"), 1)
+        self.assertEqual(tarfile.nti(b"7777777\x00"), 0o7777777)
+        self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\x00\x20\x00\x00"), 0o10000000)
+        self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\xff\xff\xff\xff"), 0xffffffff)
+        self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\xff"), -1)
+        self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\x9c"), -100)
+        self.assertEqual(tarfile.nti(b"\xff\x00\x00\x00\x00\x00\x00\x00"), -0x100000000000000)
+
+    def test_write_number_fields(self):
         self.assertEqual(tarfile.itn(1), b"0000001\x00")
+        self.assertEqual(tarfile.itn(0o7777777), b"7777777\x00")
+        self.assertEqual(tarfile.itn(0o10000000), b"\x80\x00\x00\x00\x00\x20\x00\x00")
         self.assertEqual(tarfile.itn(0xffffffff), b"\x80\x00\x00\x00\xff\xff\xff\xff")
+        self.assertEqual(tarfile.itn(-1), b"\xff\xff\xff\xff\xff\xff\xff\xff")
+        self.assertEqual(tarfile.itn(-100), b"\xff\xff\xff\xff\xff\xff\xff\x9c")
+        self.assertEqual(tarfile.itn(-0x100000000000000), b"\xff\x00\x00\x00\x00\x00\x00\x00")
+
+    def test_number_field_limits(self):
+        self.assertRaises(ValueError, tarfile.itn, -1, 8, tarfile.USTAR_FORMAT)
+        self.assertRaises(ValueError, tarfile.itn, 0o10000000, 8, tarfile.USTAR_FORMAT)
+        self.assertRaises(ValueError, tarfile.itn, -0x10000000001, 6, tarfile.GNU_FORMAT)
+        self.assertRaises(ValueError, tarfile.itn, 0x10000000000, 6, tarfile.GNU_FORMAT)
 
 
 class ContextManagerTest(unittest.TestCase):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -305,6 +305,9 @@
 Library
 -------
 
+- Issue #13158: Fix decoding and encoding of GNU tar specific base-256 number
+  fields in tarfile.
+
 - Issue #13025: mimetypes is now reading MIME types using the UTF-8 encoding,
   instead of the locale encoding.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list