[Python-checkins] gh-88233: zipfile: handle extras after a zip64 extra (GH-96161)

miss-islington webhook-mailer at python.org
Mon Feb 20 12:33:07 EST 2023


https://github.com/python/cpython/commit/62c0327487ce39ec6504919b7712e0f197075ef9
commit: 62c0327487ce39ec6504919b7712e0f197075ef9
branch: 3.11
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2023-02-20T09:33:00-08:00
summary:

gh-88233: zipfile: handle extras after a zip64 extra (GH-96161)


Previously, any data _after_ the zip64 extra would be removed.

With many new tests.

Fixes GH-88233
(cherry picked from commit 59e86caca812fc993c5eb7dc8ccd1508ffccba86)

Co-authored-by: Tim Hatch <tim at timhatch.com>
Automerge-Triggered-By: GH:jaraco

files:
A Misc/NEWS.d/next/Library/2022-09-05-12-17-34.gh-issue-88233.gff9qJ.rst
M Lib/test/test_zipfile.py
M Lib/zipfile.py

diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 35d78d96a68e..da4ddb916f8c 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -3485,5 +3485,67 @@ def test_cli_with_metadata_encoding_extract(self):
             self.assertIn(name, listing)
 
 
+class StripExtraTests(unittest.TestCase):
+    # Note: all of the "z" characters are technically invalid, but up
+    # to 3 bytes at the end of the extra will be passed through as they
+    # are too short to encode a valid extra.
+
+    ZIP64_EXTRA = 1
+
+    def test_no_data(self):
+        s = struct.Struct("<HH")
+        a = s.pack(self.ZIP64_EXTRA, 0)
+        b = s.pack(2, 0)
+        c = s.pack(3, 0)
+
+        self.assertEqual(b'', zipfile._strip_extra(a, (self.ZIP64_EXTRA,)))
+        self.assertEqual(b, zipfile._strip_extra(b, (self.ZIP64_EXTRA,)))
+        self.assertEqual(
+            b+b"z", zipfile._strip_extra(b+b"z", (self.ZIP64_EXTRA,)))
+
+        self.assertEqual(b+c, zipfile._strip_extra(a+b+c, (self.ZIP64_EXTRA,)))
+        self.assertEqual(b+c, zipfile._strip_extra(b+a+c, (self.ZIP64_EXTRA,)))
+        self.assertEqual(b+c, zipfile._strip_extra(b+c+a, (self.ZIP64_EXTRA,)))
+
+    def test_with_data(self):
+        s = struct.Struct("<HH")
+        a = s.pack(self.ZIP64_EXTRA, 1) + b"a"
+        b = s.pack(2, 2) + b"bb"
+        c = s.pack(3, 3) + b"ccc"
+
+        self.assertEqual(b"", zipfile._strip_extra(a, (self.ZIP64_EXTRA,)))
+        self.assertEqual(b, zipfile._strip_extra(b, (self.ZIP64_EXTRA,)))
+        self.assertEqual(
+            b+b"z", zipfile._strip_extra(b+b"z", (self.ZIP64_EXTRA,)))
+
+        self.assertEqual(b+c, zipfile._strip_extra(a+b+c, (self.ZIP64_EXTRA,)))
+        self.assertEqual(b+c, zipfile._strip_extra(b+a+c, (self.ZIP64_EXTRA,)))
+        self.assertEqual(b+c, zipfile._strip_extra(b+c+a, (self.ZIP64_EXTRA,)))
+
+    def test_multiples(self):
+        s = struct.Struct("<HH")
+        a = s.pack(self.ZIP64_EXTRA, 1) + b"a"
+        b = s.pack(2, 2) + b"bb"
+
+        self.assertEqual(b"", zipfile._strip_extra(a+a, (self.ZIP64_EXTRA,)))
+        self.assertEqual(b"", zipfile._strip_extra(a+a+a, (self.ZIP64_EXTRA,)))
+        self.assertEqual(
+            b"z", zipfile._strip_extra(a+a+b"z", (self.ZIP64_EXTRA,)))
+        self.assertEqual(
+            b+b"z", zipfile._strip_extra(a+a+b+b"z", (self.ZIP64_EXTRA,)))
+
+        self.assertEqual(b, zipfile._strip_extra(a+a+b, (self.ZIP64_EXTRA,)))
+        self.assertEqual(b, zipfile._strip_extra(a+b+a, (self.ZIP64_EXTRA,)))
+        self.assertEqual(b, zipfile._strip_extra(b+a+a, (self.ZIP64_EXTRA,)))
+
+    def test_too_short(self):
+        self.assertEqual(b"", zipfile._strip_extra(b"", (self.ZIP64_EXTRA,)))
+        self.assertEqual(b"z", zipfile._strip_extra(b"z", (self.ZIP64_EXTRA,)))
+        self.assertEqual(
+            b"zz", zipfile._strip_extra(b"zz", (self.ZIP64_EXTRA,)))
+        self.assertEqual(
+            b"zzz", zipfile._strip_extra(b"zzz", (self.ZIP64_EXTRA,)))
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index f69a43b764ef..a0b29e25a3cc 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -211,6 +211,8 @@ def _strip_extra(extra, xids):
         i = j
     if not modified:
         return extra
+    if start != len(extra):
+        buffer.append(extra[start:])
     return b''.join(buffer)
 
 def _check_zipfile(fp):
diff --git a/Misc/NEWS.d/next/Library/2022-09-05-12-17-34.gh-issue-88233.gff9qJ.rst b/Misc/NEWS.d/next/Library/2022-09-05-12-17-34.gh-issue-88233.gff9qJ.rst
new file mode 100644
index 000000000000..806f7011edc3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-09-05-12-17-34.gh-issue-88233.gff9qJ.rst
@@ -0,0 +1,2 @@
+Correctly preserve "extra" fields in ``zipfile`` regardless of their
+ordering relative to a zip64 "extra."



More information about the Python-checkins mailing list