[Python-checkins] r46967 - in python/trunk: Doc/lib/libzipfile.tex Lib/test/test_zipfile.py Lib/test/test_zipfile64.py Lib/zipfile.py Misc/NEWS

ronald.oussoren python-checkins at python.org
Thu Jun 15 10:14:23 CEST 2006


Author: ronald.oussoren
Date: Thu Jun 15 10:14:18 2006
New Revision: 46967

Added:
   python/trunk/Lib/test/test_zipfile64.py
Modified:
   python/trunk/Doc/lib/libzipfile.tex
   python/trunk/Lib/test/test_zipfile.py
   python/trunk/Lib/zipfile.py
   python/trunk/Misc/NEWS
Log:
Patch #1446489	(zipfile: support for ZIP64)


Modified: python/trunk/Doc/lib/libzipfile.tex
==============================================================================
--- python/trunk/Doc/lib/libzipfile.tex	(original)
+++ python/trunk/Doc/lib/libzipfile.tex	Thu Jun 15 10:14:18 2006
@@ -17,7 +17,8 @@
 Note}.
 
 This module does not currently handle ZIP files which have appended
-comments, or multi-disk ZIP files.
+comments, or multi-disk ZIP files. It can handle ZIP files that use the
+ZIP64 extensions (that is, ZIP files that are more than 4 GByte in size).
 
 The available attributes of this module are:
 
@@ -25,6 +26,11 @@
   The error raised for bad ZIP files.
 \end{excdesc}
 
+\begin{excdesc}{LargeZipFile}
+  The error raised when a ZIP file would require ZIP64 functionality but that
+  functionality has not been enabled.
+\end{excdesc}
+
 \begin{classdesc*}{ZipFile}
   The class for reading and writing ZIP files.  See
   ``\citetitle{ZipFile Objects}'' (section \ref{zipfile-objects}) for
@@ -77,7 +83,7 @@
 
 \subsection{ZipFile Objects \label{zipfile-objects}}
 
-\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression}}} 
+\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression\optional{, allowZip64}}}} 
   Open a ZIP file, where \var{file} can be either a path to a file
   (a string) or a file-like object.  The \var{mode} parameter
   should be \code{'r'} to read an existing file, \code{'w'} to
@@ -100,6 +106,12 @@
   is specified but the \refmodule{zlib} module is not available,
   \exception{RuntimeError} is also raised.  The default is
   \constant{ZIP_STORED}. 
+  If \var{allowZip64} is \code{True}, zipfile will create ZIP files that use
+  the ZIP64 extensions when the ZIP file is larger than 2 GBytes. If it is
+  false (the default), zipfile will raise an exception when the ZIP file would
+  require ZIP64 extensions. ZIP64 extensions are disabled by default because
+  the default zip and unzip commands on Unix (the InfoZIP utilities) don't
+  support these extensions.
 \end{classdesc}
 
 \begin{methoddesc}{close}{}
@@ -132,8 +144,8 @@
 \end{methoddesc}
 
 \begin{methoddesc}{testzip}{}
-  Read all the files in the archive and check their CRC's.  Return the
-  name of the first bad file, or else return \code{None}.
+  Read all the files in the archive and check their CRC's and file
+  headers.  Return the name of the first bad file, or else return \code{None}.
 \end{methoddesc}
 
 \begin{methoddesc}{write}{filename\optional{, arcname\optional{,
@@ -284,10 +296,6 @@
   Byte offset to the file header.
 \end{memberdesc}
 
-\begin{memberdesc}[ZipInfo]{file_offset}
-  Byte offset to the start of the file data.
-\end{memberdesc}
-
 \begin{memberdesc}[ZipInfo]{CRC}
   CRC-32 of the uncompressed file.
 \end{memberdesc}

Modified: python/trunk/Lib/test/test_zipfile.py
==============================================================================
--- python/trunk/Lib/test/test_zipfile.py	(original)
+++ python/trunk/Lib/test/test_zipfile.py	Thu Jun 15 10:14:18 2006
@@ -4,7 +4,7 @@
 except ImportError:
     zlib = None
 
-import zipfile, os, unittest
+import zipfile, os, unittest, sys, shutil
 
 from StringIO import StringIO
 from tempfile import TemporaryFile
@@ -28,14 +28,70 @@
         zipfp = zipfile.ZipFile(f, "w", compression)
         zipfp.write(TESTFN, "another"+os.extsep+"name")
         zipfp.write(TESTFN, TESTFN)
+        zipfp.writestr("strfile", self.data)
         zipfp.close()
 
         # Read the ZIP archive
         zipfp = zipfile.ZipFile(f, "r", compression)
         self.assertEqual(zipfp.read(TESTFN), self.data)
         self.assertEqual(zipfp.read("another"+os.extsep+"name"), self.data)
+        self.assertEqual(zipfp.read("strfile"), self.data)
+
+        # Print the ZIP directory
+        fp = StringIO()
+        stdout = sys.stdout
+        try:
+            sys.stdout = fp
+
+            zipfp.printdir()
+        finally:
+            sys.stdout = stdout
+        
+        directory = fp.getvalue()
+        lines = directory.splitlines()
+        self.assertEquals(len(lines), 4) # Number of files + header
+
+        self.assert_('File Name' in lines[0])
+        self.assert_('Modified' in lines[0])
+        self.assert_('Size' in lines[0])
+
+        fn, date, time, size = lines[1].split()
+        self.assertEquals(fn, 'another.name')
+        # XXX: timestamp is not tested
+        self.assertEquals(size, str(len(self.data)))
+
+        # Check the namelist
+        names = zipfp.namelist()
+        self.assertEquals(len(names), 3)
+        self.assert_(TESTFN in names)
+        self.assert_("another"+os.extsep+"name" in names)
+        self.assert_("strfile" in names)
+
+        # Check infolist
+        infos = zipfp.infolist()
+        names = [ i.filename for i in infos ]
+        self.assertEquals(len(names), 3)
+        self.assert_(TESTFN in names)
+        self.assert_("another"+os.extsep+"name" in names)
+        self.assert_("strfile" in names)
+        for i in infos:
+            self.assertEquals(i.file_size, len(self.data))
+
+        # check getinfo
+        for nm in (TESTFN, "another"+os.extsep+"name", "strfile"):
+            info = zipfp.getinfo(nm)
+            self.assertEquals(info.filename, nm)
+            self.assertEquals(info.file_size, len(self.data))
+
+        # Check that testzip doesn't raise an exception
+        zipfp.testzip()
+
+
         zipfp.close()
 
+
+
+
     def testStored(self):
         for f in (TESTFN2, TemporaryFile(), StringIO()):
             self.zipTest(f, zipfile.ZIP_STORED)
@@ -59,6 +115,197 @@
         os.remove(TESTFN)
         os.remove(TESTFN2)
 
+class TestZip64InSmallFiles(unittest.TestCase):
+    # These tests test the ZIP64 functionality without using large files,
+    # see test_zipfile64 for proper tests.
+
+    def setUp(self):
+        self._limit = zipfile.ZIP64_LIMIT
+        zipfile.ZIP64_LIMIT = 5
+
+        line_gen = ("Test of zipfile line %d." % i for i in range(0, 1000))
+        self.data = '\n'.join(line_gen)
+
+        # Make a source file with some lines
+        fp = open(TESTFN, "wb")
+        fp.write(self.data)
+        fp.close()
+
+    def largeFileExceptionTest(self, f, compression):
+        zipfp = zipfile.ZipFile(f, "w", compression)
+        self.assertRaises(zipfile.LargeZipFile, 
+                zipfp.write, TESTFN, "another"+os.extsep+"name")
+        zipfp.close()
+
+    def largeFileExceptionTest2(self, f, compression):
+        zipfp = zipfile.ZipFile(f, "w", compression)
+        self.assertRaises(zipfile.LargeZipFile, 
+                zipfp.writestr, "another"+os.extsep+"name", self.data)
+        zipfp.close()
+
+    def testLargeFileException(self):
+        for f in (TESTFN2, TemporaryFile(), StringIO()):
+            self.largeFileExceptionTest(f, zipfile.ZIP_STORED)
+            self.largeFileExceptionTest2(f, zipfile.ZIP_STORED)
+
+    def zipTest(self, f, compression):
+        # Create the ZIP archive
+        zipfp = zipfile.ZipFile(f, "w", compression, allowZip64=True)
+        zipfp.write(TESTFN, "another"+os.extsep+"name")
+        zipfp.write(TESTFN, TESTFN)
+        zipfp.writestr("strfile", self.data)
+        zipfp.close()
+
+        # Read the ZIP archive
+        zipfp = zipfile.ZipFile(f, "r", compression)
+        self.assertEqual(zipfp.read(TESTFN), self.data)
+        self.assertEqual(zipfp.read("another"+os.extsep+"name"), self.data)
+        self.assertEqual(zipfp.read("strfile"), self.data)
+
+        # Print the ZIP directory
+        fp = StringIO()
+        stdout = sys.stdout
+        try:
+            sys.stdout = fp
+
+            zipfp.printdir()
+        finally:
+            sys.stdout = stdout
+        
+        directory = fp.getvalue()
+        lines = directory.splitlines()
+        self.assertEquals(len(lines), 4) # Number of files + header
+
+        self.assert_('File Name' in lines[0])
+        self.assert_('Modified' in lines[0])
+        self.assert_('Size' in lines[0])
+
+        fn, date, time, size = lines[1].split()
+        self.assertEquals(fn, 'another.name')
+        # XXX: timestamp is not tested
+        self.assertEquals(size, str(len(self.data)))
+
+        # Check the namelist
+        names = zipfp.namelist()
+        self.assertEquals(len(names), 3)
+        self.assert_(TESTFN in names)
+        self.assert_("another"+os.extsep+"name" in names)
+        self.assert_("strfile" in names)
+
+        # Check infolist
+        infos = zipfp.infolist()
+        names = [ i.filename for i in infos ]
+        self.assertEquals(len(names), 3)
+        self.assert_(TESTFN in names)
+        self.assert_("another"+os.extsep+"name" in names)
+        self.assert_("strfile" in names)
+        for i in infos:
+            self.assertEquals(i.file_size, len(self.data))
+
+        # check getinfo
+        for nm in (TESTFN, "another"+os.extsep+"name", "strfile"):
+            info = zipfp.getinfo(nm)
+            self.assertEquals(info.filename, nm)
+            self.assertEquals(info.file_size, len(self.data))
+
+        # Check that testzip doesn't raise an exception
+        zipfp.testzip()
+
+
+        zipfp.close()
+
+    def testStored(self):
+        for f in (TESTFN2, TemporaryFile(), StringIO()):
+            self.zipTest(f, zipfile.ZIP_STORED)
+
+
+    if zlib:
+        def testDeflated(self):
+            for f in (TESTFN2, TemporaryFile(), StringIO()):
+                self.zipTest(f, zipfile.ZIP_DEFLATED)
+
+    def testAbsoluteArcnames(self):
+        zipfp = zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED, allowZip64=True)
+        zipfp.write(TESTFN, "/absolute")
+        zipfp.close()
+
+        zipfp = zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_STORED)
+        self.assertEqual(zipfp.namelist(), ["absolute"])
+        zipfp.close()
+
+
+    def tearDown(self):
+        zipfile.ZIP64_LIMIT = self._limit
+        os.remove(TESTFN)
+        os.remove(TESTFN2)
+
+class PyZipFileTests(unittest.TestCase):
+    def testWritePyfile(self):
+        zipfp  = zipfile.PyZipFile(TemporaryFile(), "w")
+        fn = __file__
+        if fn.endswith('.pyc') or fn.endswith('.pyo'):
+            fn = fn[:-1]
+
+        zipfp.writepy(fn)
+
+        bn = os.path.basename(fn)
+        self.assert_(bn not in zipfp.namelist())
+        self.assert_(bn + 'o' in zipfp.namelist() or bn + 'c' in zipfp.namelist())
+        zipfp.close()
+
+
+        zipfp  = zipfile.PyZipFile(TemporaryFile(), "w")
+        fn = __file__
+        if fn.endswith('.pyc') or fn.endswith('.pyo'):
+            fn = fn[:-1]
+
+        zipfp.writepy(fn, "testpackage")
+
+        bn = "%s/%s"%("testpackage", os.path.basename(fn))
+        self.assert_(bn not in zipfp.namelist())
+        self.assert_(bn + 'o' in zipfp.namelist() or bn + 'c' in zipfp.namelist())
+        zipfp.close()
+
+    def testWritePythonPackage(self):
+        import email
+        packagedir = os.path.dirname(email.__file__)
+
+        zipfp  = zipfile.PyZipFile(TemporaryFile(), "w")
+        zipfp.writepy(packagedir)
+
+        # Check for a couple of modules at different levels of the hierarchy
+        names = zipfp.namelist()
+        self.assert_('email/__init__.pyo' in names or 'email/__init__.pyc' in names)
+        self.assert_('email/mime/text.pyo' in names or 'email/mime/text.pyc' in names)
+
+    def testWritePythonDirectory(self):
+        os.mkdir(TESTFN2)
+        try:
+            fp = open(os.path.join(TESTFN2, "mod1.py"), "w")
+            fp.write("print 42\n")
+            fp.close()
+
+            fp = open(os.path.join(TESTFN2, "mod2.py"), "w")
+            fp.write("print 42 * 42\n")
+            fp.close()
+
+            fp = open(os.path.join(TESTFN2, "mod2.txt"), "w")
+            fp.write("bla bla bla\n")
+            fp.close()
+
+            zipfp  = zipfile.PyZipFile(TemporaryFile(), "w")
+            zipfp.writepy(TESTFN2)
+
+            names = zipfp.namelist()
+            self.assert_('mod1.pyc' in names or 'mod1.pyo' in names)
+            self.assert_('mod2.pyc' in names or 'mod2.pyo' in names)
+            self.assert_('mod2.txt' not in names)
+
+        finally:
+            shutil.rmtree(TESTFN2)
+
+
+
 class OtherTests(unittest.TestCase):
     def testCloseErroneousFile(self):
         # This test checks that the ZipFile constructor closes the file object
@@ -103,7 +350,8 @@
         self.assertRaises(RuntimeError, zipf.testzip)
 
 def test_main():
-    run_unittest(TestsWithSourceFile, OtherTests)
+    run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests, PyZipFileTests)
+    #run_unittest(TestZip64InSmallFiles)
 
 if __name__ == "__main__":
     test_main()

Added: python/trunk/Lib/test/test_zipfile64.py
==============================================================================
--- (empty file)
+++ python/trunk/Lib/test/test_zipfile64.py	Thu Jun 15 10:14:18 2006
@@ -0,0 +1,67 @@
+# Tests of the full ZIP64 functionality of zipfile
+# The test_support.requires call is the only reason for keeping this separate
+# from test_zipfile
+from test import test_support
+test_support.requires(
+        'largefile', 
+        'test requires loads of disk-space bytes and a long time to run'
+    )
+
+# We can test part of the module without zlib.
+try:
+    import zlib
+except ImportError:
+    zlib = None
+
+import zipfile, os, unittest
+
+from StringIO import StringIO
+from tempfile import TemporaryFile
+
+from test.test_support import TESTFN, run_unittest
+
+TESTFN2 = TESTFN + "2"
+
+class TestsWithSourceFile(unittest.TestCase):
+    def setUp(self):
+        line_gen = ("Test of zipfile line %d." % i for i in range(0, 1000000))
+        self.data = '\n'.join(line_gen)
+
+        # Make a source file with some lines
+        fp = open(TESTFN, "wb")
+        fp.write(self.data)
+        fp.close()
+
+    def zipTest(self, f, compression):
+        # Create the ZIP archive
+        filecount = int(((1 << 32) / len(self.data)) * 1.5)
+        zipfp = zipfile.ZipFile(f, "w", compression, allowZip64=True)
+
+        for num in range(filecount):
+            zipfp.writestr("testfn%d"%(num,), self.data)
+        zipfp.close()
+
+        # Read the ZIP archive
+        zipfp = zipfile.ZipFile(f, "r", compression)
+        for num in range(filecount):
+            self.assertEqual(zipfp.read("testfn%d"%(num,)), self.data)
+        zipfp.close()
+
+    def testStored(self):
+        for f in (TESTFN2, TemporaryFile()):
+            self.zipTest(f, zipfile.ZIP_STORED)
+
+    if zlib:
+        def testDeflated(self):
+            for f in (TESTFN2, TemporaryFile()):
+                self.zipTest(f, zipfile.ZIP_DEFLATED)
+
+    def tearDown(self):
+        os.remove(TESTFN)
+        os.remove(TESTFN2)
+
+def test_main():
+    run_unittest(TestsWithSourceFile)
+
+if __name__ == "__main__":
+    test_main()

Modified: python/trunk/Lib/zipfile.py
==============================================================================
--- python/trunk/Lib/zipfile.py	(original)
+++ python/trunk/Lib/zipfile.py	Thu Jun 15 10:14:18 2006
@@ -1,7 +1,8 @@
-"Read and write ZIP files."
-
+"""
+Read and write ZIP files.
+"""
 import struct, os, time, sys
-import binascii
+import binascii, cStringIO
 
 try:
     import zlib # We may need its compression method
@@ -9,12 +10,22 @@
     zlib = None
 
 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
-           "ZipInfo", "ZipFile", "PyZipFile"]
+           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
 
 class BadZipfile(Exception):
     pass
+
+
+class LargeZipFile(Exception):
+    """ 
+    Raised when writing a zipfile that requires ZIP64 extensions while
+    those extensions are disabled.
+    """
+
 error = BadZipfile      # The exception raised by this module
 
+ZIP64_LIMIT= (1 << 31) - 1
+
 # constants for Zip file compression methods
 ZIP_STORED = 0
 ZIP_DEFLATED = 8
@@ -27,6 +38,11 @@
 stringCentralDir = "PK\001\002"   # magic number for central directory
 structFileHeader = "<4s2B4HlLL2H"  # 12 items, file header record, 30 bytes
 stringFileHeader = "PK\003\004"   # magic number for file header
+structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
+stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
+structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
+stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
+
 
 # indexes of entries in the central directory structure
 _CD_SIGNATURE = 0
@@ -75,6 +91,40 @@
         pass
     return False
 
+def _EndRecData64(fpin, offset, endrec):
+    """
+    Read the ZIP64 end-of-archive records and use that to update endrec
+    """
+    locatorSize = struct.calcsize(structEndArchive64Locator)
+    fpin.seek(offset - locatorSize, 2)
+    data = fpin.read(locatorSize)
+    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
+    if sig != stringEndArchive64Locator: 
+        return endrec
+
+    if diskno != 0 or disks != 1:
+        raise BadZipfile("zipfiles that span multiple disks are not supported")
+
+    # Assume no 'zip64 extensible data' 
+    endArchiveSize = struct.calcsize(structEndArchive64)
+    fpin.seek(offset - locatorSize - endArchiveSize, 2)
+    data = fpin.read(endArchiveSize)
+    sig, sz, create_version, read_version, disk_num, disk_dir, \
+            dircount, dircount2, dirsize, diroffset = \
+            struct.unpack(structEndArchive64, data)
+    if sig != stringEndArchive64: 
+        return endrec
+
+    # Update the original endrec using data from the ZIP64 record
+    endrec[1] = disk_num
+    endrec[2] = disk_dir
+    endrec[3] = dircount
+    endrec[4] = dircount2
+    endrec[5] = dirsize
+    endrec[6] = diroffset
+    return endrec
+
+
 def _EndRecData(fpin):
     """Return data from the "End of Central Directory" record, or None.
 
@@ -88,6 +138,8 @@
         endrec = list(endrec)
         endrec.append("")               # Append the archive comment
         endrec.append(filesize - 22)    # Append the record start offset
+        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
+            return _EndRecData64(fpin, -22, endrec)
         return endrec
     # Search the last END_BLOCK bytes of the file for the record signature.
     # The comment is appended to the ZIP file and has a 16 bit length.
@@ -106,25 +158,50 @@
             # Append the archive comment and start offset
             endrec.append(comment)
             endrec.append(filesize - END_BLOCK + start)
+            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
+                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
             return endrec
     return      # Error, return None
 
 
-class ZipInfo:
+class ZipInfo (object):
     """Class with attributes describing each file in the ZIP archive."""
 
+    __slots__ = (
+            'orig_filename',
+            'filename',
+            'date_time',
+            'compress_type',
+            'comment',
+            'extra',
+            'create_system',
+            'create_version',
+            'extract_version',
+            'reserved',
+            'flag_bits',
+            'volume',
+            'internal_attr',
+            'external_attr',
+            'header_offset',
+            'CRC',
+            'compress_size',
+            'file_size',
+        )
+
     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
         self.orig_filename = filename   # Original file name in archive
-# Terminate the file name at the first null byte.  Null bytes in file
-# names are used as tricks by viruses in archives.
+
+        # Terminate the file name at the first null byte.  Null bytes in file
+        # names are used as tricks by viruses in archives.
         null_byte = filename.find(chr(0))
         if null_byte >= 0:
             filename = filename[0:null_byte]
-# This is used to ensure paths in generated ZIP files always use
-# forward slashes as the directory separator, as required by the
-# ZIP format specification.
-        if os.sep != "/":
+        # This is used to ensure paths in generated ZIP files always use
+        # forward slashes as the directory separator, as required by the
+        # ZIP format specification.
+        if os.sep != "/" and os.sep in filename:
             filename = filename.replace(os.sep, "/")
+
         self.filename = filename        # Normalized file name
         self.date_time = date_time      # year, month, day, hour, min, sec
         # Standard values:
@@ -145,7 +222,6 @@
         self.external_attr = 0          # External file attributes
         # Other attributes are set by class ZipFile:
         # header_offset         Byte offset to the file header
-        # file_offset           Byte offset to the start of the file data
         # CRC                   CRC-32 of the uncompressed file
         # compress_size         Size of the compressed file
         # file_size             Size of the uncompressed file
@@ -162,29 +238,85 @@
             CRC = self.CRC
             compress_size = self.compress_size
             file_size = self.file_size
+
+        extra = self.extra
+
+        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
+            # File is larger than what fits into a 4 byte integer,
+            # fall back to the ZIP64 extension
+            fmt = '<hhqq'
+            extra = extra + struct.pack(fmt,
+                    1, struct.calcsize(fmt)-4, file_size, compress_size)
+            file_size = 0xffffffff # -1
+            compress_size = 0xffffffff # -1
+            self.extract_version = max(45, self.extract_version)
+            self.create_version = max(45, self.extract_version)
+
         header = struct.pack(structFileHeader, stringFileHeader,
                  self.extract_version, self.reserved, self.flag_bits,
                  self.compress_type, dostime, dosdate, CRC,
                  compress_size, file_size,
-                 len(self.filename), len(self.extra))
-        return header + self.filename + self.extra
+                 len(self.filename), len(extra))
+        return header + self.filename + extra
+
+    def _decodeExtra(self):
+        # Try to decode the extra field.
+        extra = self.extra
+        unpack = struct.unpack
+        while extra:
+            tp, ln = unpack('<hh', extra[:4])
+            if tp == 1:
+                if ln >= 24:
+                    counts = unpack('<qqq', extra[4:28])
+                elif ln == 16:
+                    counts = unpack('<qq', extra[4:20])
+                elif ln == 8:
+                    counts = unpack('<q', extra[4:12])
+                elif ln == 0:
+                    counts = ()
+                else:
+                    raise RuntimeError, "Corrupt extra field %s"%(ln,)
 
+                idx = 0
+
+                # ZIP64 extension (large files and/or large archives)
+                if self.file_size == -1 or self.file_size == 0xFFFFFFFFL:
+                    self.file_size = counts[idx]
+                    idx += 1
+
+                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL:
+                    self.compress_size = counts[idx]
+                    idx += 1
+
+                if self.header_offset == -1 or self.header_offset == 0xffffffffL:
+                    old = self.header_offset
+                    self.header_offset = counts[idx]
+                    idx+=1
+
+            extra = extra[ln+4:]
+               
 
 class ZipFile:
     """ Class with methods to open, read, write, close, list zip files.
 
-    z = ZipFile(file, mode="r", compression=ZIP_STORED)
+    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
 
     file: Either the path to the file, or a file-like object.
           If it is a path, the file will be opened and closed by ZipFile.
     mode: The mode can be either read "r", write "w" or append "a".
     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
+    allowZip64: if True ZipFile will create files with ZIP64 extensions when
+                needed, otherwise it will raise an exception when this would
+                be necessary.
+
     """
 
     fp = None                   # Set here since __del__ checks it
 
-    def __init__(self, file, mode="r", compression=ZIP_STORED):
+    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
         """Open the ZIP file with mode read "r", write "w" or append "a"."""
+        self._allowZip64 = allowZip64
+        self._didModify = False
         if compression == ZIP_STORED:
             pass
         elif compression == ZIP_DEFLATED:
@@ -250,7 +382,10 @@
         offset_cd = endrec[6]   # offset of central directory
         self.comment = endrec[8]        # archive comment
         # endrec[9] is the offset of the "End of Central Dir" record
-        x = endrec[9] - size_cd
+        if endrec[9] > ZIP64_LIMIT:
+            x = endrec[9] - size_cd - 56 - 20
+        else:
+            x = endrec[9] - size_cd
         # "concat" is zero, unless zip was concatenated to another file
         concat = x - offset_cd
         if self.debug > 2:
@@ -258,6 +393,8 @@
         # self.start_dir:  Position of start of central directory
         self.start_dir = offset_cd + concat
         fp.seek(self.start_dir, 0)
+        data = fp.read(size_cd)
+        fp = cStringIO.StringIO(data)
         total = 0
         while total < size_cd:
             centdir = fp.read(46)
@@ -275,8 +412,7 @@
             total = (total + centdir[_CD_FILENAME_LENGTH]
                      + centdir[_CD_EXTRA_FIELD_LENGTH]
                      + centdir[_CD_COMMENT_LENGTH])
-            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
-            # file_offset must be computed below...
+            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
             (x.create_version, x.create_system, x.extract_version, x.reserved,
                 x.flag_bits, x.compress_type, t, d,
                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
@@ -284,28 +420,14 @@
             # Convert date/time code to (year, month, day, hour, min, sec)
             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
+
+            x._decodeExtra()
+            x.header_offset = x.header_offset + concat
             self.filelist.append(x)
             self.NameToInfo[x.filename] = x
             if self.debug > 2:
                 print "total", total
-        for data in self.filelist:
-            fp.seek(data.header_offset, 0)
-            fheader = fp.read(30)
-            if fheader[0:4] != stringFileHeader:
-                raise BadZipfile, "Bad magic number for file header"
-            fheader = struct.unpack(structFileHeader, fheader)
-            # file_offset is computed here, since the extra field for
-            # the central directory and for the local file header
-            # refer to different fields, and they can have different
-            # lengths
-            data.file_offset = (data.header_offset + 30
-                                + fheader[_FH_FILENAME_LENGTH]
-                                + fheader[_FH_EXTRA_FIELD_LENGTH])
-            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
-            if fname != data.orig_filename:
-                raise RuntimeError, \
-                      'File name in directory "%s" and header "%s" differ.' % (
-                          data.orig_filename, fname)
+
 
     def namelist(self):
         """Return a list of file names in the archive."""
@@ -334,6 +456,7 @@
             except BadZipfile:
                 return zinfo.filename
 
+
     def getinfo(self, name):
         """Return the instance of ZipInfo given 'name'."""
         return self.NameToInfo[name]
@@ -347,7 +470,24 @@
                   "Attempt to read ZIP archive that was already closed"
         zinfo = self.getinfo(name)
         filepos = self.fp.tell()
-        self.fp.seek(zinfo.file_offset, 0)
+
+        self.fp.seek(zinfo.header_offset, 0)
+
+        # Skip the file header:
+        fheader = self.fp.read(30)
+        if fheader[0:4] != stringFileHeader:
+            raise BadZipfile, "Bad magic number for file header"
+
+        fheader = struct.unpack(structFileHeader, fheader)
+        fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
+        if fheader[_FH_EXTRA_FIELD_LENGTH]:
+            self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
+
+        if fname != zinfo.orig_filename:
+            raise BadZipfile, \
+                      'File name in directory "%s" and header "%s" differ.' % (
+                          zinfo.orig_filename, fname)
+
         bytes = self.fp.read(zinfo.compress_size)
         self.fp.seek(filepos, 0)
         if zinfo.compress_type == ZIP_STORED:
@@ -388,6 +528,12 @@
         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
             raise RuntimeError, \
                   "That compression method is not supported"
+        if zinfo.file_size > ZIP64_LIMIT:
+            if not self._allowZip64:
+                raise LargeZipFile("Filesize would require ZIP64 extensions")
+        if zinfo.header_offset > ZIP64_LIMIT:
+            if not self._allowZip64:
+                raise LargeZipFile("Zipfile size would require ZIP64 extensions")
 
     def write(self, filename, arcname=None, compress_type=None):
         """Put the bytes from filename into the archive under the name
@@ -407,16 +553,19 @@
             zinfo.compress_type = self.compression
         else:
             zinfo.compress_type = compress_type
-        self._writecheck(zinfo)
-        fp = open(filename, "rb")
+
+        zinfo.file_size = st.st_size
         zinfo.flag_bits = 0x00
         zinfo.header_offset = self.fp.tell()    # Start of header bytes
+
+        self._writecheck(zinfo)
+        self._didModify = True
+        fp = open(filename, "rb")
         # Must overwrite CRC and sizes with correct data later
         zinfo.CRC = CRC = 0
         zinfo.compress_size = compress_size = 0
         zinfo.file_size = file_size = 0
         self.fp.write(zinfo.FileHeader())
-        zinfo.file_offset = self.fp.tell()      # Start of file bytes
         if zinfo.compress_type == ZIP_DEFLATED:
             cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                  zlib.DEFLATED, -15)
@@ -461,8 +610,10 @@
             zinfo.compress_type = self.compression
         else:
             zinfo = zinfo_or_arcname
-        self._writecheck(zinfo)
         zinfo.file_size = len(bytes)            # Uncompressed size
+        zinfo.header_offset = self.fp.tell()    # Start of header bytes
+        self._writecheck(zinfo)
+        self._didModify = True
         zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
         if zinfo.compress_type == ZIP_DEFLATED:
             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
@@ -473,8 +624,8 @@
             zinfo.compress_size = zinfo.file_size
         zinfo.header_offset = self.fp.tell()    # Start of header bytes
         self.fp.write(zinfo.FileHeader())
-        zinfo.file_offset = self.fp.tell()      # Start of file bytes
         self.fp.write(bytes)
+        self.fp.flush()
         if zinfo.flag_bits & 0x08:
             # Write CRC and file sizes after the file data
             self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
@@ -491,7 +642,8 @@
         records."""
         if self.fp is None:
             return
-        if self.mode in ("w", "a"):             # write ending records
+        
+        if self.mode in ("w", "a") and self._didModify: # write ending records
             count = 0
             pos1 = self.fp.tell()
             for zinfo in self.filelist:         # write central directory
@@ -499,23 +651,72 @@
                 dt = zinfo.date_time
                 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+                extra = []
+                if zinfo.file_size > ZIP64_LIMIT \
+                        or zinfo.compress_size > ZIP64_LIMIT:
+                    extra.append(zinfo.file_size)
+                    extra.append(zinfo.compress_size)
+                    file_size = 0xffffffff #-1
+                    compress_size = 0xffffffff #-1
+                else:
+                    file_size = zinfo.file_size
+                    compress_size = zinfo.compress_size
+
+                if zinfo.header_offset > ZIP64_LIMIT:
+                    extra.append(zinfo.header_offset)
+                    header_offset = 0xffffffff #-1
+                else:
+                    header_offset = zinfo.header_offset
+
+                extra_data = zinfo.extra
+                if extra:
+                    # Prepend a ZIP64 field to the existing extra data
+                    extra_data = struct.pack(
+                            '<hh' + 'q'*len(extra),
+                            1, 8*len(extra), *extra) + extra_data
+                    
+                    extract_version = max(45, zinfo.extract_version)
+                    create_version = max(45, zinfo.create_version)
+                else:
+                    extract_version = zinfo.extract_version
+                    create_version = zinfo.create_version
+
                 centdir = struct.pack(structCentralDir,
-                  stringCentralDir, zinfo.create_version,
-                  zinfo.create_system, zinfo.extract_version, zinfo.reserved,
+                  stringCentralDir, create_version,
+                  zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
-                  zinfo.CRC, zinfo.compress_size, zinfo.file_size,
-                  len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
+                  zinfo.CRC, compress_size, file_size,
+                  len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
-                  zinfo.header_offset)
+                  header_offset)
                 self.fp.write(centdir)
                 self.fp.write(zinfo.filename)
-                self.fp.write(zinfo.extra)
+                self.fp.write(extra_data)
                 self.fp.write(zinfo.comment)
+
             pos2 = self.fp.tell()
             # Write end-of-zip-archive record
-            endrec = struct.pack(structEndArchive, stringEndArchive,
-                     0, 0, count, count, pos2 - pos1, pos1, 0)
-            self.fp.write(endrec)
+            if pos1 > ZIP64_LIMIT:
+                # Need to write the ZIP64 end-of-archive records
+                zip64endrec = struct.pack(
+                        structEndArchive64, stringEndArchive64,
+                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
+                self.fp.write(zip64endrec)
+
+                zip64locrec = struct.pack(
+                        structEndArchive64Locator, 
+                        stringEndArchive64Locator, 0, pos2, 1)
+                self.fp.write(zip64locrec)
+
+                pos3 = self.fp.tell()
+                endrec = struct.pack(structEndArchive, stringEndArchive,
+                         0, 0, count, count, pos2 - pos1, 0xffffffff, 0) # -1, 0)
+                self.fp.write(endrec)
+
+            else:
+                endrec = struct.pack(structEndArchive, stringEndArchive,
+                         0, 0, count, count, pos2 - pos1, pos1, 0)
+                self.fp.write(endrec)
             self.fp.flush()
         if not self._filePassed:
             self.fp.close()
@@ -619,3 +820,80 @@
         if basename:
             archivename = "%s/%s" % (basename, archivename)
         return (fname, archivename)
+
+
+def main(args = None):
+    import textwrap
+    USAGE=textwrap.dedent("""\
+        Usage:
+            zipfile.py -l zipfile.zip        # Show listing of a zipfile
+            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
+            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
+            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
+        """)
+    if args is None:
+        args = sys.argv[1:]
+
+    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
+        print USAGE
+        sys.exit(1)
+
+    if args[0] == '-l':
+        if len(args) != 2:
+            print USAGE
+            sys.exit(1)
+        zf = ZipFile(args[1], 'r')
+        zf.printdir()
+        zf.close()
+
+    elif args[0] == '-t':
+        if len(args) != 2:
+            print USAGE
+            sys.exit(1)
+        zf = ZipFile(args[1], 'r')
+        zf.testzip()
+        print "Done testing"
+
+    elif args[0] == '-e':
+        if len(args) != 3:
+            print USAGE
+            sys.exit(1)
+
+        zf = ZipFile(args[1], 'r')
+        out = args[2]
+        for path in zf.namelist():
+            if path.startswith('./'): 
+                tgt = os.path.join(out, path[2:])
+            else:
+                tgt = os.path.join(out, path)
+
+            tgtdir = os.path.dirname(tgt)
+            if not os.path.exists(tgtdir):
+                os.makedirs(tgtdir)
+            fp = open(tgt, 'wb')
+            fp.write(zf.read(path))
+            fp.close()
+        zf.close()
+
+    elif args[0] == '-c':
+        if len(args) < 3:
+            print USAGE
+            sys.exit(1)
+
+        def addToZip(zf, path, zippath):
+            if os.path.isfile(path):
+                zf.write(path, zippath, ZIP_DEFLATED)
+            elif os.path.isdir(path):
+                for nm in os.listdir(path):
+                    addToZip(zf, 
+                            os.path.join(path, nm), os.path.join(zippath, nm))
+            # else: ignore 
+
+        zf = ZipFile(args[1], 'w', allowZip64=True)
+        for src in args[2:]:
+            addToZip(zf, src, os.path.basename(src))
+
+        zf.close()
+
+if __name__ == "__main__":
+    main()

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Thu Jun 15 10:14:18 2006
@@ -152,6 +152,7 @@
   aborts the db transaction safely when a modifier callback fails.
   Fixes SF python patch/bug #1408584.
 
+- Patch #1446489: add support for the ZIP64 extensions to zipfile. 
 
 Library
 -------


More information about the Python-checkins mailing list