[Jython-checkins] jython: Add zlib flush options when using zlib.compressobj. Fixes #2434

jim.baker jython-checkins at python.org
Mon Feb 1 22:13:47 EST 2016


https://hg.python.org/jython/rev/767d5206e120
changeset:   7885:767d5206e120
user:        Jim Baker <jim.baker at rackspace.com>
date:        Mon Feb 01 20:13:13 2016 -0700
summary:
  Add zlib flush options when using zlib.compressobj. Fixes #2434

Supports zlib flush options: Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH,
which became possible as of Java 7. In addition, supports gzip header
(fixed) and trailer (based on CRC32, size) when flushed. Also supports
incremental read of gzip header when decompressing.

Now fully passes all tests in urllib3 (see #2434 for those bugs) with
the exception of those tests using coverage (requires fix for #1638)
and fcntl (requires fix for #1943).

In addition, the gzip module is updated to the latest version in
CPython 2.x, with one minor patch to support Jython's io implementation.
test_gzip and test_zlib now also use the stock CPython tests.

files:
  Lib/gzip.py                      |  130 ++-
  Lib/test/test_support.py         |    2 +
  Lib/test/test_zlib.py            |  636 -------------------
  Lib/zlib.py                      |  141 +++-
  lib-python/2.7/gzip.py           |  111 +-
  lib-python/2.7/test/test_gzip.py |   68 +-
  lib-python/2.7/test/test_zlib.py |  155 ++--
  7 files changed, 372 insertions(+), 871 deletions(-)


diff --git a/Lib/gzip.py b/Lib/gzip.py
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -21,6 +21,9 @@
     # or unsigned.
     output.write(struct.pack("<L", value))
 
+def read32(input):
+    return struct.unpack("<I", input.read(4))[0]
+
 def open(filename, mode="rb", compresslevel=9):
     """Shorthand for GzipFile(filename, mode, compresslevel).
 
@@ -161,9 +164,16 @@
     def _write_gzip_header(self):
         self.fileobj.write('\037\213')             # magic header
         self.fileobj.write('\010')                 # compression method
-        fname = os.path.basename(self.name)
-        if fname.endswith(".gz"):
-            fname = fname[:-3]
+        try:
+            # RFC 1952 requires the FNAME field to be Latin-1. Do not
+            # include filenames that cannot be represented that way.
+            fname = os.path.basename(self.name)
+            if not isinstance(fname, str):
+                fname = fname.encode('latin-1')
+            if fname.endswith('.gz'):
+                fname = fname[:-3]
+        except UnicodeEncodeError:
+            fname = ''
         flags = 0
         if fname:
             flags = FNAME
@@ -181,28 +191,24 @@
         self.crc = zlib.crc32("") & 0xffffffffL
         self.size = 0
 
-    def _read_exact(self, n):
-        data = self.fileobj.read(n)
-        while len(data) < n:
-            b = self.fileobj.read(n - len(data))
-            if not b:
-                raise EOFError("Compressed file ended before the "
-                               "end-of-stream marker was reached")
-            data += b
-        return data
-
     def _read_gzip_header(self):
         magic = self.fileobj.read(2)
         if magic != '\037\213':
             raise IOError, 'Not a gzipped file'
-
-        method, flag, self.mtime = struct.unpack("<BBIxx", self._read_exact(8))
+        method = ord( self.fileobj.read(1) )
         if method != 8:
             raise IOError, 'Unknown compression method'
+        flag = ord( self.fileobj.read(1) )
+        self.mtime = read32(self.fileobj)
+        # extraflag = self.fileobj.read(1)
+        # os = self.fileobj.read(1)
+        self.fileobj.read(2)
 
         if flag & FEXTRA:
             # Read & discard the extra field, if present
-            self._read_exact(struct.unpack("<H", self._read_exact(2)))
+            xlen = ord(self.fileobj.read(1))
+            xlen = xlen + 256*ord(self.fileobj.read(1))
+            self.fileobj.read(xlen)
         if flag & FNAME:
             # Read and discard a null-terminated string containing the filename
             while True:
@@ -216,7 +222,7 @@
                 if not s or s=='\000':
                     break
         if flag & FHCRC:
-            self._read_exact(2)     # Read & discard the 16-bit header CRC
+            self.fileobj.read(2)     # Read & discard the 16-bit header CRC
 
     def write(self,data):
         self._check_closed()
@@ -232,9 +238,9 @@
             data = data.tobytes()
 
         if len(data) > 0:
-            self.size = self.size + len(data)
+            self.fileobj.write(self.compress.compress(data))
+            self.size += len(data)
             self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
-            self.fileobj.write( self.compress.compress(data) )
             self.offset += len(data)
 
         return len(data)
@@ -250,16 +256,20 @@
 
         readsize = 1024
         if size < 0:        # get the whole thing
-            while self._read(readsize):
-                readsize = min(self.max_read_chunk, readsize * 2)
-            size = self.extrasize
+            try:
+                while True:
+                    self._read(readsize)
+                    readsize = min(self.max_read_chunk, readsize * 2)
+            except EOFError:
+                size = self.extrasize
         else:               # just get some more of it
-            while size > self.extrasize:
-                if not self._read(readsize):
-                    if size > self.extrasize:
-                        size = self.extrasize
-                    break
-                readsize = min(self.max_read_chunk, readsize * 2)
+            try:
+                while size > self.extrasize:
+                    self._read(readsize)
+                    readsize = min(self.max_read_chunk, readsize * 2)
+            except EOFError:
+                if size > self.extrasize:
+                    size = self.extrasize
 
         offset = self.offset - self.extrastart
         chunk = self.extrabuf[offset: offset + size]
@@ -274,7 +284,7 @@
 
     def _read(self, size=1024):
         if self.fileobj is None:
-            return False
+            raise EOFError, "Reached EOF"
 
         if self._new_member:
             # If the _new_member flag is set, we have to
@@ -285,7 +295,7 @@
             pos = self.fileobj.tell()   # Save current position
             self.fileobj.seek(0, 2)     # Seek to end of file
             if pos == self.fileobj.tell():
-                return False
+                raise EOFError, "Reached EOF"
             else:
                 self.fileobj.seek( pos ) # Return to original position
 
@@ -302,10 +312,9 @@
 
         if buf == "":
             uncompress = self.decompress.flush()
-            self.fileobj.seek(-len(self.decompress.unused_data), 1)
             self._read_eof()
             self._add_read_data( uncompress )
-            return False
+            raise EOFError, 'Reached EOF'
 
         uncompress = self.decompress.decompress(buf)
         self._add_read_data( uncompress )
@@ -315,14 +324,13 @@
             # so seek back to the start of the unused data, finish up
             # this member, and read a new gzip header.
             # (The number of bytes to seek back is the length of the unused
-            # data)
-            self.fileobj.seek(-len(self.decompress.unused_data), 1)
+            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
+            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
 
             # Check the CRC and file size, and set the flag so we read
             # a new member on the next call
             self._read_eof()
             self._new_member = True
-        return True
 
     def _add_read_data(self, data):
         self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
@@ -333,11 +341,14 @@
         self.size = self.size + len(data)
 
     def _read_eof(self):
-        # We've read to the end of the file.
+        # We've read to the end of the file, so we have to rewind in order
+        # to reread the 8 bytes containing the CRC and the file size.
         # We check the that the computed CRC and size of the
         # uncompressed data matches the stored values.  Note that the size
         # stored is the true file size mod 2**32.
-        crc32, isize = struct.unpack("<II", self._read_exact(8))
+        self.fileobj.seek(-8, 1)
+        crc32 = read32(self.fileobj)
+        isize = read32(self.fileobj)  # may exceed 2GB
         if crc32 != self.crc:
             raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                          hex(self.crc)))
@@ -358,19 +369,21 @@
         return self.fileobj is None
 
     def close(self):
-        if self.fileobj is None:
+        fileobj = self.fileobj
+        if fileobj is None:
             return
-        if self.mode == WRITE:
-            self.fileobj.write(self.compress.flush())
-            write32u(self.fileobj, self.crc)
-            # self.size may exceed 2GB, or even 4GB
-            write32u(self.fileobj, self.size & 0xffffffffL)
-            self.fileobj = None
-        elif self.mode == READ:
-            self.fileobj = None
-        if self.myfileobj:
-            self.myfileobj.close()
-            self.myfileobj = None
+        self.fileobj = None
+        try:
+            if self.mode == WRITE:
+                fileobj.write(self.compress.flush())
+                write32u(fileobj, self.crc)
+                # self.size may exceed 2GB, or even 4GB
+                write32u(fileobj, self.size & 0xffffffffL)
+        finally:
+            myfileobj = self.myfileobj
+            if myfileobj:
+                self.myfileobj = None
+                myfileobj.close()
 
     def __enter__(self):
         # __enter__ is defined in _jyio._IOBase (aka
@@ -381,20 +394,11 @@
         self._check_closed()
         return self
 
-    __iter__ = __enter__
-
-    if not sys.platform.startswith('java'):
-        def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
-            self._check_closed()
-            if self.mode == WRITE:
-                # Ensure the compressor's buffer is flushed
-                self.fileobj.write(self.compress.flush(zlib_mode))
-                self.fileobj.flush()
-    else:
-        # Java lacks Z_SYNC_FLUSH; thus Jython can't flush the
-        # compressobj until EOF
-        def flush(self,zlib_mode=None):
-            self._check_closed()
+    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
+        self._check_closed()
+        if self.mode == WRITE:
+            # Ensure the compressor's buffer is flushed
+            self.fileobj.write(self.compress.flush(zlib_mode))
             self.fileobj.flush()
 
     def fileno(self):
diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py
--- a/Lib/test/test_support.py
+++ b/Lib/test/test_support.py
@@ -485,6 +485,8 @@
 if is_jython:
     # Jython disallows @ in module names
     TESTFN = '$test'
+    TESTFN_UNICODE = "$test-\xe0\xf2"
+    TESTFN_ENCODING = sys.getfilesystemencoding()
 elif os.name == 'riscos':
     TESTFN = 'testfile'
 else:
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
deleted file mode 100644
--- a/Lib/test/test_zlib.py
+++ /dev/null
@@ -1,636 +0,0 @@
-import unittest
-from test.test_support import TESTFN, run_unittest, import_module, unlink, requires
-import binascii
-import random
-from test.test_support import precisionbigmemtest, _1G, _4G, is_jython
-import sys
-
-try:
-    import mmap
-except ImportError:
-    mmap = None
-
-zlib = import_module('zlib')
-
-
-class ChecksumTestCase(unittest.TestCase):
-    # checksum test cases
-    def test_crc32start(self):
-        self.assertEqual(zlib.crc32(""), zlib.crc32("", 0))
-        self.assertTrue(zlib.crc32("abc", 0xffffffff))
-
-    def test_crc32empty(self):
-        self.assertEqual(zlib.crc32("", 0), 0)
-        self.assertEqual(zlib.crc32("", 1), 1)
-        self.assertEqual(zlib.crc32("", 432), 432)
-
-    def test_adler32(self):
-        self.assertEqual(zlib.adler32(""), zlib.adler32("", 1))
-
-    @unittest.skipIf(is_jython, "jython uses java.util.zip.Adler32, \
-                which does not support a start value other than 1")
-    def test_adler32start(self):
-        self.assertTrue(zlib.adler32("abc", 0xffffffff))
-
-    def test_adler32empty(self):
-        self.assertEqual(zlib.adler32("", 1), 1)
-
-    @unittest.skipIf(is_jython, "jython uses java.util.zip.Adler32, \
-                which does not support a start value other than 1")
-    def test_adler32empty_start(self):
-        self.assertEqual(zlib.adler32("", 0), 0)
-        self.assertEqual(zlib.adler32("", 432), 432)
-
-    def assertEqual32(self, seen, expected):
-        # 32-bit values masked -- checksums on 32- vs 64- bit machines
-        # This is important if bit 31 (0x08000000L) is set.
-        self.assertEqual(seen & 0x0FFFFFFFFL, expected & 0x0FFFFFFFFL)
-
-    def test_penguins(self):
-        self.assertEqual32(zlib.crc32("penguin", 0), 0x0e5c1a120L)
-        self.assertEqual32(zlib.crc32("penguin", 1), 0x43b6aa94)
-        self.assertEqual32(zlib.adler32("penguin", 1), 0x0bd602f7)
-
-        self.assertEqual(zlib.crc32("penguin"), zlib.crc32("penguin", 0))
-        self.assertEqual(zlib.adler32("penguin"),zlib.adler32("penguin",1))
-
-    @unittest.skipIf(is_jython, "jython uses java.util.zip.Adler32, \
-                which does not support a start value other than 1")
-    def test_penguins_start(self):
-        self.assertEqual32(zlib.adler32("penguin", 0), 0x0bcf02f6)
-
-    def test_abcdefghijklmnop(self):
-        """test issue1202 compliance: signed crc32, adler32 in 2.x"""
-        foo = 'abcdefghijklmnop'
-        # explicitly test signed behavior
-        self.assertEqual(zlib.crc32(foo), -1808088941)
-        self.assertEqual(zlib.crc32('spam'), 1138425661)
-        self.assertEqual(zlib.adler32(foo+foo), -721416943)
-        self.assertEqual(zlib.adler32('spam'), 72286642)
-
-    def test_same_as_binascii_crc32(self):
-        foo = 'abcdefghijklmnop'
-        self.assertEqual(binascii.crc32(foo), zlib.crc32(foo))
-        self.assertEqual(binascii.crc32('spam'), zlib.crc32('spam'))
-
-    def test_negative_crc_iv_input(self):
-        # The range of valid input values for the crc state should be
-        # -2**31 through 2**32-1 to allow inputs artifically constrained
-        # to a signed 32-bit integer.
-        self.assertEqual(zlib.crc32('ham', -1), zlib.crc32('ham', 0xffffffffL))
-        self.assertEqual(zlib.crc32('spam', -3141593),
-                         zlib.crc32('spam',  0xffd01027L))
-        self.assertEqual(zlib.crc32('spam', -(2**31)),
-                         zlib.crc32('spam',  (2**31)))
-
-
-class ExceptionTestCase(unittest.TestCase):
-    # make sure we generate some expected errors
-    def test_badlevel(self):
-        # specifying compression level out of range causes an error
-        # (but -1 is Z_DEFAULT_COMPRESSION and apparently the zlib
-        # accepts 0 too)
-        self.assertRaises(zlib.error, zlib.compress, 'ERROR', 10)
-
-    def test_badcompressobj(self):
-        # verify failure on building compress object with bad params
-        self.assertRaises(ValueError, zlib.compressobj, 1, zlib.DEFLATED, 0)
-        # specifying total bits too large causes an error
-        self.assertRaises(ValueError,
-                zlib.compressobj, 1, zlib.DEFLATED, zlib.MAX_WBITS + 1)
-
-    def test_baddecompressobj(self):
-        # verify failure on building decompress object with bad params
-        self.assertRaises(ValueError, zlib.decompressobj, -1)
-
-    def test_decompressobj_badflush(self):
-        # verify failure on calling decompressobj.flush with bad params
-        self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
-        self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
-
-
-class BaseCompressTestCase(object):
-
-    def check_big_compress_buffer(self, size, compress_func):
-        _1M = 1024 * 1024
-        if not is_jython:
-            # Generate 10MB worth of random, and expand it by repeating it.
-            # The assumption is that zlib's memory is not big enough to exploit
-            # such spread out redundancy.
-            fmt = "%%0%dx" % (2 * _1M)
-            data = ''.join([binascii.a2b_hex(fmt % random.getrandbits(8 * _1M))
-                            for i in range(10)])
-            data = data * (size // len(data) + 1)
-        else:
-            #
-            # The original version of this test passes fine on cpython,
-            # but appears to hang on jython, because of the time taken to
-            # format a very large integer as a hexadecimal string.
-            # See this issue for details
-            # http://bugs.jython.org/issue2013
-            # Since testing string formatting is not the purpose of the test
-            # it is necessary to generate the random test data in a different
-            # way on jython. (There may be a better way than what I have 
-            # implemented here)
-            #
-            from java.math import BigInteger
-            from java.util import Random
-            num_bits = 8 * _1M # causes "java.lang.OutOfMemoryError: Java heap space"
-            num_bits = _1M
-            data = ''.join([str(BigInteger((num_bits), Random()).toByteArray())
-                            for i in range(10)])
-        try:
-            compress_func(data)
-        finally:
-            # Release memory
-            data = None
-
-    def check_big_decompress_buffer(self, size, decompress_func):
-        data = 'x' * size
-        try:
-            compressed = zlib.compress(data, 1)
-        finally:
-            # Release memory
-            data = None
-        data = decompress_func(compressed)
-        # Sanity check
-        try:
-            self.assertEqual(len(data), size)
-            self.assertEqual(len(data.strip('x')), 0)
-        finally:
-            data = None
-
-
-class CompressTestCase(BaseCompressTestCase, unittest.TestCase):
-    # Test compression in one go (whole message compression)
-    def test_speech(self):
-        x = zlib.compress(HAMLET_SCENE)
-        self.assertEqual(zlib.decompress(x), HAMLET_SCENE)
-
-    def test_speech128(self):
-        # compress more data
-        data = HAMLET_SCENE * 128
-        x = zlib.compress(data)
-        self.assertEqual(zlib.decompress(x), data)
-
-    @unittest.skipIf(is_jython, "jython uses java.util.zip.Inflater, \
-                which accepts incomplete streams without error")
-    def test_incomplete_stream(self):
-        # An useful error message is given
-        x = zlib.compress(HAMLET_SCENE)
-        self.assertRaisesRegexp(zlib.error,
-            "Error -5 while decompressing data: incomplete or truncated stream",
-            zlib.decompress, x[:-1])
-
-    # Memory use of the following functions takes into account overallocation
-
-    @precisionbigmemtest(size=_1G + 1024 * 1024, memuse=3)
-    def test_big_compress_buffer(self, size):
-        compress = lambda s: zlib.compress(s, 1)
-        self.check_big_compress_buffer(size, compress)
-
-    @precisionbigmemtest(size=_1G + 1024 * 1024, memuse=2)
-    def test_big_decompress_buffer(self, size):
-        """
-        This is NOT testing for a 'size=_1G + 1024 * 1024', because of the definition of 
-        the precisionbigmemtest decorator, which resets the value to 5147, based on 
-        the definition of test_support.real_max_memuse == 0
-        This is the case on my windows installation of python 2.7.3.
-        Python 2.7.3 (default, Apr 10 2012, 23:31:26) [MSC v.1500 32 bit (Intel)] on win32
-        And on my build of jython 2.7
-        Jython 2.7b1+ (default:d5a22e9b622a, Feb 9 2013, 20:36:27)
-        [Java HotSpot(TM) Client VM (Sun Microsystems Inc.)] on java1.6.0_29
-        """
-        self.check_big_decompress_buffer(size, zlib.decompress)
-
-
-class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
-    # Test compression object
-    def test_pair(self):
-        # straightforward compress/decompress objects
-        data = HAMLET_SCENE * 128
-        co = zlib.compressobj()
-        x1 = co.compress(data)
-        x2 = co.flush()
-        self.assertRaises(zlib.error, co.flush) # second flush should not work
-        dco = zlib.decompressobj()
-        y1 = dco.decompress(x1 + x2)
-        y2 = dco.flush()
-        self.assertEqual(data, y1 + y2)
-
-    def test_compressoptions(self):
-        # specify lots of options to compressobj()
-        level = 2
-        method = zlib.DEFLATED
-        wbits = -12
-        memlevel = 9
-        strategy = zlib.Z_FILTERED
-        co = zlib.compressobj(level, method, wbits, memlevel, strategy)
-        x1 = co.compress(HAMLET_SCENE)
-        x2 = co.flush()
-        dco = zlib.decompressobj(wbits)
-        y1 = dco.decompress(x1 + x2)
-        y2 = dco.flush()
-        self.assertEqual(HAMLET_SCENE, y1 + y2)
-
-    def test_compressincremental(self):
-        # compress object in steps, decompress object as one-shot
-        data = HAMLET_SCENE * 128
-        co = zlib.compressobj()
-        bufs = []
-        for i in range(0, len(data), 256):
-            bufs.append(co.compress(data[i:i+256]))
-        bufs.append(co.flush())
-        combuf = ''.join(bufs)
-
-        dco = zlib.decompressobj()
-        y1 = dco.decompress(''.join(bufs))
-        y2 = dco.flush()
-        self.assertEqual(data, y1 + y2)
-
-    def test_decompinc(self, flush=False, source=None, cx=256, dcx=64):
-        # compress object in steps, decompress object in steps
-        source = source or HAMLET_SCENE
-        data = source * 128
-        co = zlib.compressobj()
-        bufs = []
-        for i in range(0, len(data), cx):
-            bufs.append(co.compress(data[i:i+cx]))
-        bufs.append(co.flush())
-        combuf = ''.join(bufs)
-
-        self.assertEqual(data, zlib.decompress(combuf))
-
-        dco = zlib.decompressobj()
-        bufs = []
-        for i in range(0, len(combuf), dcx):
-            bufs.append(dco.decompress(combuf[i:i+dcx]))
-            self.assertEqual('', dco.unconsumed_tail, ########
-                             "(A) uct should be '': not %d long" %
-                                       len(dco.unconsumed_tail))
-        if flush:
-            bufs.append(dco.flush())
-        else:
-            while True:
-                chunk = dco.decompress('')
-                if chunk:
-                    bufs.append(chunk)
-                else:
-                    break
-        self.assertEqual('', dco.unconsumed_tail, ########
-                         "(B) uct should be '': not %d long" %
-                                       len(dco.unconsumed_tail))
-        self.assertEqual(data, ''.join(bufs))
-        # Failure means: "decompressobj with init options failed"
-
-    def test_decompincflush(self):
-        self.test_decompinc(flush=True)
-
-    def test_decompimax(self, source=None, cx=256, dcx=64):
-        # compress in steps, decompress in length-restricted steps
-        source = source or HAMLET_SCENE
-        # Check a decompression object with max_length specified
-        data = source * 128
-        co = zlib.compressobj()
-        bufs = []
-        for i in range(0, len(data), cx):
-            bufs.append(co.compress(data[i:i+cx]))
-        bufs.append(co.flush())
-        combuf = ''.join(bufs)
-        self.assertEqual(data, zlib.decompress(combuf),
-                         'compressed data failure')
-
-        dco = zlib.decompressobj()
-        bufs = []
-        cb = combuf
-        while cb:
-            #max_length = 1 + len(cb)//10
-            chunk = dco.decompress(cb, dcx)
-            self.assertFalse(len(chunk) > dcx,
-                    'chunk too big (%d>%d)' % (len(chunk), dcx))
-            bufs.append(chunk)
-            cb = dco.unconsumed_tail
-        bufs.append(dco.flush())
-        self.assertEqual(data, ''.join(bufs), 'Wrong data retrieved')
-
-    def test_decompressmaxlen(self, flush=False):
-        # Check a decompression object with max_length specified
-        data = HAMLET_SCENE * 128
-        co = zlib.compressobj()
-        bufs = []
-        for i in range(0, len(data), 256):
-            bufs.append(co.compress(data[i:i+256]))
-        bufs.append(co.flush())
-        combuf = ''.join(bufs)
-        self.assertEqual(data, zlib.decompress(combuf),
-                         'compressed data failure')
-
-        dco = zlib.decompressobj()
-        bufs = []
-        cb = combuf
-        while cb:
-            max_length = 1 + len(cb)//10
-            chunk = dco.decompress(cb, max_length)
-            self.assertFalse(len(chunk) > max_length,
-                        'chunk too big (%d>%d)' % (len(chunk),max_length))
-            bufs.append(chunk)
-            cb = dco.unconsumed_tail
-        if flush:
-            bufs.append(dco.flush())
-        else:
-            while chunk:
-                chunk = dco.decompress('', max_length)
-                self.assertFalse(len(chunk) > max_length,
-                            'chunk too big (%d>%d)' % (len(chunk),max_length))
-                bufs.append(chunk)
-        self.assertEqual(data, ''.join(bufs), 'Wrong data retrieved')
-
-    def test_decompressmaxlenflush(self):
-        self.test_decompressmaxlen(flush=True)
-
-    def test_maxlenmisc(self):
-        # Misc tests of max_length
-        dco = zlib.decompressobj()
-        self.assertRaises(ValueError, dco.decompress, "", -1)
-        self.assertEqual('', dco.unconsumed_tail)
-
-    def test_clear_unconsumed_tail(self):
-        # Issue #12050: calling decompress() without providing max_length
-        # should clear the unconsumed_tail attribute.
-        cdata = "x\x9cKLJ\x06\x00\x02M\x01"     # "abc"
-        dco = zlib.decompressobj()
-        ddata = dco.decompress(cdata, 1)
-        ddata += dco.decompress(dco.unconsumed_tail)
-        self.assertEqual(dco.unconsumed_tail, "")
-
-    def test_flushes(self):
-        # Test flush() with the various options, using all the
-        # different levels in order to provide more variations.
-        sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH']
-        sync_opt = [getattr(zlib, opt) for opt in sync_opt
-                    if hasattr(zlib, opt)]
-        data = HAMLET_SCENE * 8
-
-        for sync in sync_opt:
-            for level in range(10):
-                obj = zlib.compressobj( level )
-                a = obj.compress( data[:3000] )
-                b = obj.flush( sync )
-                c = obj.compress( data[3000:] )
-                d = obj.flush()
-                self.assertEqual(zlib.decompress(''.join([a,b,c,d])),
-                                 data, ("Decompress failed: flush "
-                                        "mode=%i, level=%i") % (sync, level))
-                del obj
-
-    def test_odd_flush(self):
-        # Test for odd flushing bugs noted in 2.0, and hopefully fixed in 2.1
-        import random
-
-        if hasattr(zlib, 'Z_SYNC_FLUSH'):
-            # Testing on 17K of "random" data
-
-            # Create compressor and decompressor objects
-            co = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
-            dco = zlib.decompressobj()
-
-            # Try 17K of data
-            # generate random data stream
-            try:
-                # In 2.3 and later, WichmannHill is the RNG of the bug report
-                gen = random.WichmannHill()
-            except AttributeError:
-                try:
-                    # 2.2 called it Random
-                    gen = random.Random()
-                except AttributeError:
-                    # others might simply have a single RNG
-                    gen = random
-            gen.seed(1)
-            data = genblock(1, 17 * 1024, generator=gen)
-
-            # compress, sync-flush, and decompress
-            first = co.compress(data)
-            second = co.flush(zlib.Z_SYNC_FLUSH)
-            expanded = dco.decompress(first + second)
-
-            # if decompressed data is different from the input data, choke.
-            self.assertEqual(expanded, data, "17K random source doesn't match")
-
-    def test_empty_flush(self):
-        # Test that calling .flush() on unused objects works.
-        # (Bug #1083110 -- calling .flush() on decompress objects
-        # caused a core dump.)
-
-        co = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
-        self.assertTrue(co.flush())  # Returns a zlib header
-        dco = zlib.decompressobj()
-        self.assertEqual(dco.flush(), "") # Returns nothing
-
-    def test_decompress_incomplete_stream(self):
-        # This is 'foo', deflated
-        x = 'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E'
-        # For the record
-        self.assertEqual(zlib.decompress(x), 'foo')
-        if not is_jython:
-            # There is inconsistency between cpython zlib.decompress (which does not accept 
-            # incomplete streams) and zlib.decompressobj().decompress (which does accept
-            # incomplete streams, the whole point of this test)
-            # On jython, both zlib.decompress and zlib.decompressobject().decompress behave
-            # the same way: they both accept incomplete streams.
-            # Therefore, imposing this precondition is cpython specific
-            # and not appropriate on jython, which has consistent behaviour.
-            # http://bugs.python.org/issue8672
-            # http://bugs.jython.org/issue1859
-            self.assertRaises(zlib.error, zlib.decompress, x[:-5])
-        # Omitting the stream end works with decompressor objects
-        # (see issue #8672).
-        dco = zlib.decompressobj()
-        y = dco.decompress(x[:-5])
-        y += dco.flush()
-        self.assertEqual(y, 'foo')
-
-    if hasattr(zlib.compressobj(), "copy"):
-        def test_compresscopy(self):
-            # Test copying a compression object
-            data0 = HAMLET_SCENE
-            data1 = HAMLET_SCENE.swapcase()
-            c0 = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
-            bufs0 = []
-            bufs0.append(c0.compress(data0))
-
-            c1 = c0.copy()
-            bufs1 = bufs0[:]
-
-            bufs0.append(c0.compress(data0))
-            bufs0.append(c0.flush())
-            s0 = ''.join(bufs0)
-
-            bufs1.append(c1.compress(data1))
-            bufs1.append(c1.flush())
-            s1 = ''.join(bufs1)
-
-            self.assertEqual(zlib.decompress(s0),data0+data0)
-            self.assertEqual(zlib.decompress(s1),data0+data1)
-
-        def test_badcompresscopy(self):
-            # Test copying a compression object in an inconsistent state
-            c = zlib.compressobj()
-            c.compress(HAMLET_SCENE)
-            c.flush()
-            self.assertRaises(ValueError, c.copy)
-
-    if hasattr(zlib.decompressobj(), "copy"):
-        def test_decompresscopy(self):
-            # Test copying a decompression object
-            data = HAMLET_SCENE
-            comp = zlib.compress(data)
-
-            d0 = zlib.decompressobj()
-            bufs0 = []
-            bufs0.append(d0.decompress(comp[:32]))
-
-            d1 = d0.copy()
-            bufs1 = bufs0[:]
-
-            bufs0.append(d0.decompress(comp[32:]))
-            s0 = ''.join(bufs0)
-
-            bufs1.append(d1.decompress(comp[32:]))
-            s1 = ''.join(bufs1)
-
-            self.assertEqual(s0,s1)
-            self.assertEqual(s0,data)
-
-        def test_baddecompresscopy(self):
-            # Test copying a compression object in an inconsistent state
-            data = zlib.compress(HAMLET_SCENE)
-            d = zlib.decompressobj()
-            d.decompress(data)
-            d.flush()
-            self.assertRaises(ValueError, d.copy)
-
-    # Memory use of the following functions takes into account overallocation
-
-    @precisionbigmemtest(size=_1G + 1024 * 1024, memuse=3)
-    def test_big_compress_buffer(self, size):
-        c = zlib.compressobj(1)
-        compress = lambda s: c.compress(s) + c.flush()
-        self.check_big_compress_buffer(size, compress)
-
-    @precisionbigmemtest(size=_1G + 1024 * 1024, memuse=2)
-    def test_big_decompress_buffer(self, size):
-        """
-        This is NOT testing for a 'size=_1G + 1024 * 1024', because of the definition of 
-        the precisionbigmemtest decorator, which resets the value to 5147, based on 
-        the definition of test_support.real_max_memuse == 0
-        This is the case on my windows installation of python 2.7.3.
-        Python 2.7.3 (default, Apr 10 2012, 23:31:26) [MSC v.1500 32 bit (Intel)] on win32
-        And on my build of jython 2.7
-        Jython 2.7b1+ (default:d5a22e9b622a, Feb 9 2013, 20:36:27)
-        [Java HotSpot(TM) Client VM (Sun Microsystems Inc.)] on java1.6.0_29
-        """
-        d = zlib.decompressobj()
-        decompress = lambda s: d.decompress(s) + d.flush()
-        self.check_big_decompress_buffer(size, decompress)
-
-
-def genblock(seed, length, step=1024, generator=random):
-    """length-byte stream of random data from a seed (in step-byte blocks)."""
-    if seed is not None:
-        generator.seed(seed)
-    randint = generator.randint
-    if length < step or step < 2:
-        step = length
-    blocks = []
-    for i in range(0, length, step):
-        blocks.append(''.join([chr(randint(0,255))
-                               for x in range(step)]))
-    return ''.join(blocks)[:length]
-
-
-
-def choose_lines(source, number, seed=None, generator=random):
-    """Return a list of number lines randomly chosen from the source"""
-    if seed is not None:
-        generator.seed(seed)
-    sources = source.split('\n')
-    return [generator.choice(sources) for n in range(number)]
-
-
-
-HAMLET_SCENE = """
-LAERTES
-
-       O, fear me not.
-       I stay too long: but here my father comes.
-
-       Enter POLONIUS
-
-       A double blessing is a double grace,
-       Occasion smiles upon a second leave.
-
-LORD POLONIUS
-
-       Yet here, Laertes! aboard, aboard, for shame!
-       The wind sits in the shoulder of your sail,
-       And you are stay'd for. There; my blessing with thee!
-       And these few precepts in thy memory
-       See thou character. Give thy thoughts no tongue,
-       Nor any unproportioned thought his act.
-       Be thou familiar, but by no means vulgar.
-       Those friends thou hast, and their adoption tried,
-       Grapple them to thy soul with hoops of steel;
-       But do not dull thy palm with entertainment
-       Of each new-hatch'd, unfledged comrade. Beware
-       Of entrance to a quarrel, but being in,
-       Bear't that the opposed may beware of thee.
-       Give every man thy ear, but few thy voice;
-       Take each man's censure, but reserve thy judgment.
-       Costly thy habit as thy purse can buy,
-       But not express'd in fancy; rich, not gaudy;
-       For the apparel oft proclaims the man,
-       And they in France of the best rank and station
-       Are of a most select and generous chief in that.
-       Neither a borrower nor a lender be;
-       For loan oft loses both itself and friend,
-       And borrowing dulls the edge of husbandry.
-       This above all: to thine ownself be true,
-       And it must follow, as the night the day,
-       Thou canst not then be false to any man.
-       Farewell: my blessing season this in thee!
-
-LAERTES
-
-       Most humbly do I take my leave, my lord.
-
-LORD POLONIUS
-
-       The time invites you; go; your servants tend.
-
-LAERTES
-
-       Farewell, Ophelia; and remember well
-       What I have said to you.
-
-OPHELIA
-
-       'Tis in my memory lock'd,
-       And you yourself shall keep the key of it.
-
-LAERTES
-
-       Farewell.
-"""
-
-
-def test_main():
-    run_unittest(
-        ChecksumTestCase,
-        ExceptionTestCase,
-        CompressTestCase,
-        CompressObjectTestCase
-    )
-
-if __name__ == "__main__":
-    test_main()
diff --git a/Lib/zlib.py b/Lib/zlib.py
--- a/Lib/zlib.py
+++ b/Lib/zlib.py
@@ -16,10 +16,12 @@
 import array
 import binascii
 import jarray
+import struct
+import sys
 from cStringIO import StringIO
 
 from java.lang import Long, String, System
-from java.util.zip import Adler32, Deflater, Inflater, DataFormatException
+from java.util.zip import Adler32, CRC32, Deflater, Inflater, DataFormatException
 
 
 class error(Exception):
@@ -39,19 +41,37 @@
 Z_DEFAULT_COMPRESSION = -1
 Z_DEFAULT_STRATEGY = 0
 
-# Most options are removed because java does not support them
-# Z_NO_FLUSH = 0
-# Z_SYNC_FLUSH = 2
-# Z_FULL_FLUSH = 3
+Z_NO_FLUSH = 0
+Z_SYNC_FLUSH = 2
+Z_FULL_FLUSH = 3
 Z_FINISH = 4
-_valid_flush_modes = (Z_FINISH,)
+_valid_flush_modes = (Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH, Z_FINISH)
+
+_zlib_to_deflater = {
+    Z_NO_FLUSH: Deflater.NO_FLUSH,
+    Z_SYNC_FLUSH: Deflater.SYNC_FLUSH,
+    Z_FULL_FLUSH: Deflater.FULL_FLUSH
+}
+
+
+_ADLER_BASE = 65521  # largest prime smaller than 65536
 
 def adler32(s, value=1):
-    if value != 1:
-        raise ValueError, "adler32 only support start value of 1"
-    checksum = Adler32()
-    checksum.update(String.getBytes(s, 'iso-8859-1'))
-    return Long(checksum.getValue()).intValue()
+    # Although Java has an implementation in java.util.zip.Adler32,
+    # this class does not allow for updating the value directly, as
+    # required by this C-style API.
+    #
+    # ported from https://tools.ietf.org/html/rfc2960#page-132
+    s1 = value & 0xffff
+    s2 = (value >> 16) & 0xffff
+    for c in s:
+        s1 = (s1 + ord(c)) % _ADLER_BASE
+        s2 = (s2 + s1)     % _ADLER_BASE
+    # Support two's complement, to comply with the range specified for 2.6+;
+    # for 3.x, simply return (s2 << 16) + s1
+    high_bit = -2147483648 if (s2 & 0x8000) else 0
+    remaining_high_word = s2 & 0x7fff
+    return high_bit + (remaining_high_word << 16) + s1
 
 def crc32(string, value=0):
     return binascii.crc32(string, value)
@@ -72,38 +92,80 @@
     inflater = Inflater(wbits < 0)
     try:
         inflater.setInput(_to_input(string))
-        return _get_inflate_data(inflater)
+        data = _get_inflate_data(inflater)
+        if not inflater.finished():
+            raise error, "Error -5 while decompressing data: incomplete or truncated stream"
+        return data
     finally:
         inflater.end()
 
 
+# per zlib manual (http://www.zlib.net/manual.html):
+
+# > windowBits can also be greater than 15 for optional gzip
+# > encoding. Add 16 to windowBits to write a simple gzip header and
+# > trailer around the compressed data instead of a zlib wrapper. The
+# > gzip header will have no file name, no extra data, no comment, no
+# > modification time (set to zero), no header crc, and the operating
+# > system will be set to 255 (unknown). If a gzip stream is being
+# > written, strm->adler is a crc32 instead of an adler32.
+
 class compressobj(object):
-    # all jython uses wbits for is deciding whether to skip the header if it's negative
+    # Jython uses wbits only to decide whether to skip the header
+    # (when negative) or to write gzip framing. We still raise
+    # ValueError on out-of-range values for full test compliance.
+
+    GZIP_HEADER = "\x1f\x8b\x08\x00\x00\x00\x00\x00\x04\x03"
+
+    # NB: this format is little-endian, not big-endian as we might
+    # expect for network oriented protocols, as specified by RFCs;
+    # CRC32.getValue() returns an unsigned int as a long, so cope
+    # accordingly
+    GZIP_TRAILER_FORMAT = struct.Struct("<Ii")  # crc32, size
+
     def __init__(self, level=6, method=DEFLATED, wbits=MAX_WBITS,
                        memLevel=0, strategy=0):
+        if abs(wbits) & 16:
+            if wbits > 0:
+                wbits -= 16
+            else:
+                wbits += 16
+            self._gzip = True
+        else:
+            self._gzip = False
         if abs(wbits) > MAX_WBITS or abs(wbits) < 8:
-            raise ValueError, "Invalid initialization option"
-        self.deflater = Deflater(level, wbits < 0)
+            raise ValueError, "Invalid initialization option: %s" % (wbits,)
+        self.deflater = Deflater(level, wbits < 0 or self._gzip)
         self.deflater.setStrategy(strategy)
-        if wbits < 0:
-            _get_deflate_data(self.deflater)
         self._ended = False
+        self._size = 0
+        self._crc32 = CRC32()
 
     def compress(self, string):
         if self._ended:
             raise error("compressobj may not be used after flush(Z_FINISH)")
         string = _to_input(string)
         self.deflater.setInput(string, 0, len(string))
-        return _get_deflate_data(self.deflater)
+        deflated = _get_deflate_data(self.deflater)
+        self._size += len(string)
+        self._crc32.update(string)
+        if self._gzip:
+            return self.GZIP_HEADER + deflated
+        else:
+            return deflated
 
     def flush(self, mode=Z_FINISH):
         if self._ended:
             raise error("compressobj may not be used after flush(Z_FINISH)")
         if mode not in _valid_flush_modes:
             raise ValueError, "Invalid flush option"
-        self.deflater.finish()
-        last = _get_deflate_data(self.deflater)
         if mode == Z_FINISH:
+            self.deflater.finish()
+        last = _get_deflate_data(self.deflater, mode)
+        if mode == Z_FINISH:
+            if self._gzip:
+                last += self.GZIP_TRAILER_FORMAT.pack(
+                    self._crc32.getValue(), self._size % sys.maxint)
             self.deflater.end()
             self._ended = True
         return last
@@ -138,15 +200,24 @@
         # unconsumed_tail is whatever input was not used because max_length
         # was exceeded before inflation finished.
         # Thus, at most one of {unused_data, unconsumed_tail} may be non-empty.
-        self.unused_data = ""
+
         self.unconsumed_tail = ""
+        if not self.inflater.finished() and not (self.gzip and not self.gzip_header_skipped):
+            self.unused_data = ""
 
         if max_length < 0:
             raise ValueError("max_length must be a positive integer")
 
         # Suppress gzip header if present and wbits < 0
         if self.gzip and not self.gzip_header_skipped:
-            string = _skip_gzip_header(string)
+            string = self.unused_data + string
+            self.unused_data = ""
+            try:
+                string = _skip_gzip_header(string)
+            except IndexError:
+                # need more input!
+                self.unused_data = string
+                return ""
             self.gzip_header_skipped = True
 
         string = _to_input(string)
@@ -156,15 +227,14 @@
 
         r = self.inflater.getRemaining()
         if r:
-            if max_length:
+            if max_length and not self.inflater.finished():
                 self.unconsumed_tail = string[-r:]
             else:
-                self.unused_data = string[-r:]
+                self.unused_data += string[-r:]
 
         return inflated
 
     def flush(self, length=None):
-        # FIXME close input streams if gzip
         if self._ended:
             raise error("decompressobj may not be used after flush()")
         if length is None:
@@ -175,15 +245,22 @@
         self.inflater.end()
         return last
 
-def _to_input(string):
-    return string.tostring() if isinstance(string, array.array) else string
+def _to_input(s):
+    if isinstance(s, unicode):
+        return s.encode('ascii')
+    if isinstance(s, array.array):
+        return s.tostring()
+    if isinstance(s, basestring) or isinstance(s, buffer) or isinstance(s, memoryview):
+        return s
+    else:
+        raise TypeError('must be string or read-only buffer, not %s' % type(s))
 
-def _get_deflate_data(deflater):
-    buf = jarray.zeros(1024, 'b')
+def _get_deflate_data(deflater, mode=Z_NO_FLUSH):
+    buflen = 1024
+    buf = jarray.zeros(buflen, 'b')
     s = StringIO()
     while not deflater.finished():
-        l = deflater.deflate(buf)
-
+        l = deflater.deflate(buf, 0, buflen, _zlib_to_deflater.get(mode, Deflater.NO_FLUSH))
         if l == 0:
             break
         s.write(String(buf, 0, 0, l))
@@ -222,7 +299,7 @@
 FCOMMENT = 16
 
 def _skip_gzip_header(string):
-    # per format specified in http://tools.ietf.org/html/rfc1952
+    # per format specified in https://tools.ietf.org/html/rfc1952
     
     # could we use bytearray instead?
     s = array.array("B", string)
diff --git a/lib-python/2.7/gzip.py b/lib-python/2.7/gzip.py
--- a/lib-python/2.7/gzip.py
+++ b/lib-python/2.7/gzip.py
@@ -21,6 +21,9 @@
     # or unsigned.
     output.write(struct.pack("<L", value))
 
+def read32(input):
+    return struct.unpack("<I", input.read(4))[0]
+
 def open(filename, mode="rb", compresslevel=9):
     """Shorthand for GzipFile(filename, mode, compresslevel).
 
@@ -161,9 +164,16 @@
     def _write_gzip_header(self):
         self.fileobj.write('\037\213')             # magic header
         self.fileobj.write('\010')                 # compression method
-        fname = os.path.basename(self.name)
-        if fname.endswith(".gz"):
-            fname = fname[:-3]
+        try:
+            # RFC 1952 requires the FNAME field to be Latin-1. Do not
+            # include filenames that cannot be represented that way.
+            fname = os.path.basename(self.name)
+            if not isinstance(fname, str):
+                fname = fname.encode('latin-1')
+            if fname.endswith('.gz'):
+                fname = fname[:-3]
+        except UnicodeEncodeError:
+            fname = ''
         flags = 0
         if fname:
             flags = FNAME
@@ -181,28 +191,24 @@
         self.crc = zlib.crc32("") & 0xffffffffL
         self.size = 0
 
-    def _read_exact(self, n):
-        data = self.fileobj.read(n)
-        while len(data) < n:
-            b = self.fileobj.read(n - len(data))
-            if not b:
-                raise EOFError("Compressed file ended before the "
-                               "end-of-stream marker was reached")
-            data += b
-        return data
-
     def _read_gzip_header(self):
         magic = self.fileobj.read(2)
         if magic != '\037\213':
             raise IOError, 'Not a gzipped file'
-
-        method, flag, self.mtime = struct.unpack("<BBIxx", self._read_exact(8))
+        method = ord( self.fileobj.read(1) )
         if method != 8:
             raise IOError, 'Unknown compression method'
+        flag = ord( self.fileobj.read(1) )
+        self.mtime = read32(self.fileobj)
+        # extraflag = self.fileobj.read(1)
+        # os = self.fileobj.read(1)
+        self.fileobj.read(2)
 
         if flag & FEXTRA:
             # Read & discard the extra field, if present
-            self._read_exact(struct.unpack("<H", self._read_exact(2)))
+            xlen = ord(self.fileobj.read(1))
+            xlen = xlen + 256*ord(self.fileobj.read(1))
+            self.fileobj.read(xlen)
         if flag & FNAME:
             # Read and discard a null-terminated string containing the filename
             while True:
@@ -216,7 +222,7 @@
                 if not s or s=='\000':
                     break
         if flag & FHCRC:
-            self._read_exact(2)     # Read & discard the 16-bit header CRC
+            self.fileobj.read(2)     # Read & discard the 16-bit header CRC
 
     def write(self,data):
         self._check_closed()
@@ -232,9 +238,9 @@
             data = data.tobytes()
 
         if len(data) > 0:
-            self.size = self.size + len(data)
+            self.fileobj.write(self.compress.compress(data))
+            self.size += len(data)
             self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
-            self.fileobj.write( self.compress.compress(data) )
             self.offset += len(data)
 
         return len(data)
@@ -250,16 +256,20 @@
 
         readsize = 1024
         if size < 0:        # get the whole thing
-            while self._read(readsize):
-                readsize = min(self.max_read_chunk, readsize * 2)
-            size = self.extrasize
+            try:
+                while True:
+                    self._read(readsize)
+                    readsize = min(self.max_read_chunk, readsize * 2)
+            except EOFError:
+                size = self.extrasize
         else:               # just get some more of it
-            while size > self.extrasize:
-                if not self._read(readsize):
-                    if size > self.extrasize:
-                        size = self.extrasize
-                    break
-                readsize = min(self.max_read_chunk, readsize * 2)
+            try:
+                while size > self.extrasize:
+                    self._read(readsize)
+                    readsize = min(self.max_read_chunk, readsize * 2)
+            except EOFError:
+                if size > self.extrasize:
+                    size = self.extrasize
 
         offset = self.offset - self.extrastart
         chunk = self.extrabuf[offset: offset + size]
@@ -274,7 +284,7 @@
 
     def _read(self, size=1024):
         if self.fileobj is None:
-            return False
+            raise EOFError, "Reached EOF"
 
         if self._new_member:
             # If the _new_member flag is set, we have to
@@ -285,7 +295,7 @@
             pos = self.fileobj.tell()   # Save current position
             self.fileobj.seek(0, 2)     # Seek to end of file
             if pos == self.fileobj.tell():
-                return False
+                raise EOFError, "Reached EOF"
             else:
                 self.fileobj.seek( pos ) # Return to original position
 
@@ -302,10 +312,9 @@
 
         if buf == "":
             uncompress = self.decompress.flush()
-            self.fileobj.seek(-len(self.decompress.unused_data), 1)
             self._read_eof()
             self._add_read_data( uncompress )
-            return False
+            raise EOFError, 'Reached EOF'
 
         uncompress = self.decompress.decompress(buf)
         self._add_read_data( uncompress )
@@ -315,14 +324,13 @@
             # so seek back to the start of the unused data, finish up
             # this member, and read a new gzip header.
             # (The number of bytes to seek back is the length of the unused
-            # data)
-            self.fileobj.seek(-len(self.decompress.unused_data), 1)
+            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
+            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
 
             # Check the CRC and file size, and set the flag so we read
             # a new member on the next call
             self._read_eof()
             self._new_member = True
-        return True
 
     def _add_read_data(self, data):
         self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
@@ -333,11 +341,14 @@
         self.size = self.size + len(data)
 
     def _read_eof(self):
-        # We've read to the end of the file.
+        # We've read to the end of the file, so we have to rewind in order
+        # to reread the 8 bytes containing the CRC and the file size.
         # We check the that the computed CRC and size of the
         # uncompressed data matches the stored values.  Note that the size
         # stored is the true file size mod 2**32.
-        crc32, isize = struct.unpack("<II", self._read_exact(8))
+        self.fileobj.seek(-8, 1)
+        crc32 = read32(self.fileobj)
+        isize = read32(self.fileobj)  # may exceed 2GB
         if crc32 != self.crc:
             raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                          hex(self.crc)))
@@ -358,19 +369,21 @@
         return self.fileobj is None
 
     def close(self):
-        if self.fileobj is None:
+        fileobj = self.fileobj
+        if fileobj is None:
             return
-        if self.mode == WRITE:
-            self.fileobj.write(self.compress.flush())
-            write32u(self.fileobj, self.crc)
-            # self.size may exceed 2GB, or even 4GB
-            write32u(self.fileobj, self.size & 0xffffffffL)
-            self.fileobj = None
-        elif self.mode == READ:
-            self.fileobj = None
-        if self.myfileobj:
-            self.myfileobj.close()
-            self.myfileobj = None
+        self.fileobj = None
+        try:
+            if self.mode == WRITE:
+                fileobj.write(self.compress.flush())
+                write32u(fileobj, self.crc)
+                # self.size may exceed 2GB, or even 4GB
+                write32u(fileobj, self.size & 0xffffffffL)
+        finally:
+            myfileobj = self.myfileobj
+            if myfileobj:
+                self.myfileobj = None
+                myfileobj.close()
 
     def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
         self._check_closed()
diff --git a/lib-python/2.7/test/test_gzip.py b/lib-python/2.7/test/test_gzip.py
--- a/lib-python/2.7/test/test_gzip.py
+++ b/lib-python/2.7/test/test_gzip.py
@@ -1,4 +1,3 @@
-#! /usr/bin/env python
 """Test script for the gzip module.
 """
 
@@ -31,6 +30,30 @@
     def tearDown(self):
         test_support.unlink(self.filename)
 
+    def write_and_read_back(self, data, mode='b'):
+        b_data = memoryview(data).tobytes()
+        with gzip.GzipFile(self.filename, 'w'+mode) as f:
+            l = f.write(data)
+        self.assertEqual(l, len(b_data))
+        with gzip.GzipFile(self.filename, 'r'+mode) as f:
+            self.assertEqual(f.read(), b_data)
+
+    @test_support.requires_unicode
+    def test_unicode_filename(self):
+        unicode_filename = test_support.TESTFN_UNICODE
+        try:
+            unicode_filename.encode(test_support.TESTFN_ENCODING)
+        except (UnicodeError, TypeError):
+            self.skipTest("Requires unicode filenames support")
+        self.filename = unicode_filename
+        with gzip.GzipFile(unicode_filename, "wb") as f:
+            f.write(data1 * 50)
+        with gzip.GzipFile(unicode_filename, "rb") as f:
+            self.assertEqual(f.read(), data1 * 50)
+        # Sanity check that we are actually operating on the right file.
+        with open(unicode_filename, 'rb') as fobj, \
+             gzip.GzipFile(fileobj=fobj, mode="rb") as f:
+            self.assertEqual(f.read(), data1 * 50)
 
     def test_write(self):
         with gzip.GzipFile(self.filename, 'wb') as f:
@@ -46,6 +69,25 @@
         # Test multiple close() calls.
         f.close()
 
+    # The following test_write_xy methods test that write accepts
+    # the corresponding bytes-like object type as input
+    # and that the data written equals bytes(xy) in all cases.
+    def test_write_memoryview(self):
+        self.write_and_read_back(memoryview(data1 * 50))
+
+    def test_write_incompatible_type(self):
+        # Test that non-bytes-like types raise TypeError.
+        # Issue #21560: attempts to write incompatible types
+        # should not affect the state of the fileobject
+        with gzip.GzipFile(self.filename, 'wb') as f:
+            with self.assertRaises(UnicodeEncodeError):
+                f.write(u'\xff')
+            with self.assertRaises(TypeError):
+                f.write([1])
+            f.write(data1)
+        with gzip.GzipFile(self.filename, 'rb') as f:
+            self.assertEqual(f.read(), data1)
+
     def test_read(self):
         self.test_write()
         # Try reading.
@@ -289,23 +331,13 @@
             with gzip.GzipFile(fileobj=f, mode="w") as g:
                 self.assertEqual(g.name, "")
 
-    def test_read_truncated(self):
-        data = data1*50
-        buf = io.BytesIO()
-        with gzip.GzipFile(fileobj=buf, mode="w") as f:
-            f.write(data)
-        # Drop the CRC (4 bytes) and file size (4 bytes).
-        truncated = buf.getvalue()[:-8]
-        with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
-            self.assertRaises(EOFError, f.read)
-        with gzip.GzipFile(fileobj=io.BytesIO(truncated)) as f:
-            self.assertEqual(f.read(len(data)), data)
-            self.assertRaises(EOFError, f.read, 1)
-        # Incomplete 10-byte header.
-        for i in range(2, 10):
-            with gzip.GzipFile(fileobj=io.BytesIO(truncated[:i])) as f:
-                self.assertRaises(EOFError, f.read, 1)
-
+    def test_read_with_extra(self):
+        # Gzip data with an extra field
+        gzdata = (b'\x1f\x8b\x08\x04\xb2\x17cQ\x02\xff'
+                  b'\x05\x00Extra'
+                  b'\x0bI-.\x01\x002\xd1Mx\x04\x00\x00\x00')
+        with gzip.GzipFile(fileobj=io.BytesIO(gzdata)) as f:
+            self.assertEqual(f.read(), b'Test')
 
 def test_main(verbose=None):
     test_support.run_unittest(TestGzip)
diff --git a/lib-python/2.7/test/test_zlib.py b/lib-python/2.7/test/test_zlib.py
--- a/lib-python/2.7/test/test_zlib.py
+++ b/lib-python/2.7/test/test_zlib.py
@@ -12,6 +12,13 @@
 
 zlib = import_module('zlib')
 
+requires_Compress_copy = unittest.skipUnless(
+        hasattr(zlib.compressobj(), "copy"),
+        'requires Compress.copy()')
+requires_Decompress_copy = unittest.skipUnless(
+        hasattr(zlib.decompressobj(), "copy"),
+        'requires Decompress.copy()')
+
 
 class ChecksumTestCase(unittest.TestCase):
     # checksum test cases
@@ -339,39 +346,39 @@
                                         "mode=%i, level=%i") % (sync, level))
                 del obj
 
+    @unittest.skipUnless(hasattr(zlib, 'Z_SYNC_FLUSH'),
+                         'requires zlib.Z_SYNC_FLUSH')
     def test_odd_flush(self):
         # Test for odd flushing bugs noted in 2.0, and hopefully fixed in 2.1
         import random
+        # Testing on 17K of "random" data
 
-        if hasattr(zlib, 'Z_SYNC_FLUSH'):
-            # Testing on 17K of "random" data
+        # Create compressor and decompressor objects
+        co = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
+        dco = zlib.decompressobj()
 
-            # Create compressor and decompressor objects
-            co = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
-            dco = zlib.decompressobj()
+        # Try 17K of data
+        # generate random data stream
+        try:
+            # In 2.3 and later, WichmannHill is the RNG of the bug report
+            gen = random.WichmannHill()
+        except AttributeError:
+            try:
+                # 2.2 called it Random
+                gen = random.Random()
+            except AttributeError:
+                # others might simply have a single RNG
+                gen = random
+        gen.seed(1)
+        data = genblock(1, 17 * 1024, generator=gen)
 
-            # Try 17K of data
-            # generate random data stream
-            try:
-                # In 2.3 and later, WichmannHill is the RNG of the bug report
-                gen = random.WichmannHill()
-            except AttributeError:
-                try:
-                    # 2.2 called it Random
-                    gen = random.Random()
-                except AttributeError:
-                    # others might simply have a single RNG
-                    gen = random
-            gen.seed(1)
-            data = genblock(1, 17 * 1024, generator=gen)
+        # compress, sync-flush, and decompress
+        first = co.compress(data)
+        second = co.flush(zlib.Z_SYNC_FLUSH)
+        expanded = dco.decompress(first + second)
 
-            # compress, sync-flush, and decompress
-            first = co.compress(data)
-            second = co.flush(zlib.Z_SYNC_FLUSH)
-            expanded = dco.decompress(first + second)
-
-            # if decompressed data is different from the input data, choke.
-            self.assertEqual(expanded, data, "17K random source doesn't match")
+        # if decompressed data is different from the input data, choke.
+        self.assertEqual(expanded, data, "17K random source doesn't match")
 
     def test_empty_flush(self):
         # Test that calling .flush() on unused objects works.
@@ -408,35 +415,36 @@
         data = zlib.compress(input2)
         self.assertEqual(dco.flush(), input1[1:])
 
-    if hasattr(zlib.compressobj(), "copy"):
-        def test_compresscopy(self):
-            # Test copying a compression object
-            data0 = HAMLET_SCENE
-            data1 = HAMLET_SCENE.swapcase()
-            c0 = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
-            bufs0 = []
-            bufs0.append(c0.compress(data0))
+    @requires_Compress_copy
+    def test_compresscopy(self):
+        # Test copying a compression object
+        data0 = HAMLET_SCENE
+        data1 = HAMLET_SCENE.swapcase()
+        c0 = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
+        bufs0 = []
+        bufs0.append(c0.compress(data0))
 
-            c1 = c0.copy()
-            bufs1 = bufs0[:]
+        c1 = c0.copy()
+        bufs1 = bufs0[:]
 
-            bufs0.append(c0.compress(data0))
-            bufs0.append(c0.flush())
-            s0 = ''.join(bufs0)
+        bufs0.append(c0.compress(data0))
+        bufs0.append(c0.flush())
+        s0 = ''.join(bufs0)
 
-            bufs1.append(c1.compress(data1))
-            bufs1.append(c1.flush())
-            s1 = ''.join(bufs1)
+        bufs1.append(c1.compress(data1))
+        bufs1.append(c1.flush())
+        s1 = ''.join(bufs1)
 
-            self.assertEqual(zlib.decompress(s0),data0+data0)
-            self.assertEqual(zlib.decompress(s1),data0+data1)
+        self.assertEqual(zlib.decompress(s0),data0+data0)
+        self.assertEqual(zlib.decompress(s1),data0+data1)
 
-        def test_badcompresscopy(self):
-            # Test copying a compression object in an inconsistent state
-            c = zlib.compressobj()
-            c.compress(HAMLET_SCENE)
-            c.flush()
-            self.assertRaises(ValueError, c.copy)
+    @requires_Compress_copy
+    def test_badcompresscopy(self):
+        # Test copying a compression object in an inconsistent state
+        c = zlib.compressobj()
+        c.compress(HAMLET_SCENE)
+        c.flush()
+        self.assertRaises(ValueError, c.copy)
 
     def test_decompress_unused_data(self):
         # Repeated calls to decompress() after EOF should accumulate data in
@@ -463,35 +471,36 @@
                 self.assertEqual(dco.unconsumed_tail, b'')
                 self.assertEqual(dco.unused_data, remainder)
 
-    if hasattr(zlib.decompressobj(), "copy"):
-        def test_decompresscopy(self):
-            # Test copying a decompression object
-            data = HAMLET_SCENE
-            comp = zlib.compress(data)
+    @requires_Decompress_copy
+    def test_decompresscopy(self):
+        # Test copying a decompression object
+        data = HAMLET_SCENE
+        comp = zlib.compress(data)
 
-            d0 = zlib.decompressobj()
-            bufs0 = []
-            bufs0.append(d0.decompress(comp[:32]))
+        d0 = zlib.decompressobj()
+        bufs0 = []
+        bufs0.append(d0.decompress(comp[:32]))
 
-            d1 = d0.copy()
-            bufs1 = bufs0[:]
+        d1 = d0.copy()
+        bufs1 = bufs0[:]
 
-            bufs0.append(d0.decompress(comp[32:]))
-            s0 = ''.join(bufs0)
+        bufs0.append(d0.decompress(comp[32:]))
+        s0 = ''.join(bufs0)
 
-            bufs1.append(d1.decompress(comp[32:]))
-            s1 = ''.join(bufs1)
+        bufs1.append(d1.decompress(comp[32:]))
+        s1 = ''.join(bufs1)
 
-            self.assertEqual(s0,s1)
-            self.assertEqual(s0,data)
+        self.assertEqual(s0,s1)
+        self.assertEqual(s0,data)
 
-        def test_baddecompresscopy(self):
-            # Test copying a compression object in an inconsistent state
-            data = zlib.compress(HAMLET_SCENE)
-            d = zlib.decompressobj()
-            d.decompress(data)
-            d.flush()
-            self.assertRaises(ValueError, d.copy)
+    @requires_Decompress_copy
+    def test_baddecompresscopy(self):
+        # Test copying a decompression object in an inconsistent state
+        data = zlib.compress(HAMLET_SCENE)
+        d = zlib.decompressobj()
+        d.decompress(data)
+        d.flush()
+        self.assertRaises(ValueError, d.copy)
 
     # Memory use of the following functions takes into account overallocation
 

-- 
Repository URL: https://hg.python.org/jython


More information about the Jython-checkins mailing list