[Jython-checkins] jython: Proper processing of gzip trailer without resubmission

Sun Dec 2 07:19:19 EST 2018

https://hg.python.org/jython/rev/c25c2edd4b16
changeset:   8200:c25c2edd4b16
user:        Ray Ferguson <github.public at devendortech.com>
date:        Sun Dec 02 08:14:35 2018 +0000
summary:
  Proper processing of gzip trailer without resubmission

Supports pip and msgpack use with Jython.
From https://github.com/jythontools/jython/pull/111

files:
  Lib/test/test_zlib_jy.py |  38 ++++++++++++++++++++++++
  Lib/zlib.py              |  43 ++++++++++++++++++---------
  2 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/Lib/test/test_zlib_jy.py b/Lib/test/test_zlib_jy.py
--- a/Lib/test/test_zlib_jy.py
+++ b/Lib/test/test_zlib_jy.py
@@ -28,6 +28,44 @@
         compressed = array('c', compress('jython'))
         self.assertEqual('jython', decompress(compressed))
 
+    def test_decompress_gzip(self):
+        co = zlib.compressobj(wbits=31)  # window 15 with gzip wrapper.
+        c = co.compress("Jenny: 867-5309")
+        c += co.flush()
+        dco = zlib.decompressobj(wbits=31)
+        d = dco.decompress(c)
+        self.assertEqual(b'', dco.unused_data, msg="dco.unused_data not empty after decompress.")
+        self.assertEqual(b'', dco.unconsumed_tail, msg="dco.unconsumed_tail not empty after decompress.")
+        self.assertEqual("Jenny: 867-5309", d)
+
+    def test_decompress_badlen(self):
+        # Corrupting the ISIZE field of the gzip trailer to trigger the length check.
+        # RFC-1952:
+        #    0   1   2   3   4   5   6   7
+        #  +---+---+---+---+---+---+---+---+
+        #  |     CRC32     |     ISIZE     |
+        #  +---+---+---+---+---+---+---+---+
+        #
+        c=b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03\x0bJ\xacT(O,V\xc8H-J\x05\x00\xc2\xb0\x1e\xe5\x0d\x00\x00\x00'
+        dco = zlib.decompressobj(wbits=31)
+        self.assertRaisesRegexp(zlib.error, 'Error -3 while decompressing data: incorrect length check',
+                                dco.decompress, c)
+
+    def test_decompress_badcrc(self):
+        # Manipulating last crc bytes to create a crc check exception.
+        # RFC-1952:
+        #    0   1   2   3   4   5   6   7
+        #  +---+---+---+---+---+---+---+---+
+        #  |     CRC32     |     ISIZE     |
+        #  +---+---+---+---+---+---+---+---+
+        #
+        c=b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03\x0bJ\xacT(O,V\xc8H-J\x05\x00\xc2\xb0\x1f\xe5\x0c\x00\x00\x00'
+        dco = zlib.decompressobj(wbits=31)
+        self.assertRaisesRegexp(zlib.error, 'Error -3 while decompressing data: incorrect data check',
+                                dco.decompress, c)
+
+
+
 
 def test_main():
     test_support.run_unittest(ArrayTestCase)
diff --git a/Lib/zlib.py b/Lib/zlib.py
--- a/Lib/zlib.py
+++ b/Lib/zlib.py
@@ -24,9 +24,6 @@
 from java.util.zip import Adler32, CRC32, Deflater, Inflater, DataFormatException
 
 
-class error(Exception):
-    pass
-
 
 DEFLATED = 8
 MAX_WBITS = 15
@@ -54,7 +51,8 @@
 }
 
 
-_ADLER_BASE = 65521  # largest prime smaller than 65536
+_ADLER_BASE = 65521     # largest prime smaller than 65536
+_MASK32 = 0xffffffffL   # 2**32 - 1 used for unsigned mod 2**32
 
 def adler32(s, value=1):
     # Although Java has an implmentation in java.util.zip.Adler32,
@@ -110,6 +108,7 @@
 # > system will be set to 255 (unknown). If a gzip stream is being
 # > written, strm->adler is a crc32 instead of an adler32.
 
+
 class compressobj(object):
     # All jython uses wbits for is in deciding whether to skip the
     # header if it's negative or to set gzip. But we still raise
@@ -118,10 +117,11 @@
     GZIP_HEADER = "\x1f\x8b\x08\x00\x00\x00\x00\x00\x04\x03"
 
     # NB: this format is little-endian, not big-endian as we might
-    # expect for network oriented protocols, as specified by RFCs;
-    # CRC32.getValue() returns an unsigned int as a long, so cope
-    # accordingly
-    GZIP_TRAILER_FORMAT = struct.Struct("<Ii")  # crc32, size
+    # expect for network oriented protocols. Both are 4 bytes unsigned
+    # modulus 2^32 per RFC-1952. CRC32.getValue() returns an unsigned
+    # int as a long, so cope accordingly. 
+    GZIP_TRAILER_FORMAT = struct.Struct("<II")  # crc32, size
+
 
     def __init__(self, level=6, method=DEFLATED, wbits=MAX_WBITS,
                        memLevel=0, strategy=0):
@@ -165,7 +165,7 @@
         if mode == Z_FINISH:
             if self._gzip:
                 last += self.GZIP_TRAILER_FORMAT.pack(
-                    self._crc32.getValue(), self._size % sys.maxint)
+                    self._crc32.getValue(), self._size & _MASK32)
             self.deflater.end()
             self._ended = True
         return last
@@ -190,6 +190,8 @@
         self.unconsumed_tail = ""
         self.gzip = wbits < 0
         self.gzip_header_skipped = False
+        self._crc32 = CRC32()
+
 
     def decompress(self, string, max_length=0):
         if self._ended:
@@ -224,10 +226,20 @@
 
         self.inflater.setInput(string)
         inflated = _get_inflate_data(self.inflater, max_length)
+        self._crc32.update(inflated)
 
         r = self.inflater.getRemaining()
         if r:
-            if max_length and not self.inflater.finished():
+            if self.gzip and self.inflater.finished() and r == 8:
+                # Consume tail, check inflate size, and crc32
+                crc,isize = struct.unpack_from("<LL", string[-r:])
+                mysize = self.inflater.getBytesWritten() & _MASK32
+                mycrc = self._crc32.getValue() & _MASK32
+                if mysize != isize:
+                    raise error('Error -3 while decompressing data: incorrect length check')
+                if mycrc != crc:
+                    raise error("Error -3 while decompressing data: incorrect data check")
+            elif max_length and not self.inflater.finished():
                 self.unconsumed_tail = string[-r:]
             else:
                 self.unused_data += string[-r:]
@@ -300,9 +312,8 @@
 
 def _skip_gzip_header(string):
     # per format specified in https://tools.ietf.org/html/rfc1952
-    
-    # could we use bytearray instead?
-    s = array.array("B", string)
+
+    s = bytearray(string)
 
     id1 = s[0]
     id2 = s[1]
@@ -333,8 +344,10 @@
     if flg & FHCRC:
         # skip CRC16 for the header - might be nice to check of course
         s = s[2:]
-    
-    return s.tostring()
+
+    return bytes(s)
 
 
 
+class error(Exception):
+    pass

-- 
Repository URL: https://hg.python.org/jython