[Jython-checkins] jython: Proper processing of gzip trailer without resubmission
jeff.allen
jython-checkins at python.org
Sun Dec 2 07:19:19 EST 2018
https://hg.python.org/jython/rev/c25c2edd4b16
changeset: 8200:c25c2edd4b16
user: Ray Ferguson <github.public at devendortech.com>
date: Sun Dec 02 08:14:35 2018 +0000
summary:
Proper processing of gzip trailer without resubmission
Supports pip and msgpack use with Jython.
From https://github.com/jythontools/jython/pull/111
files:
Lib/test/test_zlib_jy.py | 38 ++++++++++++++++++++++++
Lib/zlib.py | 43 ++++++++++++++++++---------
2 files changed, 66 insertions(+), 15 deletions(-)
diff --git a/Lib/test/test_zlib_jy.py b/Lib/test/test_zlib_jy.py
--- a/Lib/test/test_zlib_jy.py
+++ b/Lib/test/test_zlib_jy.py
@@ -28,6 +28,44 @@
compressed = array('c', compress('jython'))
self.assertEqual('jython', decompress(compressed))
+ def test_decompress_gzip(self):
+ co = zlib.compressobj(wbits=31) # window 15 with gzip wrapper.
+ c = co.compress("Jenny: 867-5309")
+ c += co.flush()
+ dco = zlib.decompressobj(wbits=31)
+ d = dco.decompress(c)
+ self.assertEqual(b'', dco.unused_data, msg="dco.unused_data not empty after decompress.")
+ self.assertEqual(b'', dco.unconsumed_tail, msg="dco.unconsumed_tail not empty after decompress.")
+ self.assertEqual("Jenny: 867-5309", d)
+
+ def test_decompress_badlen(self):
+ # Manipulating last two bytes to create invalid initial size check.
+ # RFC-1952:
+ #   0   1   2   3   4   5   6   7
+ # +---+---+---+---+---+---+---+---+
+ # |     CRC32     |     ISIZE     |
+ # +---+---+---+---+---+---+---+---+
+ #
+ c=b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03\x0bJ\xacT(O,V\xc8H-J\x05\x00\xc2\xb0\x1e\xe5\x0d\x00\x00\x00'
+ dco = zlib.decompressobj(wbits=31)
+ self.assertRaisesRegexp(zlib.error, 'Error -3 while decompressing data: incorrect length check',
+ dco.decompress, c)
+
+ def test_decompress_badcrc(self):
+ # Manipulating last crc bytes to create a crc check exception.
+ # RFC-1952:
+ #   0   1   2   3   4   5   6   7
+ # +---+---+---+---+---+---+---+---+
+ # |     CRC32     |     ISIZE     |
+ # +---+---+---+---+---+---+---+---+
+ #
+ c=b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03\x0bJ\xacT(O,V\xc8H-J\x05\x00\xc2\xb0\x1f\xe5\x0c\x00\x00\x00'
+ dco = zlib.decompressobj(wbits=31)
+ self.assertRaisesRegexp(zlib.error, 'Error -3 while decompressing data: incorrect data check',
+ dco.decompress, c)
+
+
+
def test_main():
test_support.run_unittest(ArrayTestCase)
diff --git a/Lib/zlib.py b/Lib/zlib.py
--- a/Lib/zlib.py
+++ b/Lib/zlib.py
@@ -24,9 +24,6 @@
from java.util.zip import Adler32, CRC32, Deflater, Inflater, DataFormatException
-class error(Exception):
- pass
-
DEFLATED = 8
MAX_WBITS = 15
@@ -54,7 +51,8 @@
}
-_ADLER_BASE = 65521 # largest prime smaller than 65536
+_ADLER_BASE = 65521 # largest prime smaller than 65536
+_MASK32 = 0xffffffffL # 2**32 - 1 used for unsigned mod 2**32
def adler32(s, value=1):
# Although Java has an implementation in java.util.zip.Adler32,
@@ -110,6 +108,7 @@
# > system will be set to 255 (unknown). If a gzip stream is being
# > written, strm->adler is a crc32 instead of an adler32.
+
class compressobj(object):
# All jython uses wbits for is in deciding whether to skip the
# header if it's negative or to set gzip. But we still raise
@@ -118,10 +117,11 @@
GZIP_HEADER = "\x1f\x8b\x08\x00\x00\x00\x00\x00\x04\x03"
# NB: this format is little-endian, not big-endian as we might
- # expect for network oriented protocols, as specified by RFCs;
- # CRC32.getValue() returns an unsigned int as a long, so cope
- # accordingly
- GZIP_TRAILER_FORMAT = struct.Struct("<Ii") # crc32, size
+ # expect for network oriented protocols. Both are 4 bytes unsigned
+ # modulus 2^32 per RFC-1952. CRC32.getValue() returns an unsigned
+ # int as a long, so cope accordingly.
+ GZIP_TRAILER_FORMAT = struct.Struct("<II") # crc32, size
+
def __init__(self, level=6, method=DEFLATED, wbits=MAX_WBITS,
memLevel=0, strategy=0):
@@ -165,7 +165,7 @@
if mode == Z_FINISH:
if self._gzip:
last += self.GZIP_TRAILER_FORMAT.pack(
- self._crc32.getValue(), self._size % sys.maxint)
+ self._crc32.getValue(), self._size & _MASK32)
self.deflater.end()
self._ended = True
return last
@@ -190,6 +190,8 @@
self.unconsumed_tail = ""
self.gzip = wbits < 0
self.gzip_header_skipped = False
+ self._crc32 = CRC32()
+
def decompress(self, string, max_length=0):
if self._ended:
@@ -224,10 +226,20 @@
self.inflater.setInput(string)
inflated = _get_inflate_data(self.inflater, max_length)
+ self._crc32.update(inflated)
r = self.inflater.getRemaining()
if r:
- if max_length and not self.inflater.finished():
+ if self.gzip and self.inflater.finished() and r == 8:
+ # Consume tail, check inflate size, and crc32
+ crc,isize = struct.unpack_from("<LL", string[-r:])
+ mysize = self.inflater.getBytesWritten() & _MASK32
+ mycrc = self._crc32.getValue() & _MASK32
+ if mysize != isize:
+ raise error('Error -3 while decompressing data: incorrect length check')
+ if mycrc != crc:
+ raise error("Error -3 while decompressing data: incorrect data check")
+ elif max_length and not self.inflater.finished():
self.unconsumed_tail = string[-r:]
else:
self.unused_data += string[-r:]
@@ -300,9 +312,8 @@
def _skip_gzip_header(string):
# per format specified in https://tools.ietf.org/html/rfc1952
-
- # could we use bytearray instead?
- s = array.array("B", string)
+
+ s = bytearray(string)
id1 = s[0]
id2 = s[1]
@@ -333,8 +344,10 @@
if flg & FHCRC:
# skip CRC16 for the header - might be nice to check of course
s = s[2:]
-
- return s.tostring()
+
+ return bytes(s)
+class error(Exception):
+ pass
--
Repository URL: https://hg.python.org/jython
More information about the Jython-checkins
mailing list