[pypy-issue] Issue #3090: lzma sometimes fails to decompress a file (pypy/pypy)

Hiroshi Miura issues-reply at bitbucket.org
Sat Oct 12 06:00:38 EDT 2019


New issue 3090: lzma sometimes fails to decompress a file
https://bitbucket.org/pypy/pypy/issues/3090/lzma-sometimes-fails-to-decompress-a-file

Hiroshi Miura:

lzma.LZMADecompressor.decompress\(\) sometimes returns incomplete data.

Python 3.6, Python 3.7 and pypy3 all have the same issue; Python 3.8rc1 fixes it.

There is a CPython issue, issue21872 ([https://bugs.python.org/issue21872](https://bugs.python.org/issue21872)), and the fix is [https://github.com/python/cpython/pull/14048](https://github.com/python/cpython/pull/14048).

‌

Here is a test script that reproduces the problem; the test data is attached.

```python
#! /usr/bin/env python3

import functools
import lzma

def test_lzma_return_no_last_byte():
    """Reproduce LZMADecompressor.decompress() returning incomplete data.

    Reads the raw LZMA stream stored in the file ``testdata`` (current
    working directory). It first decompresses and discards the leading
    chunks listed in ``chunk_list``, then tries to decompress exactly
    ``expected_length`` more bytes, feeding the decompressor at most
    ``read_blocksize`` input bytes and asking for at most
    ``default_max_length`` output bytes per call.

    On an affected interpreter the second phase stops short of
    ``expected_length`` and the final assertion fails.

    Raises:
        AssertionError: if a skipped chunk or the target chunk comes back
            with fewer bytes than expected.
        OSError: if ``testdata`` cannot be opened.
    """
    # Raw filter chain: id 4 is lzma.FILTER_X86, id 33 is lzma.FILTER_LZMA2.
    filters = [{'id': 4}, {'id': 33, 'dict_size': 16777216}]
    # Upper bound on the input bytes read while decompressing the target chunk.
    target_size = 4302365
    read_blocksize = 32248
    # Decompressed sizes of the chunks that precede the target chunk.
    chunk_list = [51832, 255096, 16114160, 62584, 4021328, 46712, 1847592, 45688, 1864968, 45176, 1883112, 43128,
                  1826448, 51832]
    default_max_length = 32248
    expected_length = 2042728
    # Used below as the cap on input bytes read during the skip phase.
    skip_size = sum(chunk_list)

    decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_RAW, filters=filters)
    with open('testdata', 'rb') as f:
        consumed_size = 0
        # skip to target chunk
        for clen in chunk_list:
            out_remaining = clen
            # bytearray keeps accumulation linear; bytes += is quadratic on
            # interpreters without CPython's in-place concatenation trick.
            outdata = bytearray()
            while out_remaining > 0:
                max_length = min(out_remaining, default_max_length)
                if decompressor.needs_input:
                    read_size = min(read_blocksize, skip_size - consumed_size)
                    indata = f.read(read_size)
                    consumed_size += len(indata)
                    decompdata = decompressor.decompress(indata, max_length)
                    if not decompdata:
                        break
                else:
                    # Buffered output is still pending; drain it without new input.
                    decompdata = decompressor.decompress(b'', max_length)
                out_remaining -= len(decompdata)
                outdata += decompdata
            assert out_remaining == 0
            assert len(outdata) == clen

        # start decompression
        out_remaining = expected_length
        consumed_size = 0
        outdata = bytearray()
        # Checking eof in the loop condition (rather than inside the body)
        # guarantees termination: if the stream ends while output is still
        # missing, fall through to the report below instead of spinning.
        while out_remaining > 0 and not decompressor.eof:
            max_length = min(out_remaining, default_max_length)
            if decompressor.needs_input:
                read_size = min(read_blocksize, target_size - consumed_size)
                indata = f.read(read_size)
                consumed_size += len(indata)
                decompdata = decompressor.decompress(indata, max_length)
                if not decompdata:
                    # FIXME: should not come here.
                    break
            else:
                decompdata = decompressor.decompress(b'', max_length)
            out_remaining -= len(decompdata)
            outdata += decompdata

        print("expected out_remaining is 0 but {}".format(out_remaining))

        # assert target decompression size
        assert len(outdata) == expected_length  # FIXME: last one byte?
```

‌




More information about the pypy-issue mailing list