[pypy-commit] pypy py3.5-bz2-lzma: adding properties and new parameters in py3.5 (work in progress)

Mon Sep 26 11:37:41 EDT 2016

Author: Richard Plangger <planrichi at gmail.com>
Branch: py3.5-bz2-lzma
Changeset: r87393:3bc7c28ee3a9
Date: 2016-09-26 17:36 +0200
http://bitbucket.org/pypy/pypy/changeset/3bc7c28ee3a9/

Log:	adding properties and new parameters in py3.5 (work in progress)

diff --git a/pypy/module/bz2/interp_bz2.py b/pypy/module/bz2/interp_bz2.py
--- a/pypy/module/bz2/interp_bz2.py
+++ b/pypy/module/bz2/interp_bz2.py
@@ -96,9 +96,11 @@
 BZ_SEQUENCE_ERROR = cConfig.BZ_SEQUENCE_ERROR
 
 if BUFSIZ < 8192:
-    SMALLCHUNK = 8192
+    INITIAL_BUFFER_SIZE = 8192
 else:
-    SMALLCHUNK = BUFSIZ
+    INITIAL_BUFFER_SIZE = 8192
+
+UINT_MAX = 2**32-1
 
 if rffi.sizeof(rffi.INT) > 4:
     BIGCHUNK = 512 * 32
@@ -187,12 +189,21 @@
     encapsulate the logic of setting up the fields of 'bzs' and
     allocating raw memory as needed.
     """
-    def __init__(self, bzs, initial_size=SMALLCHUNK):
+    def __init__(self, bzs, initial_size=INITIAL_BUFFER_SIZE, max_length=-1):
         # when the constructor is called, allocate a piece of memory
         # of length 'piece_size' and make bzs ready to dump there.
         self.temp = []
         self.bzs = bzs
-        self._allocate_chunk(initial_size)
+        self.max_length = max_length
+        if max_length < 0 or max_length >= initial_size:
+            size = initial_size
+        else:
+            size = max_length
+        self._allocate_chunk(size)
+        self.avail_in_real = 0
+
+    def get_data_size(self):
+        return 0
 
     def _allocate_chunk(self, size):
         self.raw_buf, self.gc_buf, self.case_num = rffi.alloc_buffer(size)
@@ -357,7 +368,6 @@
     W_BZ2Decompressor.__init__(x, space)
     return space.wrap(x)
 
-
 class W_BZ2Decompressor(W_Root):
     """BZ2Decompressor() -> decompressor object
 
@@ -372,6 +382,8 @@
         try:
             self.running = False
             self.unused_data = ""
+            self.needs_input = 1
+            self.input_buffer = None
 
             self._init_bz2decomp()
         except:
@@ -397,15 +409,47 @@
     def descr_getstate(self):
         raise oefmt(self.space.w_TypeError, "cannot serialize '%T' object", self)
 
+    def needs_input_w(self, space):
+        """ True if more input is needed before more decompressed
+            data can be produced. """
+        return space.wrap(self.needs_input)
+
     def eof_w(self, space):
         if self.running:
             return space.w_False
         else:
             return space.w_True
 
-    @unwrap_spec(data='bufferstr')
-    def decompress(self, data):
-        """decompress(data) -> string
+    def _decompress_buf(self, data, max_length):
+        in_bufsize = len(data)
+
+        with rffi.scoped_nonmovingbuffer(data) as in_buf:
+            self.bzs.c_next_in = in_buf
+            rffi.setintfield(self.bzs, 'c_avail_in', in_bufsize)
+
+            with OutBuffer(self.bzs, max_length=max_length) as out:
+                while True:
+                    bzerror = BZ2_bzDecompress(self.bzs)
+                    if bzerror == BZ_STREAM_END:
+                        self.running = False
+                        break
+                    if bzerror != BZ_OK:
+                        _catch_bz2_error(self.space, bzerror)
+
+                    if rffi.getintfield(self.bzs, 'c_avail_in') == 0:
+                        break
+                    elif rffi.getintfield(self.bzs, 'c_avail_out') == 0:
+                        if out.get_data_size() == max_length:
+                            break
+                        out.prepare_next_chunk()
+                res = out.make_result_string()
+                # might be non zero if max_length has been specified
+                self.left_to_process = out.left
+                return self.space.newbytes(res)
+
+    @unwrap_spec(data='bufferstr', max_length=int)
+    def decompress(self, data, max_length=-1):
+        """decompress(data, max_length=-1) -> bytes
 
         Provide more data to the decompressor object. It will return chunks
         of decompressed data whenever possible. If you try to decompress data
@@ -419,34 +463,27 @@
         if data == '':
             return self.space.newbytes('')
 
-        in_bufsize = len(data)
+        bzs = self.bzs
+        if not self.input_buffer:
+            input_buffer_in_use = True
+            result = self._decompress_buf(self.input_buffer, max_length)
+        else:
+            input_buffer_in_use = False
+            result = self._decompress_buf(data, max_length)
 
-        with rffi.scoped_nonmovingbuffer(data) as in_buf:
-            self.bzs.c_next_in = in_buf
-            rffi.setintfield(self.bzs, 'c_avail_in', in_bufsize)
+        if self.left_to_process == 0:
+            self.input_buffer = None
+            self.need_input = 1
+        else:
+            self.need_input = 0
+            if not input_buffer_in_use:
+                datalen = len(data)
+                self.input_buffer = data[datalen-self.left_to_process-1:]
 
-            with OutBuffer(self.bzs) as out:
-                while True:
-                    bzerror = BZ2_bzDecompress(self.bzs)
-                    if bzerror == BZ_STREAM_END:
-                        if rffi.getintfield(self.bzs, 'c_avail_in') != 0:
-                            unused = [self.bzs.c_next_in[i]
-                                      for i in range(
-                                          rffi.getintfield(self.bzs,
-                                                           'c_avail_in'))]
-                            self.unused_data = "".join(unused)
-                        self.running = False
-                        break
-                    if bzerror != BZ_OK:
-                        _catch_bz2_error(self.space, bzerror)
+        return result
 
-                    if rffi.getintfield(self.bzs, 'c_avail_in') == 0:
-                        break
-                    elif rffi.getintfield(self.bzs, 'c_avail_out') == 0:
-                        out.prepare_next_chunk()
 
-                res = out.make_result_string()
-                return self.space.newbytes(res)
+
 
 
 W_BZ2Decompressor.typedef = TypeDef("_bz2.BZ2Decompressor",
@@ -456,5 +493,6 @@
     unused_data = interp_attrproperty_bytes("unused_data", W_BZ2Decompressor),
     eof = GetSetProperty(W_BZ2Decompressor.eof_w),
     decompress = interp2app(W_BZ2Decompressor.decompress),
+    needs_input = GetSetProperty(W_BZ2Decompressor.needs_input_w),
 )
 W_BZ2Decompressor.typedef.acceptable_as_base_class = False