[pypy-commit] pypy default: Merged in gc-incminimark-pinning-improve (pull request #313)

Sun Mar 29 17:07:34 CEST 2015

Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: 
Changeset: r76621:9f53b151c5d6
Date: 2015-03-29 17:07 +0200
http://bitbucket.org/pypy/pypy/changeset/9f53b151c5d6/

Log:	Merged in gc-incminimark-pinning-improve (pull request #313)

	Use pinning and additional GC debugging env var

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,3 +5,9 @@
 .. this is a revision shortly after release-2.5.1
 .. startrev: 397b96217b85
 
+.. branch: gc-incminimark-pinning-improve
+Object Pinning is now used in `bz2` and `rzlib` (therefore also affects
+Python's `zlib`). In case the data to compress/decompress is inside the nursery
+(incminimark) it no longer needs to create a non-moving copy of it. This saves
+one `malloc` and copying the data.  Additionally a new GC environment variable
+is introduced (`PYPY_GC_MAX_PINNED`) primarily for debugging purposes.
diff --git a/pypy/module/bz2/interp_bz2.py b/pypy/module/bz2/interp_bz2.py
--- a/pypy/module/bz2/interp_bz2.py
+++ b/pypy/module/bz2/interp_bz2.py
@@ -562,10 +562,7 @@
         in_bufsize = datasize
 
         with OutBuffer(self.bzs) as out:
-            with lltype.scoped_alloc(rffi.CCHARP.TO, in_bufsize) as in_buf:
-
-                for i in range(datasize):
-                    in_buf[i] = data[i]
+            with rffi.scoped_nonmovingbuffer(data) as in_buf:
 
                 self.bzs.c_next_in = in_buf
                 rffi.setintfield(self.bzs, 'c_avail_in', in_bufsize)
@@ -663,9 +660,7 @@
 
         in_bufsize = len(data)
 
-        with lltype.scoped_alloc(rffi.CCHARP.TO, in_bufsize) as in_buf:
-            for i in range(in_bufsize):
-                in_buf[i] = data[i]
+        with rffi.scoped_nonmovingbuffer(data) as in_buf:
             self.bzs.c_next_in = in_buf
             rffi.setintfield(self.bzs, 'c_avail_in', in_bufsize)
 
@@ -716,9 +711,7 @@
     with lltype.scoped_alloc(bz_stream.TO, zero=True) as bzs:
         in_bufsize = len(data)
 
-        with lltype.scoped_alloc(rffi.CCHARP.TO, in_bufsize) as in_buf:
-            for i in range(in_bufsize):
-                in_buf[i] = data[i]
+        with rffi.scoped_nonmovingbuffer(data) as in_buf:
             bzs.c_next_in = in_buf
             rffi.setintfield(bzs, 'c_avail_in', in_bufsize)
 
@@ -758,9 +751,7 @@
         return space.wrap("")
 
     with lltype.scoped_alloc(bz_stream.TO, zero=True) as bzs:
-        with lltype.scoped_alloc(rffi.CCHARP.TO, in_bufsize) as in_buf:
-            for i in range(in_bufsize):
-                in_buf[i] = data[i]
+        with rffi.scoped_nonmovingbuffer(data) as in_buf:
             bzs.c_next_in = in_buf
             rffi.setintfield(bzs, 'c_avail_in', in_bufsize)
 
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -47,6 +47,11 @@
                          too slow for normal use.  Values are 0 (off),
                          1 (on major collections) or 2 (also on minor
                          collections).
+
+ PYPY_GC_MAX_PINNED      The maximal number of pinned objects at any point
+                         in time.  Defaults to a conservative value depending
+                         on nursery size and maximum object size inside the
+                         nursery.  Useful for debugging by setting it to 0.
 """
 # XXX Should find a way to bound the major collection threshold by the
 # XXX total addressable size.  Maybe by keeping some minimarkpage arenas
@@ -56,6 +61,7 @@
 # XXX try merging old_objects_pointing_to_pinned into
 # XXX old_objects_pointing_to_young (IRC 2014-10-22, fijal and gregor_w)
 import sys
+import os
 from rpython.rtyper.lltypesystem import lltype, llmemory, llarena, llgroup
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rtyper.lltypesystem.llmemory import raw_malloc_usage
@@ -463,9 +469,19 @@
             self.nursery_size = newsize
             self.allocate_nursery()
         #
-        # Estimate this number conservatively
-        bigobj = self.nonlarge_max + 1
-        self.max_number_of_pinned_objects = self.nursery_size / (bigobj * 2)
+        env_max_number_of_pinned_objects = os.environ.get('PYPY_GC_MAX_PINNED')
+        if env_max_number_of_pinned_objects:
+            try:
+                env_max_number_of_pinned_objects = int(env_max_number_of_pinned_objects)
+            except ValueError:
+                env_max_number_of_pinned_objects = 0
+            #
+            if env_max_number_of_pinned_objects >= 0: # 0 allows to disable pinning completely
+                self.max_number_of_pinned_objects = env_max_number_of_pinned_objects
+        else:
+            # Estimate this number conservatively
+            bigobj = self.nonlarge_max + 1
+            self.max_number_of_pinned_objects = self.nursery_size / (bigobj * 2)
 
     def _nursery_memory_size(self):
         extra = self.nonlarge_max + 1
diff --git a/rpython/rlib/rzlib.py b/rpython/rlib/rzlib.py
--- a/rpython/rlib/rzlib.py
+++ b/rpython/rlib/rzlib.py
@@ -365,10 +365,8 @@
     """Common code for compress() and decompress().
     """
     # Prepare the input buffer for the stream
-    with lltype.scoped_alloc(rffi.CCHARP.TO, len(data)) as inbuf:
-        # XXX (groggi) should be possible to improve this with pinning by
-        # not performing the 'copy_string_to_raw' if non-movable/pinned
-        copy_string_to_raw(llstr(data), inbuf, 0, len(data))
+    assert data is not None # XXX seems to be sane assumption, however not for sure
+    with rffi.scoped_nonmovingbuffer(data) as inbuf:
         stream.c_next_in = rffi.cast(Bytefp, inbuf)
         rffi.setintfield(stream, 'c_avail_in', len(data))