[pypy-commit] pypy default: hg merge gc-more-incremental

arigo noreply at buildbot.pypy.org
Sat Sep 19 13:39:02 CEST 2015


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r79707:ba09f9a7c031
Date: 2015-09-19 13:39 +0200
http://bitbucket.org/pypy/pypy/changeset/ba09f9a7c031/

Log:	hg merge gc-more-incremental

	Remove a source of non-incremental-ness in the GC: now
	external_malloc() no longer runs gc_step_until() any more. If there
	is a currently-running major collection, we do only so many steps
	before returning. This number of steps depends on the size of the
	allocated object. It is controlled by tracking the general progress
	of these major collection steps and the size of old objects that
	keep adding up between them.

diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -754,14 +754,30 @@
                 self.minor_collection()
                 if minor_collection_count == 1:
                     #
-                    # If the gc_state is not STATE_SCANNING, we're in the middle of
-                    # an incremental major collection.  In this case, always progress
-                    # one step.  If the gc_state is STATE_SCANNING, wait until there
-                    # is too much garbage before starting the next major collection.
+                    # If the gc_state is STATE_SCANNING, we're not in
+                    # the middle of an incremental major collection.
+                    # In that case, wait until there is too much
+                    # garbage before starting the next major
+                    # collection.  But if we are in the middle of an
+                    # incremental major collection, then always do (at
+                    # least) one step now.
+                    #
+                    # This will increment next_major_collection_threshold
+                    # by nursery_size//2.  If more than nursery_size//2
+                    # survives, then threshold_reached() might still be
+                    # true after that.  In that case we do a second step.
+                    # The goal is to avoid too high memory peaks if the
+                    # program allocates a lot of surviving objects.
+                    # 
                     if (self.gc_state != STATE_SCANNING or
-                                self.get_total_memory_used() >
-                                self.next_major_collection_threshold):
+                           self.threshold_reached()):
+
                         self.major_collection_step()
+
+                        if (self.gc_state != STATE_SCANNING and
+                               self.threshold_reached()):  # ^^but only if still
+                            self.minor_collection()        # the same collection
+                            self.major_collection_step()
                         #
                         # The nursery might not be empty now, because of
                         # execute_finalizers().  If it is almost full again,
@@ -825,17 +841,25 @@
             raise MemoryError
         #
         # If somebody calls this function a lot, we must eventually
-        # force a collection.  XXX make this more incremental!  For now
-        # the logic is to first do a minor GC only, and check if that
-        # was enough to free a bunch of large young objects.  If not,
-        # we do a complete major GC.
-        if self.get_total_memory_free() < raw_malloc_usage(totalsize):
+        # force a collection.  We use threshold_reached(), which might
+        # be true now but become false at some point after a few calls
+        # to major_collection_step().  If there is really no memory,
+        # then when the major collection finishes it will raise
+        # MemoryError.
+        #
+        # The logic is to first do a minor GC only, and check if that
+        # was enough to free a bunch of large young objects.  If it
+        # was, then we don't do any major collection step.
+        #
+        while self.threshold_reached(raw_malloc_usage(totalsize)):
             self.minor_collection()
-            if self.get_total_memory_free() < (raw_malloc_usage(totalsize) +
-                                               self.nursery_size // 2):
-                self.gc_step_until(STATE_SWEEPING)
-                self.gc_step_until(STATE_FINALIZING,
-                                   raw_malloc_usage(totalsize))
+            if self.threshold_reached(raw_malloc_usage(totalsize) +
+                                      self.nursery_size // 2):
+                self.major_collection_step(raw_malloc_usage(totalsize))
+            # note that this loop should not be infinite: when the
+            # last step of a major collection is done but
+            # threshold_reached(totalsize) is still true, then
+            # we should get a MemoryError from major_collection_step().
         #
         # Check if the object would fit in the ArenaCollection.
         if raw_malloc_usage(totalsize) <= self.small_request_threshold:
@@ -1112,9 +1136,9 @@
         """
         return self.ac.total_memory_used + self.rawmalloced_total_size
 
-    def get_total_memory_free(self):
+    def threshold_reached(self, extra=0):
         return (self.next_major_collection_threshold -
-                float(self.get_total_memory_used()))
+                float(self.get_total_memory_used())) < float(extra)
 
     def card_marking_words_for_length(self, length):
         # --- Unoptimized version:
@@ -2058,10 +2082,10 @@
         self.gc_step_until(STATE_MARKING)
         self.gc_step_until(STATE_SCANNING)
 
-    def gc_step_until(self, state, reserving_size=0):
+    def gc_step_until(self, state):
         while self.gc_state != state:
             self.minor_collection()
-            self.major_collection_step(reserving_size)
+            self.major_collection_step()
 
     debug_gc_step_until = gc_step_until   # xxx
 
@@ -2086,8 +2110,36 @@
             pass
         self.debug_check_consistency()
 
+        #
+        # Every call to major_collection_step() adds nursery_size//2
+        # to the threshold.  It is reset at the end of this function
+        # when the major collection is fully finished.
+        #
+        # In the common case, this is larger than the size of all
+        # objects that survive a minor collection.  After a few
+        # minor collections (each followed by one call to
+        # major_collection_step()) the threshold is much higher than
+        # the currently-in-use old memory.  Then threshold_reached()
+        # won't be true again until the major collection fully
+        # finishes, time passes, and it's time for the next major
+        # collection.
+        #
+        # However there are less common cases:
+        #
+        # * if more than half of the nursery consistently survives: we
+        #   call major_collection_step() twice after a minor
+        #   collection;
+        #
+        # * or if we're allocating a large number of bytes in
+        #   external_malloc().  In that case, we are likely to reach
+        #   again the threshold_reached() case, and more major
+        #   collection steps will be done immediately until
+        #   threshold_reached() returns false.
+        #
+        self.next_major_collection_threshold += self.nursery_size // 2
 
-        # XXX currently very course increments, get this working then split
+
+        # XXX currently very coarse increments, get this working then split
         # to smaller increments using stacks for resuming
         if self.gc_state == STATE_SCANNING:
             self.objects_to_trace = self.AddressStack()
@@ -2201,8 +2253,7 @@
                 #
                 # Max heap size: gives an upper bound on the threshold.  If we
                 # already have at least this much allocated, raise MemoryError.
-                if bounded and (float(self.get_total_memory_used()) + reserving_size >=
-                                self.next_major_collection_initial):
+                if bounded and self.threshold_reached(reserving_size):
                     #
                     # First raise MemoryError, giving the program a chance to
                     # quit cleanly.  It might still allocate in the nursery,


More information about the pypy-commit mailing list