[pypy-commit] pypy default: hg merge gc-del-limit-growth

Mon Jul 31 05:41:09 EDT 2017

Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r91995:44577e4653fa
Date: 2017-07-30 18:58 +0200
http://bitbucket.org/pypy/pypy/changeset/44577e4653fa/

Log:	hg merge gc-del-limit-growth

	Issue #2590: fix the bounds in the GC when allocating a lot of
	objects with finalizers

diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -2308,6 +2308,7 @@
                 ll_assert(not (self.probably_young_objects_with_finalizers
                                .non_empty()),
                     "probably_young_objects_with_finalizers should be empty")
+                self.kept_alive_by_finalizer = r_uint(0)
                 if self.old_objects_with_finalizers.non_empty():
                     self.deal_with_objects_with_finalizers()
                 elif self.old_objects_with_weakrefs.non_empty():
@@ -2380,6 +2381,9 @@
                 # we currently have -- but no more than 'max_delta' more than
                 # we currently have.
                 total_memory_used = float(self.get_total_memory_used())
+                total_memory_used -= float(self.kept_alive_by_finalizer)
+                if total_memory_used < 0:
+                    total_memory_used = 0
                 bounded = self.set_major_threshold_from(
                     min(total_memory_used * self.major_collection_threshold,
                         total_memory_used + self.max_delta),
@@ -2418,7 +2422,7 @@
             self.execute_finalizers()
             #END FINALIZING
         else:
-            pass #XXX which exception to raise here. Should be unreachable.
+            ll_assert(False, "bogus gc_state")
 
         debug_print("stopping, now in gc state: ", GC_STATES[self.gc_state])
         debug_stop("gc-collect-step")
@@ -2784,8 +2788,17 @@
     def _bump_finalization_state_from_0_to_1(self, obj):
         ll_assert(self._finalization_state(obj) == 0,
                   "unexpected finalization state != 0")
+        size_gc_header = self.gcheaderbuilder.size_gc_header
+        totalsize = size_gc_header + self.get_size(obj)
         hdr = self.header(obj)
         hdr.tid |= GCFLAG_FINALIZATION_ORDERING
+        # A bit hackish, but we will not count these objects as "alive"
+        # for the purpose of computing when the next major GC should
+        # occur.  This is done for issue #2590: without this, if we
+        # allocate mostly objects with finalizers, the
+        # next_major_collection_threshold grows forever and actual
+        # memory usage is not bounded.
+        self.kept_alive_by_finalizer += raw_malloc_usage(totalsize)
 
     def _recursively_bump_finalization_state_from_2_to_3(self, obj):
         ll_assert(self._finalization_state(obj) == 2,
diff --git a/rpython/memory/gc/minimark.py b/rpython/memory/gc/minimark.py
--- a/rpython/memory/gc/minimark.py
+++ b/rpython/memory/gc/minimark.py
@@ -1636,6 +1636,7 @@
         # with a finalizer and all objects reachable from there (and also
         # moves some objects from 'objects_with_finalizers' to
         # 'run_finalizers').
+        self.kept_alive_by_finalizer = r_uint(0)
         if self.old_objects_with_finalizers.non_empty():
             self.deal_with_objects_with_finalizers()
         #
@@ -1678,6 +1679,9 @@
         # we currently have -- but no more than 'max_delta' more than
         # we currently have.
         total_memory_used = float(self.get_total_memory_used())
+        total_memory_used -= float(self.kept_alive_by_finalizer)
+        if total_memory_used < 0:
+            total_memory_used = 0
         bounded = self.set_major_threshold_from(
             min(total_memory_used * self.major_collection_threshold,
                 total_memory_used + self.max_delta),
@@ -1999,8 +2003,11 @@
     def _bump_finalization_state_from_0_to_1(self, obj):
         ll_assert(self._finalization_state(obj) == 0,
                   "unexpected finalization state != 0")
+        size_gc_header = self.gcheaderbuilder.size_gc_header
+        totalsize = size_gc_header + self.get_size(obj)
         hdr = self.header(obj)
         hdr.tid |= GCFLAG_FINALIZATION_ORDERING
+        self.kept_alive_by_finalizer += raw_malloc_usage(totalsize)
 
     def _recursively_bump_finalization_state_from_2_to_3(self, obj):
         ll_assert(self._finalization_state(obj) == 2,
diff --git a/rpython/memory/test/test_minimark_gc.py b/rpython/memory/test/test_minimark_gc.py
--- a/rpython/memory/test/test_minimark_gc.py
+++ b/rpython/memory/test/test_minimark_gc.py
@@ -1,3 +1,4 @@
+from rpython.rlib import rgc
 from rpython.rlib.rarithmetic import LONG_BIT
 
 from rpython.memory.test import test_semispace_gc
@@ -9,3 +10,39 @@
     GC_CAN_SHRINK_BIG_ARRAY = False
     GC_CAN_MALLOC_NONMOVABLE = True
     BUT_HOW_BIG_IS_A_BIG_STRING = 11*WORD
+
+    def test_bounded_memory_when_allocating_with_finalizers(self):
+        # Issue #2590: when allocating a lot of objects with a finalizer
+        # and little else, the bounds in the (inc)minimark GC are not
+        # set up reasonably and the total memory usage grows without
+        # limit.
+        class B(object):
+            pass
+        b = B()
+        b.num_deleted = 0
+        class A(object):
+            def __init__(self):
+                fq.register_finalizer(self)
+        class FQ(rgc.FinalizerQueue):
+            Class = A
+            def finalizer_trigger(self):
+                while True:
+                    a = self.next_dead()
+                    if a is None:
+                        break
+                    b.num_deleted += 1
+        fq = FQ()
+        def f(x, y):
+            i = 0
+            alive_max = 0
+            while i < x:
+                i += 1
+                a = A()
+                a.x = a.y = a.z = i
+                #print i - b.num_deleted, b.num_deleted
+                alive = i - b.num_deleted
+                assert alive >= 0
+                alive_max = max(alive_max, alive)
+            return alive_max
+        res = self.interpret(f, [1000, 0])
+        assert res < 100