[pypy-commit] pypy gc-disable: improve the return value of gc.collect_step: now it returns a GcCollectStepStats, the same which is passed to the on_gc_collect_step hook; in particular, now we can know the old and the new GC state

Wed Oct 24 12:14:47 EDT 2018

Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: gc-disable
Changeset: r95235:2db479ad4b70
Date: 2018-10-24 18:14 +0200
http://bitbucket.org/pypy/pypy/changeset/2db479ad4b70/

Log:	improve the return value of gc.collect_step: now it returns a
	GcCollectStepStats, the same which is passed to the
	on_gc_collect_step hook; in particular, now we can know the old and
	the new GC state

diff --git a/pypy/module/gc/hook.py b/pypy/module/gc/hook.py
--- a/pypy/module/gc/hook.py
+++ b/pypy/module/gc/hook.py
@@ -1,5 +1,6 @@
 from rpython.memory.gc.hook import GcHooks
-from rpython.memory.gc import incminimark 
+from rpython.memory.gc import incminimark
+from rpython.rlib import rgc
 from rpython.rlib.nonconst import NonConstant
 from rpython.rlib.rarithmetic import r_uint, r_longlong, longlongmax
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
@@ -190,7 +191,8 @@
             self.duration_min,
             self.duration_max,
             self.oldstate,
-            self.newstate)
+            self.newstate,
+            rgc.is_done__states(self.oldstate, self.newstate))
         self.reset()
         self.space.call_function(self.w_callable, w_stats)
 
@@ -250,15 +252,32 @@
 
 
 class W_GcCollectStepStats(W_Root):
+    # NOTE: this is specific to incminimark: if we want to integrate the
+    # applevel gc module with another gc, we probably need a more general
+    # approach to this.
+    #
+    # incminimark has 4 GC states: scanning, marking, sweeping and
+    # finalizing. However, from the user point of view, we have an additional
+    # "virtual" state: USERDEL, which represent when we run applevel
+    # finalizers after having completed a GC major collection. This state is
+    # never explicitly visible when using hooks, but it is used for the return
+    # value of gc.collect_step (see interp_gc.py)
+    STATE_SCANNING = incminimark.STATE_SCANNING
+    STATE_MARKING = incminimark.STATE_MARKING
+    STATE_SWEEPING = incminimark.STATE_SWEEPING
+    STATE_FINALIZING = incminimark.STATE_FINALIZING
+    STATE_USERDEL = incminimark.STATE_FINALIZING + 1 # used by StepCollector
+    GC_STATES = tuple(incminimark.GC_STATES + ['USERDEL'])
 
     def __init__(self, count, duration, duration_min, duration_max,
-                 oldstate, newstate):
+                 oldstate, newstate, major_is_done):
         self.count = count
         self.duration = duration
         self.duration_min = duration_min
         self.duration_max = duration_max
         self.oldstate = oldstate
         self.newstate = newstate
+        self.major_is_done = major_is_done
 
 
 class W_GcCollectStats(W_Root):
@@ -314,11 +333,16 @@
 
 W_GcCollectStepStats.typedef = TypeDef(
     "GcCollectStepStats",
-    STATE_SCANNING = incminimark.STATE_SCANNING,
-    STATE_MARKING = incminimark.STATE_MARKING,
-    STATE_SWEEPING = incminimark.STATE_SWEEPING,
-    STATE_FINALIZING = incminimark.STATE_FINALIZING,
-    GC_STATES = tuple(incminimark.GC_STATES),
+    STATE_SCANNING = W_GcCollectStepStats.STATE_SCANNING,
+    STATE_MARKING = W_GcCollectStepStats.STATE_MARKING,
+    STATE_SWEEPING = W_GcCollectStepStats.STATE_SWEEPING,
+    STATE_FINALIZING = W_GcCollectStepStats.STATE_FINALIZING,
+    STATE_USERDEL = W_GcCollectStepStats.STATE_USERDEL,
+    GC_STATES = tuple(W_GcCollectStepStats.GC_STATES),
+    major_is_done = interp_attrproperty(
+        "major_is_done",
+        cls=W_GcCollectStepStats,
+        wrapfn="newbool"),
     **wrap_many_ints(W_GcCollectStepStats, (
         "count",
         "duration",
diff --git a/pypy/module/gc/interp_gc.py b/pypy/module/gc/interp_gc.py
--- a/pypy/module/gc/interp_gc.py
+++ b/pypy/module/gc/interp_gc.py
@@ -1,6 +1,7 @@
 from pypy.interpreter.gateway import unwrap_spec
 from pypy.interpreter.error import oefmt
 from rpython.rlib import rgc
+from pypy.module.gc.hook import W_GcCollectStepStats
 
 
 @unwrap_spec(generation=int)
@@ -96,12 +97,27 @@
         if self.finalizing:
             self._run_finalizers()
             self.finalizing = False
-            return True # everything done
+            oldstate = W_GcCollectStepStats.STATE_USERDEL
+            newstate = W_GcCollectStepStats.STATE_SCANNING
+            major_is_done = True # now we are finally done
         else:
-            done = self._collect_step()
-            if done:
+            states = self._collect_step()
+            oldstate = rgc.old_state(states)
+            newstate = rgc.new_state(states)
+            major_is_done = False  # USERDEL still to do
+            if rgc.is_done(states):
+                newstate = W_GcCollectStepStats.STATE_USERDEL
                 self.finalizing = True
-            return False # still something to do
+        #
+        duration = -1
+        return W_GcCollectStepStats(
+            count = 1,
+            duration = duration,
+            duration_min = duration,
+            duration_max = duration,
+            oldstate = oldstate,
+            newstate = newstate,
+            major_is_done = major_is_done)
 
     def _collect_step(self):
         return rgc.collect_step()
@@ -116,7 +132,8 @@
     If the GC is not incremental, do a full collection and return True.
     """
     sc = space.fromcache(StepCollector)
-    return space.newbool(sc.do())
+    w_stats = sc.do()
+    return w_stats
 
 # ____________________________________________________________
 
diff --git a/pypy/module/gc/test/test_gc.py b/pypy/module/gc/test/test_gc.py
--- a/pypy/module/gc/test/test_gc.py
+++ b/pypy/module/gc/test/test_gc.py
@@ -3,7 +3,7 @@
 from rpython.rlib import rgc
 from pypy.interpreter.baseobjspace import ObjSpace
 from pypy.interpreter.gateway import interp2app, unwrap_spec
-from pypy.module.gc.interp_gc import StepCollector
+from pypy.module.gc.interp_gc import StepCollector, W_GcCollectStepStats
 
 class AppTestGC(object):
 
@@ -113,7 +113,7 @@
         n = 0
         while True:
             n += 1
-            if gc.collect_step():
+            if gc.collect_step().major_is_done:
                 break
 
         assert n >= 2 # at least one step + 1 finalizing
@@ -194,33 +194,52 @@
 
 
 def test_StepCollector():
+    W = W_GcCollectStepStats
+    SCANNING = W.STATE_SCANNING
+    MARKING = W.STATE_MARKING
+    SWEEPING = W.STATE_SWEEPING
+    FINALIZING = W.STATE_FINALIZING
+    USERDEL = W.STATE_USERDEL
+
     class MyStepCollector(StepCollector):
         my_steps = 0
         my_done = False
         my_finalized = 0
 
+        def __init__(self):
+            StepCollector.__init__(self, space=None)
+            self._state_transitions = iter([
+                (SCANNING, MARKING),
+                (MARKING, SWEEPING),
+                (SWEEPING, FINALIZING),
+                (FINALIZING, SCANNING)])
+
         def _collect_step(self):
             self.my_steps += 1
-            return self.my_done
+            try:
+                oldstate, newstate = next(self._state_transitions)
+            except StopIteration:
+                assert False, 'should not happen, did you call _collect_step too much?'
+            return rgc._encode_states(oldstate, newstate)
 
         def _run_finalizers(self):
             self.my_finalized += 1
 
-    sc = MyStepCollector(space=None)
-    assert not sc.do()
-    assert sc.my_steps == 1
-    assert not sc.do()
-    assert sc.my_steps == 2
-    sc.my_done = True
-    assert not sc.do()
-    assert sc.my_steps == 3
-    assert sc.my_finalized == 0
-    assert sc.finalizing
-    assert sc.do()
-    assert sc.my_steps == 3
-    assert sc.my_finalized == 1
-    assert not sc.finalizing
-    assert not sc.do()
-    assert sc.my_steps == 4
-    assert sc.my_finalized == 1
+    sc = MyStepCollector()
+    transitions = []
+    while True:
+        result = sc.do()
+        transitions.append((result.oldstate, result.newstate, sc.my_finalized))
+        if result.major_is_done:
+            break
 
+    assert transitions == [
+        (SCANNING, MARKING, False),
+        (MARKING, SWEEPING, False),
+        (SWEEPING, FINALIZING, False),
+        (FINALIZING, USERDEL, False),
+        (USERDEL, SCANNING, True)
+    ]
+    # there is one more transition than actual step, because
+    # FINALIZING->USERDEL is "virtual"
+    assert sc.my_steps == len(transitions) - 1
diff --git a/pypy/module/gc/test/test_hook.py b/pypy/module/gc/test/test_hook.py
--- a/pypy/module/gc/test/test_hook.py
+++ b/pypy/module/gc/test/test_hook.py
@@ -69,26 +69,29 @@
 
     def test_on_gc_collect_step(self):
         import gc
+        SCANNING = 0
+        MARKING = 1
+        SWEEPING = 2
+        FINALIZING = 3
         lst = []
         def on_gc_collect_step(stats):
             lst.append((stats.count,
                         stats.duration,
                         stats.oldstate,
-                        stats.newstate))
+                        stats.newstate,
+                        stats.major_is_done))
         gc.hooks.on_gc_collect_step = on_gc_collect_step
-        self.fire_gc_collect_step(10, 20, 30)
-        self.fire_gc_collect_step(40, 50, 60)
+        self.fire_gc_collect_step(10, SCANNING, MARKING)
+        self.fire_gc_collect_step(40, FINALIZING, SCANNING)
         assert lst == [
-            (1, 10, 20, 30),
-            (1, 40, 50, 60),
+            (1, 10, SCANNING, MARKING, False),
+            (1, 40, FINALIZING, SCANNING, True),
             ]
         #
         gc.hooks.on_gc_collect_step = None
-        self.fire_gc_collect_step(70, 80, 90)  # won't fire
-        assert lst == [
-            (1, 10, 20, 30),
-            (1, 40, 50, 60),
-            ]
+        oldlst = lst[:]
+        self.fire_gc_collect_step(70, SCANNING, MARKING)  # won't fire
+        assert lst == oldlst
 
     def test_on_gc_collect(self):
         import gc
@@ -123,7 +126,8 @@
         assert S.STATE_MARKING == 1
         assert S.STATE_SWEEPING == 2
         assert S.STATE_FINALIZING == 3
-        assert S.GC_STATES == ('SCANNING', 'MARKING', 'SWEEPING', 'FINALIZING')
+        assert S.GC_STATES == ('SCANNING', 'MARKING', 'SWEEPING',
+                               'FINALIZING', 'USERDEL')
 
     def test_cumulative(self):
         import gc
diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py
--- a/rpython/rlib/rgc.py
+++ b/rpython/rlib/rgc.py
@@ -33,20 +33,30 @@
 def _encode_states(oldstate, newstate):
     return oldstate << 16 | newstate
 
-def old_state(val):
-    return (val & 0xFFFF0000) >> 16
+def old_state(states):
+    return (states & 0xFFFF0000) >> 16
 
-def new_state(val):
-    return val & 0xFFFF
+def new_state(states):
+    return states & 0xFFFF
 
-def is_done(val):
+def is_done(states):
+    """
+    Return True if the return value of collect_step signals the end of a major
+    collection
+    """
+    old = old_state(states)
+    new = new_state(states)
+    return is_done__states(old, new)
+
+def is_done__states(oldstate, newstate):
+    "Like is_done, but takes oldstate and newstate explicitly"
     # a collection is considered done when it ends up in the starting state
     # (which is usually represented as 0). This logic works for incminimark,
     # which is currently the only gc actually used and for which collect_step
     # is implemented. In case we add more GC in the future, we might want to
     # delegate this logic to the GC itself, but for now it is MUCH simpler to
     # just write it in plain RPython.
-    return old_state(val) != 0 and new_state(val) == 0
+    return oldstate != 0 and newstate == 0
 
 def set_max_heap_size(nbytes):
     """Limit the heap size to n bytes.