[pypy-commit] pypy gc-del: Phew, finally I have (maybe) found out the right algorithm...

arigo noreply at buildbot.pypy.org
Mon Mar 25 17:48:40 CET 2013


Author: Armin Rigo <arigo at tunes.org>
Branch: gc-del
Changeset: r62743:21829c4cca75
Date: 2013-03-25 17:48 +0100
http://bitbucket.org/pypy/pypy/changeset/21829c4cca75/

Log:	Phew, finally I have (maybe) found out the right algorithm...

diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py
--- a/rpython/memory/gc/base.py
+++ b/rpython/memory/gc/base.py
@@ -74,7 +74,8 @@
                             is_rpython_class,
                             has_custom_trace,
                             get_custom_trace,
-                            fast_path_tracing):
+                            fast_path_tracing,
+                            call_finalizer):
         self.getdestructor = getdestructor
         self.is_varsize = is_varsize
         self.has_gcptr_in_varsize = has_gcptr_in_varsize
@@ -91,6 +92,7 @@
         self.has_custom_trace = has_custom_trace
         self.get_custom_trace = get_custom_trace
         self.fast_path_tracing = fast_path_tracing
+        self.call_finalizer = call_finalizer
 
     def get_member_index(self, type_id):
         return self.member_index(type_id)
@@ -351,22 +353,16 @@
                 obj = self.run_finalizers_queue.peekleft()
                 finalizer = self.registered_finalizers.get(obj)
                 ll_assert(finalizer != llmemory.NULL, "lost finalizer")
-                finalizer = llmemory.cast_adr_to_ptr(finalizer, FINALIZER)
-                try:
-                    finalizer(obj)
-                except rgc.FinalizeLater:
+                if not self.call_finalizer(finalizer, obj):
                     break
-                except Exception, e:
-                    XXX
                 obj1 = self.run_finalizers_queue.popleft()
                 ll_assert(obj1 == obj, "wrong finalized object")
+                self.registered_finalizers.setitem(obj, NULL)
+                # XXX MISSING: must clean up the dict regularly!
         finally:
             self.running_finalizers = False
 
 
-FINALIZER = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void))
-
-
 class MovingGCBase(GCBase):
     moving_gc = True
 
diff --git a/rpython/memory/gc/minimark.py b/rpython/memory/gc/minimark.py
--- a/rpython/memory/gc/minimark.py
+++ b/rpython/memory/gc/minimark.py
@@ -1250,6 +1250,10 @@
         # We proceed until 'old_objects_pointing_to_young' is empty.
         self.collect_oldrefs_to_nursery()
         #
+        # Also copy out of the nursery the objects with finalizers.
+        if self.young_objects_with_finalizers.non_empty():
+            self.deal_with_young_objects_with_finalizers()
+        #
         # Now all live nursery objects should be out.  Update the young
         # weakrefs' targets.
         if self.young_objects_with_weakrefs.non_empty():
@@ -1882,63 +1886,65 @@
     # ----------
     # Finalizers
 
-    def collect_roots_from_finalizers(self):
-        self.young_objects_with_finalizers.foreach(
-            self._collect_root_from_finalizer, None)
-
-    def _collect_root_from_finalizer(self, obj, ignored):
-        # idea: every young object with a finalizer xxxxxxxxxxxx
-        self.temp_root.address[0] = obj
-        self._trace_drag_out1(self.temp_root)
-
     def deal_with_young_objects_with_finalizers(self):
-        """We need to enqueue to run_finalizers_queue all dying young
+        """We need to enqueue to 'run_finalizers_queue' all dying young
         objects with finalizers.  As these survive for a bit longer,
-        we also need to copy them out of the nursery.
+        we also need to copy them out of the nursery.  The tricky part
+        here is to enqueue them in topological order, if possible.
         """
-        xxxxxxxx
-        if self.young_objects_call_finalizers is not None:
-            xxxx
-
-
-        if not self.young_objects_with_finalizers.non_empty():
-            return False
-
+        finalizers_scheduled = self.AddressStack()
+        pending = self.old_objects_pointing_to_young
+        #
         while self.young_objects_with_finalizers.non_empty():
             obj = self.young_objects_with_finalizers.pop()
-            if not self.is_forwarded(obj):
-                #
-                # If the object is not forwarded so far, then it is a
-                # newly discovered object that is about to be finalized.
-                ll_assert((self.header(obj).tid & GCFLAG_HAS_FINALIZER) != 0,
-                          "lost the GCFLAG_HAS_FINALIZER")
-                #
-                # The object survives this collection; it must be tracked.
-                #
-                # Add the object in the 'young_objects_call_finalizers'
-                # stack.
-                if self.young_objects_call_finalizers is None:
-                    self.young_objects_call_finalizers = self.AddressStack()
-                self.young_objects_call_finalizers.append(obj)
-                #
-                # We want to ensure topological ordering on the finalizers,
-                # at least assuming that there are no cycles.  So xxxx
-                # 
-                
-                #
-                # The stack will be moved to 'run_finalizers_queue'
-                #  to reverse the order:
-                # 
-                xxx
-
-
-                
-            else:
-                yyy
-                obj = self.get_forwarding_address(obj)
-                self.old_objects_with_finalizers.append(obj)
-
-        return self.young_objects_call_finalizers is not None
+            ll_assert(not pending.non_empty(),
+                      "deal_with_young_objects_with_finalizers: "
+                      "old_objects_pointing_to_young should be empty")
+            #
+            # Un-register the finalizer, because 'obj' will likely move
+            _finalizer = self.registered_finalizers.get(obj)
+            ll_assert(_finalizer != llmemory.NULL, "lost _finalizer")
+            self.registered_finalizers.setitem(obj, NULL)
+            #
+            # The following lines move 'obj' out of the nursery and add it to
+            # 'self.old_objects_pointing_to_young', unless the object was
+            # already seen previously, in which case they have no effect.
+            root = self.temp_root
+            root.address[0] = obj
+            self._trace_drag_out1(root)
+            objcopy = root.address[0]
+            #
+            # Re-register the finalizer
+            self.registered_finalizers.setitem(objcopy, _finalizer)
+            #
+            # Follow all refs
+            while pending.non_empty():
+                assert not self.old_objects_with_cards_set.non_empty(), "XXX"
+                obj = pending.pop()
+                if obj:
+                    #
+                    if self.header(obj).tid & GCFLAG_HAS_FINALIZER:
+                        self.header(obj).tid &= ~GCFLAG_HAS_FINALIZER
+                        pending.append(obj)
+                        pending.append(NULL)   # marker
+                    #
+                    ll_assert(self.header(obj).tid & GCFLAG_TRACK_YOUNG_PTRS
+                              == 0, "bad flags [deal_young_finalizer]")
+                    self.header(obj).tid |= GCFLAG_TRACK_YOUNG_PTRS
+                    self.trace_and_drag_out_of_nursery(obj)
+                    #
+                else:
+                    # seen a NULL marker
+                    obj = pending.pop()
+                    finalizers_scheduled.append(obj)
+            # End of loop
+        #
+        # Copy the objects scheduled into 'run_finalizers_queue', in
+        # reverse order.
+        while finalizers_scheduled.non_empty():
+            obj = finalizers_scheduled.pop()
+            self.run_finalizers_queue.append(obj)
+        finalizers_scheduled.delete()
 
 
     def deal_with_old_objects_with_finalizers(self):
diff --git a/rpython/memory/gctypelayout.py b/rpython/memory/gctypelayout.py
--- a/rpython/memory/gctypelayout.py
+++ b/rpython/memory/gctypelayout.py
@@ -3,6 +3,8 @@
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rlib.debug import ll_assert
 from rpython.rlib.rarithmetic import intmask
+from rpython.rlib import rgc
+from rpython.rlib.objectmodel import we_are_translated
 from rpython.tool.identity_dict import identity_dict
 
 
@@ -135,6 +137,19 @@
         infobits = self.get(typeid).infobits
         return infobits & T_ANY_SLOW_FLAG == 0
 
+    def q_call_finalizer(self, finalizer, obj):
+        XXX
+        FINALIZER = lltype.Ptr(lltype.FuncType([llmemory.Address],
+                                               lltype.Void))
+        finalizer = llmemory.cast_adr_to_ptr(finalizer, FINALIZER)
+        finalizer(obj)
+##                except rgc.FinalizeLater:
+##                    xxx
+##                        except Exception, e:
+##                    XXX
+        return True
+
+
     def set_query_functions(self, gc):
         gc.set_query_functions(
             self.q_is_varsize,
@@ -152,7 +167,8 @@
             self.q_is_rpython_class,
             self.q_has_custom_trace,
             self.q_get_custom_trace,
-            self.q_fast_path_tracing)
+            self.q_fast_path_tracing,
+            self.q_call_finalizer)
 
 
 # the lowest 16bits are used to store group member index
@@ -387,8 +403,11 @@
         # must be overridden for proper custom tracer support
         return None
 
-    def initialize_gc_query_function(self, gc):
-        return GCData(self.type_info_group).set_query_functions(gc)
+    def initialize_gc_query_function(self, gc, call_finalizer=None):
+        gcdata = GCData(self.type_info_group)
+        if call_finalizer is not None:
+            gcdata.q_call_finalizer = call_finalizer   # for tests
+        gcdata.set_query_functions(gc)
 
     def consider_constant(self, TYPE, value, gc):
         if value is not lltype.top_container(value):
diff --git a/rpython/memory/gcwrapper.py b/rpython/memory/gcwrapper.py
--- a/rpython/memory/gcwrapper.py
+++ b/rpython/memory/gcwrapper.py
@@ -29,7 +29,8 @@
                                                lltype2vtable,
                                                self.llinterp)
         self.get_type_id = layoutbuilder.get_type_id
-        layoutbuilder.initialize_gc_query_function(self.gc)
+        layoutbuilder.initialize_gc_query_function(
+            self.gc, layoutbuilder._call_finalizer)
 
         constants = collect_constants(flowgraphs)
         for obj in constants:
@@ -238,6 +239,13 @@
         else:
             return None
 
+    def _call_finalizer(self, finalizer, obj):
+        FUNC = lltype.typeOf(finalizer.ptr).TO
+        obj = llmemory.cast_adr_to_ptr(obj, FUNC.ARGS[0])
+        self.llinterp.eval_graph(finalizer.ptr._obj.graph, [obj],
+                                 recursive=True)
+        return True
+
 
 def collect_constants(graphs):
     constants = {}
diff --git a/rpython/memory/support.py b/rpython/memory/support.py
--- a/rpython/memory/support.py
+++ b/rpython/memory/support.py
@@ -114,6 +114,11 @@
                 self.shrink()
             return result
 
+        def peek(self):
+            used = self.used_in_last_chunk - 1
+            ll_assert(used >= 0, "peek on empty AddressStack")
+            return self.chunk.items[used]
+
         def delete(self):
             cur = self.chunk
             while cur:
@@ -270,6 +275,14 @@
                 cur = next
             free_non_gc_object(self)
 
+        def tolist(self):
+            """NOT_RPYTHON.  Returns the content as a list."""
+            lst = []
+            def _add(obj, lst):
+                lst.append(obj)
+            self.foreach(_add, lst)
+            return lst
+
     cache[chunk_size] = AddressDeque
     return AddressDeque
 
@@ -332,6 +345,14 @@
         for key, value in self.data.iteritems():
             callback(self._wrapkey(key), value, arg)
 
+    def tolist(self):
+        """NOT_RPYTHON.  Returns the content as a list."""
+        lst = []
+        def _add(key, value, lst):
+            lst.append((key, value))
+        self.foreach(_add, lst)
+        return lst
+
 
 def copy_and_update(dict, surviving, updated_address):
     """Make a copy of 'dict' in which the keys are updated as follows:
diff --git a/rpython/memory/test/test_gc.py b/rpython/memory/test/test_gc.py
--- a/rpython/memory/test/test_gc.py
+++ b/rpython/memory/test/test_gc.py
@@ -158,13 +158,14 @@
             pass
         b = B()
         b.nextid = 0
-        b.num_finalized = 0
+        b.num_finalized = -42
         class A(object):
             def __init__(self):
                 self.id = b.nextid
                 b.nextid += 1
             def finalizer(self):
                 b.num_finalized += 1
+                print "BIP", b.num_finalized
         def allocate(x):
             i = 0
             while i < x:
@@ -172,9 +173,14 @@
                 a = A()
                 rgc.register_finalizer(a.finalizer)
         def f(x):
+            print 'START'
+            b.num_finalized = 0
             allocate(x)
+            print 'XX', b.num_finalized
             llop.gc__collect(lltype.Void)
+            print 'XX', b.num_finalized
             llop.gc__collect(lltype.Void)
+            print 'XX', b.num_finalized
             return b.num_finalized
         res = self.interpret(f, [6])
         assert res == 6
diff --git a/rpython/memory/test/test_support.py b/rpython/memory/test/test_support.py
--- a/rpython/memory/test/test_support.py
+++ b/rpython/memory/test/test_support.py
@@ -49,8 +49,9 @@
             print i
             ll.append(addrs[i])
         for i in range(3000)[::-1]:
+            b = ll.peek()
             a = ll.pop()
-            assert a == addrs[i]
+            assert b == a == addrs[i]
         for i in range(3000):
             print i
             ll.append(addrs[i])


More information about the pypy-commit mailing list